forked from I2P_Developers/i2p.i2p
176 lines
5.8 KiB
Java
176 lines
5.8 KiB
Java
package net.i2p.router.news;
|
|
|
|
/******************************************************************
|
|
* Contains code modified from JaxpParser:
|
|
*
|
|
* CyberXML for Java
|
|
*
|
|
* Copyright (C) Satoshi Konno 2004
|
|
*
|
|
* Author: Markus Thurner (http://thoean.com)
|
|
*
|
|
* Contains code modified from Node:
|
|
*
|
|
* CyberXML for Java
|
|
*
|
|
* Copyright (C) Satoshi Konno 2002
|
|
******************************************************************/
|
|
|
|
import org.w3c.dom.NamedNodeMap;
|
|
|
|
import net.i2p.I2PAppContext;
|
|
import net.i2p.util.Log;
|
|
import org.cybergarage.xml.Attribute;
|
|
import org.cybergarage.xml.Node;
|
|
import org.cybergarage.xml.XML;
|
|
import org.cybergarage.xml.parser.JaxpParser;
|
|
|
|
|
|
/**
|
|
* Override so that XHTML is parsed correctly.
|
|
*
|
|
* This requires us to maintain mixed text and subnodes and output both.
|
|
*
|
|
* @since 0.9.17
|
|
*/
|
|
public class XMLParser extends JaxpParser {
|
|
private final Log _log;
|
|
|
|
public static final String TEXT_NAME = "#text";
|
|
|
|
public XMLParser(I2PAppContext ctx) {
|
|
super();
|
|
_log = ctx.logManager().getLog(XMLParser.class);
|
|
}
|
|
|
|
/**
|
|
* Modified from UPnP JaxpParser
|
|
*
|
|
* @param parentNode null if at top
|
|
* @param rank parse level, only for debug
|
|
* @return the parsed node, or the parent node, unused except at top level
|
|
*/
|
|
@Override
|
|
public org.cybergarage.xml.Node parse(Node parentNode, org.w3c.dom.Node domNode, int rank) {
|
|
int domNodeType = domNode.getNodeType();
|
|
String domNodeName = domNode.getNodeName();
|
|
String domNodeValue = domNode.getNodeValue();
|
|
NamedNodeMap attrs = domNode.getAttributes();
|
|
int arrrsLen = (attrs != null) ? attrs.getLength() : 0;
|
|
|
|
if (_log.shouldLog(Log.DEBUG)) {
|
|
String val = domNodeValue != null ?
|
|
" = \"" + domNodeValue.replace("\n", "\\n").replace("\r", "\\r") + '"' :
|
|
"";
|
|
_log.debug("[" + rank + "] ELEM : \"" + domNodeName + '"' + val +
|
|
" type = " + domNodeType + " with " + arrrsLen + " attrs");
|
|
}
|
|
|
|
// I2P -
|
|
// If it's only whitespace, skip it altogether.
|
|
// Only add it to the value if we don't have any other nodes.
|
|
// Otherwise, add it as a node.
|
|
if (domNodeType == org.w3c.dom.Node.TEXT_NODE) {
|
|
if (domNodeValue.replaceAll("[ \t\r\n]", "").length() == 0) {
|
|
return parentNode;
|
|
}
|
|
if (!parentNode.hasNodes()) {
|
|
parentNode.addValue(domNodeValue);
|
|
return parentNode;
|
|
}
|
|
// else we will add it as a node below
|
|
} else if (domNodeType != org.w3c.dom.Node.ELEMENT_NODE) {
|
|
return parentNode;
|
|
}
|
|
|
|
Node node = new Node();
|
|
node.setName(domNodeName);
|
|
node.setValue(domNodeValue);
|
|
|
|
if (parentNode != null) {
|
|
// I2P - take the value and convert it to a text node, if it's not just whitespace
|
|
String oldValue = parentNode.getValue();
|
|
if (oldValue != null && oldValue.length() > 0) {
|
|
parentNode.setValue("");
|
|
Node text = new Node();
|
|
text.setName(TEXT_NAME);
|
|
text.setValue(oldValue);
|
|
parentNode.addNode(text);
|
|
if (_log.shouldLog(Log.DEBUG))
|
|
_log.debug("Converted value to node");
|
|
}
|
|
parentNode.addNode(node);
|
|
}
|
|
if (domNodeType == org.w3c.dom.Node.TEXT_NODE)
|
|
return parentNode;
|
|
|
|
if (attrs != null) {
|
|
for (int n = 0; n < arrrsLen; n++) {
|
|
org.w3c.dom.Node attr = attrs.item(n);
|
|
String attrName = attr.getNodeName();
|
|
String attrValue = attr.getNodeValue();
|
|
node.setAttribute(attrName, attrValue);
|
|
}
|
|
}
|
|
|
|
org.w3c.dom.Node child = domNode.getFirstChild();
|
|
if (child == null) {
|
|
node.setValue("");
|
|
return node;
|
|
}
|
|
do{
|
|
parse(node, child, rank+1);
|
|
child = child.getNextSibling();
|
|
} while (child != null);
|
|
|
|
return node;
|
|
}
|
|
|
|
/**
|
|
* A replacement for Node.toString(), which does not recognize #text.
|
|
*/
|
|
public static void toString(StringBuilder buf, Node node) {
|
|
output(buf, node, 0);
|
|
}
|
|
|
|
|
|
/**
|
|
* A replacement for Node.output(), which does not recognize #text.
|
|
* Also, we use the empty entity, so <br /> does not turn into <br></br>.
|
|
*/
|
|
private static void output(StringBuilder buf, Node node, int indentLevel) {
|
|
String name = node.getName();
|
|
String value = XML.escapeXMLChars(node.getValue());
|
|
if (name.equals(TEXT_NAME)) {
|
|
buf.append(value);
|
|
return;
|
|
}
|
|
|
|
buf.append('<').append(name);
|
|
int nAttributes = node.getNAttributes();
|
|
for (int n = 0; n < nAttributes; n++) {
|
|
Attribute attr = node.getAttribute(n);
|
|
buf.append(' ').append(attr.getName()).append("=\"").append(XML.escapeXMLChars(attr.getValue())).append('"');
|
|
}
|
|
|
|
// As in Node, output either the nodes or the value.
|
|
// If mixed values and nodes, the values must be text nodes. See parser above.
|
|
if (node.hasNodes()) {
|
|
buf.append('>');
|
|
int nChildNodes = node.getNNodes();
|
|
for (int n = 0; n < nChildNodes; n++) {
|
|
Node cnode = node.getNode(n);
|
|
output(buf, cnode, indentLevel + 1);
|
|
}
|
|
buf.append("</").append(name).append('>');
|
|
} else {
|
|
if (value == null || value.length() == 0) {
|
|
// space for <br />
|
|
buf.append(" />");
|
|
} else {
|
|
buf.append('>').append(value).append("</").append(name).append('>');
|
|
}
|
|
}
|
|
}
|
|
}
|