code below:
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Collections;
import java.util.Map;
import javax.sql.DataSource;
import oracle.sql.CLOB;
import org.apache.log4j.Appender;
import org.apache.log4j.EnhancedPatternLayout;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Attribute;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
public class ProfileXMLAttributes implements AppDriver {
public String driverClassName;
static Logger logLogger;
Appender logAppender;
EnhancedPatternLayout logLayout;
DataSource dataSource;
JdbcTemplate jdbcTemplate;
Connection conn;
Statement stmt;
ResultSet rs;
ResultSetMetaData rsmd;
SQLClob sqlClob;
SAXBuilder saxBuilder;
Document xmlDoc;
Format xmlFmt;
XMLOutputter xmlOutputter;
Element xmlElement;
Element xPathElement;
Element rootElement;
String pathString;
static SQLClob theSQLClob;
Map<String, Integer> tagMap;
String tagKey;
Integer tagValue;
int badXML = 0;
int rowsRead = 0;
boolean goodXML;
String msgID;
/* =========================================================
init is called once by the driver program to set up the
run environment for logging, JDBC, JDOM, etc
========================================================= */
@Override
public void init() {
logLogger = Logger.getLogger(ProfileXMLAttributes.class.getName());
dataSource = (DataSource) main.context.getBean("datasource");
jdbcTemplate = new JdbcTemplate(dataSource);
try{
conn = DriverManager.getConnection("jdbc:oracle:thin:@10.1.6.78:1521:kbfcubs",
"kbfcubs", "kbfcubs");
} catch (SQLException e) {
logLogger.debug("Exception initializing connection");
e.printStackTrace();
}
stmt = null;
try{
stmt = conn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE,
ResultSet.CONCUR_READ_ONLY);
stmt.setFetchSize(10000);
} catch (SQLException e) {
logLogger.debug("Exception initializing statement");
e.printStackTrace();
}
rs = null;
try {
String xmlQuery = "select xml_id, xml_contents from xml_table";
rs = stmt.executeQuery(xmlQuery);
} catch (SQLException e) {
logLogger.debug("Exception executing query");
e.printStackTrace();
}
saxBuilder = new SAXBuilder();
xmlFmt = Format.getPrettyFormat().setEncoding("UTF-8");
xmlOutputter = new XMLOutputter(xmlFmt);
structureString = new ArrayList<String>();
tagMap = new HashMap<String, Integer>();
}
/* =========================================================
process is called once by the driver program to do all
the work. For each row returned by the JDBC query it
instantiates the xml document in JDOM and then calls
doProcess passing the rootElement to step through the
xml tags.
========================================================= */
@Override
public void process() throws SQLException {
while(rs.next()) {
xmlDoc = null;
rowsRead ++;
msgID = rs.getString(1);
try {
xmlDoc = saxBuilder.build(rs.getString(2));
} catch (JDOMException e) {
e.printStackTrace();
} catch (IOException e) {
goodXML = false;
e.printStackTrace();
}
try {
rootElement = xmlDoc.getRootElement();
doProcess(rootElement);
} catch (Exception e) {
e.printStackTrace();
}
}
}
/* =========================================================
doProcess is called once by process for each row in the
JDBC result set. It is also called recursively as the
xml tags are discovered. Each xml tag is added to a
hash map and counts of the occurances of each tag are
accumulated.
========================================================= */
private void doProcess (Element currentElement) {
tagKey = new String(currentElement.getName());
tagValue = tagMap.get(tagKey);
if(tagValue == null) {
tagValue = 1;
} else {
tagValue += 1;
}
tagMap.put(tagKey, tagValue);
Iterator<Element> itr = currentElement.getChildren().iterator();
boolean hasChildren = false;
while (itr.hasNext()) {
hasChildren = true;
Object childElement = itr.next();
}
if (hasChildren == true) {
itr = currentElement.getChildren().iterator();
hasChildren = false;
}
while (itr.hasNext()) {
Object childElement = itr.next();
doProcess((Element) childElement);
}
}
}
Post by cliff palmerHi Rolf
I will post the code later, (sorry late for a meeting) but to answer
- this error occurs when there is an "xmlns" declaration. Since this
is the first instance of an "xmlns" declaration I've encountered with
JDOM and all of the URLs in the "xmlns" declaration that I have found
point to the same bad address, I don't know if the problem is related
to lookup of the URL or just the presence of an "xmlns" declaration.
- the problem is predictable and occurs for each xml document that
uses this bad URL in an "xmlns" declaration.
- I've used the code (I will post it, I promise) to parse over 3
million xml documents, passing a string containing the xml document
(not a URL). The value I pass to saxbuilder.build is the returned
string from the JDBC call ResultSet.getString using a column number
parameter. I haven't been altering or converting the string returned
from JDBC.
Thanks Rolf and I will post the code as soon as the suits are done with me.
Cliff
Post by Rolf LearHi Cliff.
I think there's been some good pointers already, but just to make things
crystal clear... can you perhaps post the relevant code snippet you are
using to parse the document, and perhaps the first few lines of the actual
XML too.
Also, does this problem happen with *all* xml documents (the first one), or
with just some of them?
My guess is that Oliver has the right idea with parsing the wrong string....
remember that the SaxBuilder.build(String) method expects the String to be a
URL, not the actual XML content..... YTour stack trace indicates you are
calling this method...
https://github.com/hunterhacker/jdom/blob/jdom-1.x/core/src/java/org/jdom/input/SAXBuilder.java#L986
Anyway, seeing your code would help....
Rolf
Post by cliff palmerI'm reading through several hundred thousand existing XML documents
building counts of XML tags and have encountered a
Java.net.MalformedURL Exception raised by saxBuilder.build because the
xmlns points to a URL that can not be reached.
I am using JDOM 1.1.2.
Is there a call or parameter setting that will cause saxBuilder to
ignore namespaces when parsing?
Thanks!
Cliff
_______________________________________________