Home > Article > Backend Development > Summary of several ways to parse XML in java

Summary of several ways to parse XML in java

高洛峰 · Original · 2017-01-11 13:12:54 · 1550 views

The first one: DOM.

DOM stands for Document Object Model. In an application, a DOM-based XML parser converts an XML document into a collection of objects (usually called a DOM tree), and the application works with the XML data by operating on this object model. Through the DOM interface, the application can access any part of the data in the XML document at any time, which is why the DOM approach is also called a random access mechanism.

The DOM interface provides a way to access XML document information through a hierarchical object model. These hierarchical object models form a node tree based on the XML document structure. No matter what type of information is described in the XML document, even if it is tabulated data, a list of items or a document, the model generated using DOM is in the form of a node tree. That is, DOM forces the use of a tree model to access information in XML documents. Since XML is essentially a hierarchical structure, this description method is quite effective.

The random access provided by the DOM tree gives application developers great flexibility: the program can freely manipulate the content of the entire XML document. However, because a DOM parser converts the whole XML document into a DOM tree held in memory, memory requirements are relatively high when the document is large or has a complex structure. Traversing a tree with a complex structure is also time-consuming. As a result, a DOM parser places relatively high demands on machine performance, and its runtime efficiency is not ideal. Nevertheless, because the tree structure used by a DOM parser matches the structure of an XML document, and given the convenience of random access, DOM parsers remain widely useful.

import java.io.File; 
  
import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
  
import org.w3c.dom.Document; 
import org.w3c.dom.Element; 
import org.w3c.dom.NodeList; 
  
/**
 * Reads {@code candidate.xml} with the JAXP DOM parser and prints, for every
 * {@code PERSON} element, the text of its NAME, ADDRESS, TEL, FAX and EMAIL
 * child elements.
 */
public class DomTest1
{
  /** Child tag names of a PERSON, each paired with the label printed in front of its value. */
  private static final String[][] FIELDS = {
    {"NAME", "name"},
    {"ADDRESS", "address"},
    {"TEL", "tel"},
    {"FAX", "fax"},
    {"EMAIL", "email"}
  };

  /**
   * Parses {@code candidate.xml} from the working directory and prints one
   * record per PERSON element, followed by a separator line.
   *
   * @param args ignored
   * @throws Exception if the parser cannot be created or the file cannot be read or parsed
   */
  public static void main(String[] args) throws Exception
  {
    // step 1: obtain the DOM parser factory (its job is to create concrete parsers)
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

    // step 2: obtain a concrete DOM parser
    DocumentBuilder db = dbf.newDocumentBuilder();

    // step 3: parse the XML file and obtain the Document object (root of the DOM tree)
    Document document = db.parse(new File("candidate.xml"));

    NodeList list = document.getElementsByTagName("PERSON");

    for (int i = 0; i < list.getLength(); i++)
    {
      Element element = (Element) list.item(i);

      for (String[] field : FIELDS)
      {
        System.out.println(field[1] + ":" + childText(element, field[0]));
      }

      System.out.println("--------------------------------------");
    }
  }

  /**
   * Returns the text of the first descendant of {@code parent} named
   * {@code tagName}.
   *
   * @param parent  element to search in
   * @param tagName tag name to look for
   * @return the element's text, or an empty string when the element is
   *         missing or has no content (the previous code threw a
   *         NullPointerException in both cases)
   */
  private static String childText(Element parent, String tagName)
  {
    NodeList matches = parent.getElementsByTagName(tagName);
    if (matches.getLength() == 0)
    {
      return "";
    }

    String text = matches.item(0).getTextContent();
    return text == null ? "" : text;
  }
}

import java.io.File; 
  
import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
  
import org.w3c.dom.Attr; 
import org.w3c.dom.Comment; 
import org.w3c.dom.Document; 
import org.w3c.dom.Element; 
import org.w3c.dom.NamedNodeMap; 
import org.w3c.dom.Node; 
import org.w3c.dom.NodeList; 
  
/**
 * Recursively walks the DOM tree of an XML document and prints its content
 * (elements, attributes, text, CDATA sections and comments) to the command line.
 *
 * @author zhanglong
 */
public class DomTest3
{
  /**
   * Parses {@code student.xml} from the working directory and prints it,
   * starting at the root element.
   *
   * @param args ignored
   * @throws Exception if the parser cannot be created or the file cannot be read or parsed
   */
  public static void main(String[] args) throws Exception
  {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = dbf.newDocumentBuilder();

    Document doc = db.parse(new File("student.xml"));
    // obtain the root element node
    Element root = doc.getDocumentElement();

    parseElement(root);
  }

  /**
   * Prints {@code element} as markup: start tag with attributes, then every
   * child in document order, then the end tag. Child elements are handled by
   * recursion; other node types not listed in the switch are skipped.
   *
   * @param element the element to print
   */
  private static void parseElement(Element element)
  {
    String tagName = element.getNodeName();

    System.out.print("<" + tagName);

    // For an Element node getAttributes() returns a (possibly empty) map, so no null check is needed.
    NamedNodeMap map = element.getAttributes();

    for (int i = 0; i < map.getLength(); i++)
    {
      // each attribute of this element
      Attr attr = (Attr) map.item(i);

      System.out.print(" " + attr.getName() + "=\"" + attr.getValue() + "\"");
    }

    System.out.print(">");

    NodeList children = element.getChildNodes();

    for (int i = 0; i < children.getLength(); i++)
    {
      Node node = children.item(i);

      switch (node.getNodeType())
      {
        case Node.ELEMENT_NODE:
          // an element: keep recursing
          parseElement((Element) node);
          break;

        case Node.TEXT_NODE:
          // plain text: end of the recursion
          System.out.print(node.getNodeValue());
          break;

        case Node.CDATA_SECTION_NODE:
          // CDATA has its own node type; without this case its text was silently dropped
          System.out.print("<![CDATA[" + node.getNodeValue() + "]]>");
          break;

        case Node.COMMENT_NODE:
          // comment content, wrapped back in its delimiters
          System.out.print("<!--" + ((Comment) node).getData() + "-->");
          break;

        default:
          break;
      }
    }

    System.out.print("</" + tagName + ">");
  }
}

SAX: SAX stands for Simple API for XML. Unlike DOM, SAX provides a sequential access mode, which is a fast way to read XML data. When a SAX parser processes an XML document, it fires a series of events and invokes the corresponding event-handler methods; the application accesses the XML document through these handlers. For this reason, the SAX interface is also called an event-driven interface.

import java.io.File; 
  
import javax.xml.parsers.SAXParser; 
import javax.xml.parsers.SAXParserFactory; 
  
import org.xml.sax.Attributes; 
import org.xml.sax.SAXException; 
import org.xml.sax.helpers.DefaultHandler; 
  
/**
 * Minimal SAX example: parses {@code student.xml} and lets {@code MyHandler}
 * report the parsing events.
 */
public class SaxTest1
{
  public static void main(String[] args) throws Exception
  {
    // steps 1 and 2: obtain the SAX parser factory, then a parser from it
    SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();

    // step 3: run the parse, delivering events to the handler
    File input = new File("student.xml");
    MyHandler callbacks = new MyHandler();
    saxParser.parse(input, callbacks);
  }
}
  
/**
 * SAX handler that prints one fixed line for each document and element
 * start/end event it receives.
 */
class MyHandler extends DefaultHandler
{
  /** Writes a single event message to standard output. */
  private static void report(String message)
  {
    System.out.println(message);
  }

  @Override
  public void startDocument() throws SAXException
  {
    report("parse began");
  }

  @Override
  public void endDocument() throws SAXException
  {
    report("parse finished");
  }

  @Override
  public void startElement(String uri, String localName, String qName,
      Attributes attributes) throws SAXException
  {
    report("start element");
  }

  @Override
  public void endElement(String uri, String localName, String qName)
      throws SAXException
  {
    report("finish element");
  }
}

import java.io.File; 
import java.util.Stack; 
  
import javax.xml.parsers.SAXParser; 
import javax.xml.parsers.SAXParserFactory; 
  
import org.xml.sax.Attributes; 
import org.xml.sax.SAXException; 
import org.xml.sax.helpers.DefaultHandler; 
  
/**
 * SAX example that parses {@code student.xml} and hands the events to
 * {@code MyHandler2}.
 */
public class SaxTest2
{
  public static void main(String[] args) throws Exception
  {
    // Build the parser in one chain: factory first, then the parser it creates.
    SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();

    File studentFile = new File("student.xml");
    saxParser.parse(studentFile, new MyHandler2());
  }
}
  
/**
 * SAX handler that collects the text of the 姓名, 性别 and 年龄 elements and
 * prints them each time a 学生 element ends. It also prints every attribute it
 * encounters as {@code name=value}.
 *
 * <p>A stack of element names records which element is currently open, so
 * that {@link #characters} knows which field incoming text belongs to.
 */
class MyHandler2 extends DefaultHandler
{
  private final Stack<String> stack = new Stack<String>();

  private String name;

  private String gender;

  private String age;

  @Override
  public void startElement(String uri, String localName, String qName,
      Attributes attributes) throws SAXException
  {
    stack.push(qName);

    // A new record starts: forget the previous student's values so that a
    // missing child element is not reported with stale data.
    if("学生".equals(qName))
    {
      name = null;
      gender = null;
      age = null;
    }

    for(int i = 0; i < attributes.getLength(); i++)
    {
      String attrName = attributes.getQName(i);
      String attrValue = attributes.getValue(i);

      System.out.println(attrName + "=" + attrValue);
    }
  }

  @Override
  public void characters(char[] ch, int start, int length)
      throws SAXException
  {
    if(stack.isEmpty())
    {
      return;
    }

    String tag = stack.peek();

    // A parser may deliver one element's text in several calls, so each chunk
    // is appended instead of replacing what was received before.
    if("姓名".equals(tag))
    {
      name = append(name, ch, start, length);
    }
    else if("性别".equals(tag))
    {
      gender = append(gender, ch, start, length);
    }
    else if("年龄".equals(tag))
    {
      age = append(age, ch, start, length);
    }
  }

  @Override
  public void endElement(String uri, String localName, String qName)
      throws SAXException
  {
    stack.pop(); // this element is fully parsed, so remove it from the stack

    if("学生".equals(qName))
    {
      System.out.println("姓名：" + name);
      System.out.println("性别：" + gender);
      System.out.println("年龄：" + age);

      System.out.println();
    }

  }

  /**
   * Returns {@code current} extended by the given slice of {@code ch};
   * a {@code null} current value is treated as "nothing collected yet".
   */
  private static String append(String current, char[] ch, int start, int length)
  {
    String chunk = new String(ch, start, length);
    return current == null ? chunk : current + chunk;
  }
}

JDOM:

JDOM is an open source project. It is based on a tree structure and uses pure JAVA technology to parse, generate, serialize and perform various operations on XML documents. (http://jdom.org)

•JDOM directly serves JAVA programming. It uses many features of the more powerful JAVA language (method overloading, collection concepts, etc.) to effectively combine the functions of SAX and DOM.

•JDOM is an API for reading, writing, and manipulating XML from the Java language. The API is designed to be as direct, simple and efficient as possible.

jdom creates xml

import java.io.FileWriter; 
  
import org.jdom.Attribute; 
import org.jdom.Comment; 
import org.jdom.Document; 
import org.jdom.Element; 
import org.jdom.output.Format; 
import org.jdom.output.XMLOutputter; 
  
/**
 * Builds a small XML document in memory with JDOM and writes it,
 * pretty-printed, to {@code jdom.xml}.
 */
public class JDomTest1
{
  /**
   * Creates the document and saves it to {@code jdom.xml} in the working directory.
   *
   * @param args ignored
   * @throws Exception if the file cannot be opened or written
   */
  public static void main(String[] args) throws Exception
  {
    Document document = new Document();

    Element root = new Element("root");

    document.addContent(root);

    Comment comment = new Comment("This is my comments");

    root.addContent(comment);

    Element e = new Element("hello");

    e.setAttribute("sohu", "www.sohu.com");

    root.addContent(e);

    Element e2 = new Element("world");

    Attribute attr = new Attribute("test", "hehe");

    e2.setAttribute(attr);

    e.addContent(e2);

    e2.addContent(new Element("aaa").setAttribute("a", "b")
        .setAttribute("x", "y").setAttribute("gg", "hh").setText("text content"));

    Format format = Format.getPrettyFormat();

    format.setIndent("  ");

    XMLOutputter out = new XMLOutputter(format);

    // Write through an OutputStream rather than a FileWriter: a FileWriter
    // encodes with the platform default charset, which may not match the
    // encoding the outputter declares in the XML header. The stream is closed
    // even when output fails.
    java.io.OutputStream target = new java.io.FileOutputStream("jdom.xml");
    try
    {
      out.output(document, target);
    }
    finally
    {
      target.close();
    }
  }
}

JDOM parses xml

import java.io.File; 
import java.io.FileOutputStream; 
import java.util.List; 
  
import org.jdom.Attribute; 
import org.jdom.Document; 
import org.jdom.Element; 
import org.jdom.input.SAXBuilder; 
import org.jdom.output.Format; 
import org.jdom.output.XMLOutputter; 
  
/**
 * Reads {@code jdom.xml} with JDOM, prints the root name plus the text and
 * attributes of its {@code hello} child, removes the {@code world} child of
 * {@code hello}, and writes the result to {@code jdom2.xml}.
 */
public class JDomTest2
{
  /**
   * Runs the read / inspect / modify / write cycle described on the class.
   *
   * @param args ignored
   * @throws Exception if the input cannot be parsed or the output cannot be written
   */
  public static void main(String[] args) throws Exception
  {
    SAXBuilder builder = new SAXBuilder();

    Document doc = builder.build(new File("jdom.xml"));

    Element element = doc.getRootElement();

    System.out.println(element.getName());

    Element hello = element.getChild("hello");

    System.out.println(hello.getText());

    // List<?> instead of a raw List; each entry is still cast to Attribute.
    List<?> list = hello.getAttributes();

    for(int i = 0; i < list.size(); i++)
    {
      Attribute attr = (Attribute)list.get(i);

      String attrName = attr.getName();
      String attrValue = attr.getValue();

      System.out.println(attrName + "=" + attrValue);
    }

    hello.removeChild("world");

    XMLOutputter out = new XMLOutputter(Format.getPrettyFormat().setIndent("  "));

    // Close the stream even when output fails, so the file handle is released.
    FileOutputStream target = new FileOutputStream("jdom2.xml");
    try
    {
      out.output(doc, target);
    }
    finally
    {
      target.close();
    }
  }
}

Dom4j

import java.io.FileOutputStream; 
import java.io.FileWriter; 
  
import org.dom4j.Document; 
import org.dom4j.DocumentHelper; 
import org.dom4j.Element; 
import org.dom4j.io.OutputFormat; 
import org.dom4j.io.XMLWriter; 
  
/**
 * Builds a small document with dom4j and writes it three ways: to the
 * console, to {@code student2.xml} through an OutputStream, and to
 * {@code student3.xml} through a Writer.
 */
public class Test1
{
  /**
   * Creates the document and produces the three outputs.
   *
   * @param args ignored
   * @throws Exception if a file cannot be opened or written
   */
  public static void main(String[] args) throws Exception
  {
    // Create the document and set its root element in one step. The
    // alternative is DocumentHelper.createDocument() followed by
    // document.setRootElement(root).
    Element root = DocumentHelper.createElement("student");
    Document document = DocumentHelper.createDocument(root);

    root.addAttribute("name", "zhangsan");

    Element helloElement = root.addElement("hello");
    Element worldElement = root.addElement("world");

    helloElement.setText("hello");
    worldElement.setText("world");

    helloElement.addAttribute("age", "20");

    // Console output; this writer is left open on purpose.
    XMLWriter xmlWriter = new XMLWriter();
    xmlWriter.write(document);

    OutputFormat format = new OutputFormat("  ", true);

    // Close each file-backed writer in a finally block so the file handle is
    // released (and buffered content flushed) even when write() fails.
    XMLWriter xmlWriter2 = new XMLWriter(new FileOutputStream("student2.xml"), format);
    try
    {
      xmlWriter2.write(document);
    }
    finally
    {
      xmlWriter2.close();
    }

    // NOTE(review): FileWriter encodes with the platform default charset;
    // confirm it matches the encoding declared by the OutputFormat before
    // writing non-ASCII content this way.
    XMLWriter xmlWriter3 = new XMLWriter(new FileWriter("student3.xml"), format);
    try
    {
      xmlWriter3.write(document);
    }
    finally
    {
      xmlWriter3.close();
    }
  }
}

import java.io.File; 
import java.util.Iterator; 
import java.util.List; 
  
import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
  
import org.dom4j.Document; 
import org.dom4j.Element; 
import org.dom4j.io.DOMReader; 
import org.dom4j.io.SAXReader; 
  
/**
 * Reads {@code student2.xml} with dom4j in two ways: directly through a
 * SAXReader, and by converting a JAXP DOM document with a DOMReader.
 */
public class Test2
{
  public static void main(String[] args) throws Exception
  {
    // Part 1: read the file straight into a dom4j document.
    Document saxDocument = new SAXReader().read(new File("student2.xml"));
    Element root = saxDocument.getRootElement();

    System.out.println("root element: " + root.getName());

    // number of all child elements, then of the children named "hello"
    List allChildren = root.elements();
    System.out.println(allChildren.size());

    List helloChildren = root.elements("hello");
    System.out.println(helloChildren.size());

    // "age" attribute of the first "hello" child
    System.out.println(root.element("hello").attributeValue("age"));

    // "age" attribute of every child element, in document order
    Iterator children = root.elementIterator();
    while(children.hasNext())
    {
      Element child = (Element)children.next();
      System.out.println(child.attributeValue("age"));
    }

    System.out.println("---------------------------");

    // Part 2: parse with JAXP, then convert the JAXP Document into a dom4j Document.
    DocumentBuilder jaxpBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    org.w3c.dom.Document jaxpDocument = jaxpBuilder.parse(new File("student2.xml"));

    Document converted = new DOMReader().read(jaxpDocument);

    System.out.println(converted.getRootElement().getName());
  }
}

import java.io.FileWriter; 
  
import org.jdom.Attribute; 
import org.jdom.Document; 
import org.jdom.Element; 
import org.jdom.output.Format; 
import org.jdom.output.XMLOutputter; 
  
/**
 * Builds a contact-list document with JDOM and writes it, pretty-printed and
 * encoded as GBK, to {@code contact.xml}.
 */
public class Test3
{
  /**
   * Creates the document and saves it to {@code contact.xml} in the working directory.
   *
   * @param args ignored
   * @throws Exception if the file cannot be opened or written
   */
  public static void main(String[] args) throws Exception
  {
    Document document = new Document();

    Element root = new Element("联系人列表").setAttribute(new Attribute("公司",
        "A集团"));

    document.addContent(root);

    Element contactPerson = new Element("联系人");

    root.addContent(contactPerson);

    contactPerson
        .addContent(new Element("姓名").setText("张三"))
        .addContent(new Element("公司").setText("A公司"))
        .addContent(new Element("电话").setText("021-55556666"))
        .addContent(
            new Element("地址")
                .addContent(new Element("街道").setText("5街"))
                .addContent(new Element("城市").setText("上海"))
                .addContent(new Element("省份").setText("上海市")));

    XMLOutputter output = new XMLOutputter(Format.getPrettyFormat()
        .setIndent("  ").setEncoding("gbk"));

    // The format declares "gbk", so the bytes on disk must really be GBK.
    // A FileWriter would encode with the platform default charset instead,
    // leaving a file whose header disagrees with its content. Handing the
    // outputter an OutputStream lets it apply the format's encoding itself.
    java.io.OutputStream target = new java.io.FileOutputStream("contact.xml");
    try
    {
      output.output(document, target);
    }
    finally
    {
      target.close();
    }
  }
}

For more summary of several ways of parsing XML in java, please pay attention to PHP Chinese website !

Statement：

The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Previous article: java uses xpath and dom4j to parse xml | Next article: java uses xpath and dom4j to parse xml

See more

Summary of several ways to parse XML in java

Related articles