Heim >Backend-Entwicklung >Python-Tutorial >python解析xml模块封装代码

python解析xml模块封装代码

WBOYOriginal: 2016-06-06 11:29:201422Durchsuche

有如下的xml文件：

代码如下:

<root>
<childs>
<child name='first'>1</child>
<child value='2'>2</child>
</childs>
</root>

下面介绍python解析xml文件的几种方法，使用python模块实现。

方式1，python模块实现自动遍历所有节点：

代码如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.sax.handler import ContentHandler
from xml.sax import parse
class TestHandle(ContentHandler):
    """SAX handler that traces parse events and collects character data.

    Every start tag, end tag and text chunk is echoed to stdout, and each
    text chunk is also appended to the list supplied by the caller.

    Args:
        inlist: list that receives every chunk passed to ``characters``.
    """

    def __init__(self, inlist):
        # Run the base initialiser explicitly (ContentHandler is an old-style
        # class on Python 2, so super() is not an option there).
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        # A single pre-formatted string prints identically on Python 2 and 3.
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element being closed."""
        print('endname %s' % (name,))

    def characters(self, chars):
        """Print a chunk of character data and store it in ``inlist``.

        The parser may deliver one text run in several chunks, and
        whitespace between elements is reported as well.
        """
        print('chars %s' % (chars,))
        self.inlist.append(chars)


if __name__ == '__main__':
    # Every text chunk reported by the parser ends up in this list.
    lt = []
    parse('test.xml', TestHandle(lt))
    # Parenthesised single argument: same output on Python 2 and Python 3.
    print(lt)

结果：
[html] view plaincopy
name: root attrs: []
chars

name: childs attrs: []
chars

name: child attrs: [u'name']
chars 1
endname child
chars

name: child attrs: [u'value']
chars 2
endname child
chars

endname childs
chars

endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2，python模块实现获取根节点，按需查找指定节点：

代码如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.dom import minidom
# Sample document used by the demo below.
# NOTE(review): the XML tags were lost when this article was extracted, which
# left a string minidom cannot parse. <request name="first"> follows from the
# printed results; the other tag names are a reconstruction - confirm against
# the original post.
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>
'''
def doxml(xmlstr, show_help=True):
    """Parse an XML string with minidom and print a walk of the root's children.

    Prints the serialised document, the serialised root element, and then
    every direct child of the root. For element children it also prints the
    ``name`` attribute, the tag name, the number of child nodes and the data
    of the first child node.

    Args:
        xmlstr: XML document text accepted by ``minidom.parseString``.
        show_help: when true (the default), also print the ``help()`` page
            of each element child.

    Raises:
        xml.parsers.expat.ExpatError: if ``xmlstr`` is not well-formed XML.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is always the root element; firstChild can instead be
    # a comment, a processing instruction or the DOCTYPE.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements carry attributes; text, comments, etc. are just echoed.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % (child.getAttribute('name'),))
        print('child node name: %s' % (child.nodeName,))
        print('child node len: %s' % (len(child.childNodes),))
        # An empty element has no first child, and a nested element has no
        # ``data``; report an empty string instead of crashing.
        print('child data: %s' % (getattr(child.firstChild, 'data', ''),))
        print('=======================================')
        if show_help:
            print('more help info to see:')
            # help() must receive the node itself: given an attribute-name
            # string it looks that text up as a pydoc topic/module instead.
            help(child)


# Run the walk-through on the sample document when executed as a script.
if __name__ == '__main__':
    doxml(xmlstr)

结果：
[html] view plaincopy
Dom:

    /2/photos/square/type.xml
    21301
    auth faild!

root:

    /2/photos/square/type.xml
    21301
    auth faild!

/2/photos/square/type.xml
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:
两种方法各有其优点，python的xml处理模块太多，目前只用到这2个。

=====补充分割线================
实际工作中发现python的minidom无法解析其它编码的xml，只能解析utf-8的编码，而其xml文件的头部声明也必须是utf-8，为其它编码会报错误。
网上的解决办法都是替换xml文件头部的编码声明，然后转换编码为utf-8再用minidom解码，实际测试为可行，不过有点累赘的感觉。

本节是 python解析xml模块封装代码的第二部分。
====写xml内容的分割线=========

代码如下:

#!/usr/bin/env python
#encoding: utf-8
from xml.dom import minidom

class xmlwrite:
    """Small minidom wrapper that builds a result document under an <api> root.

    Nodes are addressed with a minimal path syntax relative to the root
    element: steps are separated by ``/`` and a step may carry a zero-based
    index among same-named siblings, e.g. ``/Case[1]/`` is the second
    ``Case`` child of the root. A step without an index means index 0.
    Attribute predicates are not supported.

    Args:
        resultfile: path that ``save_xml`` writes the document to.
    """

    # Types minidom accepts as text unchanged; ``type(u'')`` is ``unicode``
    # on Python 2 and ``str`` on Python 3.
    _TEXT_TYPES = (str, type(u''))

    def __init__(self, resultfile):
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create an empty document whose root element is ``self.rootname``."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def _as_text(self, value):
        """Return ``value`` unchanged if it is text, otherwise ``str(value)``."""
        if isinstance(value, self._TEXT_TYPES):
            return value
        return str(value)

    def __get_spec_node(self, xpath):
        """Resolve ``xpath`` relative to the root element.

        Returns:
            The addressed node, the root itself for an empty path such as
            ``'/'``, or ``None`` when any step has no matching child.

        Raises:
            ValueError: if a step's index is not an integer.
        """
        current = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                continue
            bracket = step.find('[')
            if bracket > -1:
                tag = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # Without an index the whole step is the tag name; slicing
                # with the -1 from find() would chop its last character.
                tag = step
                index = 0
            matches = [child for child in current.childNodes
                       if child.nodeName == tag]
            if index < 0 or index >= len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new element below the node addressed by ``parent``.

        Args:
            parent: path of the parent node (see the class docstring).
            nodename: tag name of the element to create.
            value: element content; ``None`` or ``''`` produces an empty
                element, non-text values are converted with ``str``.
            attribute: optional dict of attribute names to values.
            CDATA: write ``value`` as a CDATA section instead of plain text.

        If the parent cannot be resolved, a message is printed and the
        element is appended to the root instead.
        """
        node = self.dom.createElement(nodename)
        # Compare against None/'' explicitly so that a legitimate 0 is kept.
        if value is not None and value != '':
            text = self._as_text(value)
            if CDATA:
                payload = self.dom.createCDATASection(text)
            else:
                payload = self.dom.createTextNode(text)
            node.appendChild(payload)
        # Attributes are independent of whether the element has content.
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, self._as_text(attr_value))
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, TypeError, ValueError):
            # Malformed path or non-string parent: treat as "not found".
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append a ``StartTime`` element to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append an ``EndTime`` element to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append a ``PassCount`` element to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append a ``FailCount`` element to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append an empty ``Case`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append a ``No`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append a ``URL`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append a ``DBData`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append an ``APIData`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append a CDATA ``DBSQL`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append an ``APIXPath`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Write the document to ``self.resultfile`` as UTF-8 encoded XML."""
        # Serialise first so a failure cannot truncate an existing file, and
        # write bytes so the content really is UTF-8, matching the declaration.
        payload = self.dom.toxml(encoding='utf-8')
        with open(self.resultfile, 'wb') as handle:
            handle.write(payload)

if __name__ == '__main__':
    # Demo: build a small report with two cases and save it to disk.
    report = xmlwrite(r'D:\test.xml')

    # Summary elements directly under the root.
    report.write_start_time('2223')
    report.write_end_time('444')
    report.write_pass_count('22')
    report.write_fail_count('33')

    # One row per case: (no, url, sql, dbdata, apixpath, apidata).
    demo_cases = [
        ('0', 'http://www.google.com', 'select * from ',
         'dbtata', '/xpath', 'apidata'),
        ('1', 'http://www.baidu.com', 'select 1 from ',
         'dbtata1', '/xpath1', 'apidata1'),
    ]

    # Create all Case containers before filling any of them.
    for _ in demo_cases:
        report.write_case()

    for position, row in enumerate(demo_cases):
        case_no, url, sql, dbdata, apixpath, apidata = row
        report.write_case_no(position, case_no)
        # The URL is written twice per case, as in the original demo.
        report.write_case_url(position, url)
        report.write_case_url(position, url)
        report.write_case_dbsql(position, sql)
        report.write_case_dbdata(position, dbdata)
        report.write_case_apixpath(position, apixpath)
        report.write_case_apidata(position, apidata)

    report.save_xml()

以上封装了minidom，支持通过xpath来写节点，不支持xpath带属性的匹配，但支持带索引的匹配。
比如：/root/child[1], 表示root的第2个child节点。

Stellungnahme：

Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn

Vorheriger Artikel：python基础教程之udp端口扫描Nächster Artikel：python list转dict示例分享

In Verbindung stehende Artikel

Mehr sehen