lxml的使用
使用
lxml
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Basic lxml.etree element handling: building a tree, indexing, inserting,
# navigating between nodes, copying nodes and setting attributes.
# lxml documentation: https://lxml.de/tutorial.html
from copy import deepcopy

from lxml import etree

root = etree.Element("root")
print(root.tag)  # root

# Children can be attached with append() or created in place with SubElement().
root.append(etree.Element("child1"))
etree.SubElement(root, "child2")
etree.SubElement(root, "child3")

# b'<root>\n  <child1/>\n  <child2/>\n  <child3/>\n</root>\n'
print(etree.tostring(root, pretty_print=True))

# An element behaves like a list of its children.
child = root[0]
print(child.tag)  # child1
print(len(root))  # 3
print(root.index(root[1]))  # 1

# Insert a new element in front of the existing children.
root.insert(0, etree.Element("child0"))
for child in root:
    print(child.tag)

print(etree.iselement(root))  # check whether the object is an element
print(root is root[0].getparent())  # root is the parent of root[0]
print(root[0] is root[1].getprevious())  # previous sibling
print(root[1] is root[0].getnext())  # next sibling

temp = deepcopy(root[1])  # copy a node
print(temp.tag)

root = etree.Element("root", id="root")  # set an attribute at creation time
print(etree.tostring(root))
简易的例子
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Simple lxml example: parse HTML from a string, parse a document from a file,
# and select elements with XPath.
from lxml import etree

# Adjacent literals are concatenated into a single string.
text = (
    '<div> <ul> '
    '<li class="item1">1</li> '
    '<li class="item2">2</li> '
    '<li class="item3">3</li> '
    '</ul></div>'
)

html = etree.HTML(text)  # parse a string into an lxml.etree._Element
result = etree.tostring(html, encoding='utf-8')
print(type(html))
print(type(result))
print(result.decode('utf-8'))
print("======================")

# etree.parse() reads a file and returns an lxml.etree._ElementTree
# (the document wrapper), not a bare _Element.
htmlEmt = etree.parse('./test/text.xml')
print(type(htmlEmt))
result = etree.tostring(htmlEmt, pretty_print=True)
print(result)

# Select every li element in the document.
result = htmlEmt.xpath('//li')
for r in result:
    print(etree.tostring(r))  # serialized element
    print(r.text)  # text content of the element
print("======================")

# Select elements by id:
#   //            match at any depth in the document
#   li            element name
#   [@id='root']  only elements whose id attribute equals "root"
result = htmlEmt.xpath("//li[@id='root']")
print(type(result))
print(len(result))
for r in result:
    print(etree.tostring(r))  # serialized element
    print(r.text)  # text content of the element