lxml.py
from lxml import etree
from io import StringIO
test_html = '''<html> <body> <div> <!-- 这里是注释 --> <h4>手机品牌商<span st......
bs4确实没这个好用,bs4的树太复杂;lxml很好定位,非常好。详细解说在注释里面有了。
#!/usr/bin/python3.4
# -*- coding: utf-8 -*-

from lxml import etree
import urllib.request

# ......