将xml文件转为txt文件

2020-09-17 10:22:38 浏览数 (1)

代码语言:python
import os
import re
import sys
import glob
import xml.etree.ElementTree as ET

# Matches an HTML/XML-style tag such as <p class="x"> or </span>.
_MARKUP_TAG = re.compile(r'</?\w+[^>]*>')

# An element is exported when its tag contains one of these substrings.
_TEXT_TAG_KEYWORDS = ("content", "caption")


def _clean_markup(text):
    """Strip markup tags from *text*, turn ``&nbsp;`` into spaces and trim it."""
    return _MARKUP_TAG.sub('', text).replace("&nbsp;", " ").strip()


def xml_to_txt(indir, outdir):
    """Convert every ``*.xml`` file in *indir* into a ``.txt`` file in *outdir*.

    For each ``PostItem`` element of a document, the text of every element
    whose tag contains ``content`` or ``caption`` is cleaned (markup tags
    removed, ``&nbsp;`` replaced by a space, surrounding whitespace stripped),
    echoed to stdout and written on its own line, in document order.
    Elements without text are skipped.

    Args:
        indir: Directory holding the source XML files.
        outdir: Directory receiving the text files; created if missing.
            Each output keeps the source file name with its final extension
            replaced by ``.txt``.

    Raises:
        xml.etree.ElementTree.ParseError: If a source file is not
            well-formed XML.
        OSError: If a file or directory cannot be read or written.
    """
    os.makedirs(outdir, exist_ok=True)

    # Build full paths instead of calling os.chdir(): the working directory of
    # the process stays untouched and a relative outdir keeps its meaning.
    # glob.escape() protects against special characters in indir itself.
    pattern = os.path.join(glob.escape(indir), '*.xml')

    # Sorted so that files are processed in a deterministic order.
    for xml_path in sorted(glob.glob(pattern)):
        # splitext() only drops the last extension, so "a.b.xml" -> "a.b.txt".
        stem = os.path.splitext(os.path.basename(xml_path))[0]
        txt_path = os.path.join(outdir, stem + '.txt')

        # Parse before opening the output so that a malformed document does
        # not leave an empty text file behind.
        root = ET.parse(xml_path).getroot()

        with open(txt_path, 'w', encoding="utf-8") as f_w:
            for obj in root.iter('PostItem'):
                for ele in obj.iter():
                    if not ele.text:
                        continue
                    if not any(key in ele.tag for key in _TEXT_TAG_KEYWORDS):
                        continue
                    # Use the element's own text rather than re-querying the
                    # first matching child of the PostItem.
                    text = _clean_markup(ele.text)
                    print(text)
                    f_w.write(text)
                    f_w.write("\n")

if __name__ == "__main__":
    # Raw strings keep the backslashes of the Windows paths intact instead of
    # letting Python interpret them as escape sequences.
    indir = r'E:\Data\demo-xml'   # input directory containing the XML files
    outdir = r'E:\Data\demo-txt'  # output directory for the generated text files

    xml_to_txt(indir, outdir)

0 人点赞