#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pathlib import Path
from docx import Document
import os
# Export the images embedded in a Word document.
def extract_img_word(filename='', doc_path=''):
    """Extract the embedded images of a .docx file into ``doc_path``.

    A .docx document is an ordinary zip archive, so it can be opened with
    ``zipfile`` directly (renaming the file to ``.zip`` works as well).
    Only archive members whose name starts with ``word/media/image`` are
    extracted; each one keeps its archive-relative path below ``doc_path``.

    Args:
        filename: Path of the .docx file to read.
        doc_path: Directory the image members are extracted into.
    """
    from zipfile import ZipFile

    image_prefix = "word/media/image"
    with ZipFile(filename) as archive:
        image_members = [
            member for member in archive.namelist()
            if member.startswith(image_prefix)
        ]
        for member in image_members:
            archive.extract(member, doc_path)
'''
pip install python-docx
https://python-docx.readthedocs.io/en/latest/
'''
# Create a sample document.
def createWord(docx_file='H:/temp/test.docx'):
    """Build a small sample .docx file and save it to ``docx_file``.

    The document contains a title, a few headings and paragraphs, one of
    which carries the text '以下段落需要删除' so that it can serve as input
    for the paragraph-deletion demo.

    Args:
        docx_file: Where to save the generated document. Defaults to the
            path that was previously hard-coded, so existing no-argument
            calls behave as before.
    """
    document = Document()
    document.add_heading('Document Title', 0)
    document.add_paragraph('A plain paragraph having some')
    document.add_heading('Heading, level 1', level=1)
    # NOTE(review): the text says "level 1" but this heading is level 2;
    # kept as in the original sample content.
    document.add_heading('Heading, level 1', level=2)
    document.add_paragraph('以下段落需要删除')
    document.add_paragraph('A plain paragraph')
    document.add_paragraph('A plain paragraph 新段落')
    document.add_heading('Heading, level 2', level=2)
    document.save(docx_file)
# Runs at import time: writes the sample document to H:/temp/test.docx,
# the same path the delWordContent demo call further down reads.
createWord()
# Delete the given paragraph.
def delete_paragraph(paragraph):
    """Remove ``paragraph`` from the document it belongs to.

    The paragraph's underlying element is detached from its parent node,
    then the wrapper's private ``_p`` and ``_element`` references are
    cleared so the wrapper no longer points at the removed element.

    Args:
        paragraph: Paragraph object exposing ``_element`` (and ``_p``).
    """
    element = paragraph._element
    parent = element.getparent()
    parent.remove(element)
    paragraph._p = None
    paragraph._element = None
def delWordContent(docx_file='', dest_file='', marker='需要删除'):
    """Delete the marker paragraph and every paragraph after it.

    The document is scanned in order. The first paragraph whose text
    contains ``marker`` and all paragraphs that follow it are removed.
    The result is written to ``dest_file`` only when the marker was found;
    otherwise nothing is saved.

    Args:
        docx_file: Path of the .docx file to read.
        dest_file: Path the trimmed document is saved to.
        marker: Text that identifies the first paragraph to delete.
            Defaults to the previously hard-coded marker.

    Returns:
        True if the marker was found and ``dest_file`` was written,
        False otherwise.
    """
    doc = Document(docx_file)
    # Take the paragraph list once up front, as the original did, so the
    # loop does not re-read it while elements are being removed.
    paragraphs = doc.paragraphs
    found = False
    for p in paragraphs:
        if not found and marker in p.text:
            found = True
        # Once the marker has been seen, this and all later paragraphs go.
        if found:
            delete_paragraph(p)
    if found:
        # Save as a new file; the input file itself is left untouched.
        doc.save(dest_file)
    return found
# Runs at import time: trims the sample document at H:/temp/test.docx and,
# if the marker text is found, saves the result as H:/temp/test-new.docx.
delWordContent(docx_file='H:/temp/test.docx',dest_file='H:/temp/test-new.docx')
def testDel(src_dir='H:/', dest_dir='words'):
    """Trim every .docx directly under ``src_dir`` and remove the originals.

    Each document is passed to ``delWordContent`` with the output going to
    ``<src_dir>/<dest_dir>/<same file name>``. The original is deleted only
    when that output file exists afterwards, because ``delWordContent``
    writes nothing when the marker text is absent and deleting the source
    in that case would lose the document.

    Args:
        src_dir: Directory searched (non-recursively) for ``*.docx`` files.
        dest_dir: Name of the sub-folder of ``src_dir`` receiving the output.
    """
    src_root = Path(src_dir)
    out_dir = src_root / dest_dir
    # Make sure the output folder exists before anything is saved into it.
    out_dir.mkdir(parents=True, exist_ok=True)
    # Snapshot the matches first: files are deleted from this directory
    # while the loop runs.
    for filename in list(src_root.glob('*.docx')):
        print(str(filename))
        dest_path = out_dir / filename.name
        delWordContent(docx_file=str(filename), dest_file=str(dest_path))
        # Only drop the source once a trimmed copy is actually on disk.
        # NOTE(review): an output left over from an earlier run also
        # satisfies this check.
        if dest_path.is_file():
            os.remove(str(filename))