"""Summarise fields from the Word documents in a folder into an Excel sheet.

For every ``*.docx`` file directly inside ``in_path`` the script scans the
document's body paragraphs for three pieces of text -- the first date, the
text after "主办单位:" and the organisation named in a "在…指导下"-style
phrase -- and appends one row per file to a workbook that is saved as
``汇总表.xlsx`` in the same folder.
"""
import re
from pathlib import Path

from docx import Document
from openpyxl import Workbook

# Patterns are compiled once here instead of being rebuilt for every paragraph.
# A date written as e.g. "2023年5月1日".
_DATE_RE = re.compile(r"\d{4}年\d{1,2}月\d{1,2}日")
# Everything after "主办单位" and a colon, up to the end of the line.
# NOTE(review): the original class listed the ASCII colon twice, which looks
# like a full-width ":" that was lost; both forms are accepted here.
_HOST_RE = re.compile(r"主办单位[::]\s*(.+)")
# The shortest run of text between "在" and one of the listed verbs followed
# by "下"; the run may not contain "在" or clause punctuation.
# NOTE(review): as above, the duplicated ASCII "," and ";" suggest their
# full-width forms were intended, so both forms are excluded.
_GUIDE_RE = re.compile(
    r"在([^在,,。;;]+?)(?:指导|领导|支持|帮助|协调|协助)下"
)


def _extract_fields(paragraph_texts):
    """Return ``(date, dw_a, dw_b)`` taken from an iterable of paragraph texts.

    Each field is the first match found while walking the paragraphs in
    order; a field that never matches stays ``None``. Empty paragraphs are
    skipped.
    """
    date = dw_a = dw_b = None
    for text in paragraph_texts:
        if not text:
            continue
        if not date and (m_date := _DATE_RE.search(text)):
            date = m_date.group()
        if not dw_a and (m_a := _HOST_RE.search(text)):
            dw_a = m_a.group(1)
        if not dw_b and (m_b := _GUIDE_RE.search(text)):
            dw_b = m_b.group(1)
        # Once all three are set no later paragraph can change the result.
        if date and dw_a and dw_b:
            break
    return date, dw_a, dw_b


wb = Workbook()
ws = wb.active
ws.append(["文件名", "时间", "单位名称", "指导单位名称"])

in_path = Path(r"D:\示例\指定word段落读取")
out_path = in_path.joinpath("汇总表.xlsx")

for file in in_path.glob("*.docx"):
    # Word keeps a "~$name.docx" owner/lock file next to any open document.
    # It matches the glob but is not a valid .docx package, so opening it
    # would abort the whole run; skip it.
    if file.name.startswith("~$"):
        continue
    # Excel formula that shows the file stem and links to the file itself.
    name = f'=HYPERLINK("{file}","{file.stem}")'
    doc = Document(file)
    date, dw_a, dw_b = _extract_fields(para.text for para in doc.paragraphs)
    ws.append([name, date, dw_a, dw_b])

wb.save(out_path)
print(f"处理完成!文件已保存到: {out_path}")