python-docx-template
python-docx-template(PyPI 包名和导入名均为 `docxtpl`)基于Jinja2模板引擎,为Word文档提供了强大的模板功能:
功能 | python-docx-template | python-docx | docxtpl |
---|---|---|---|
设计方式 | 模板驱动 | 代码驱动 | 模板驱动 |
学习曲线 | 简单 | 中等 | 中等 |
格式控制 | 直接在Word中完成 | 需要代码实现 | 直接在Word中完成 |
变量替换 | ✓ | 需自行实现 | ✓ |
条件逻辑 | ✓ | 需自行实现 | ✓ |
复杂结构 | 表格、列表、图片 | 完全可控 | 有限支持 |
适用场景 | 批量生成标准化文档 | 精确控制文档结构 | 简单文档生成 |
# 安装核心库
pip install docxtpl
# 安装可选依赖(图像处理)
pip install pillow
# 安装可选依赖(Excel数据导入)
pip install pandas openpyxl
from docxtpl import DocxTemplate, InlineImage
from docx.shared import Mm, Inches, Pt
import jinja2
创建一个模板需要在Word文档中插入特定格式的变量:
{{ variable_name }}
模板示例:
尊敬的{{ name }}:
感谢您对{{ company }}的关注与支持。我们已收到您于{{ date }}提交的申请,
申请编号为{{ application_id }}。我们将在{{ response_days }}个工作日内给予回复。
如有疑问,请联系:{{ contact_email }}
此致
敬礼
{{ company }}
{{ current_date }}
from docxtpl import DocxTemplate
from datetime import datetime
# Open the Word template that holds the {{ ... }} placeholders.
doc = DocxTemplate("template.docx")

# Values for the placeholders, keyed by placeholder name.
context = dict(
    name="张三",
    company="ABC科技有限公司",
    date="2023年5月15日",
    application_id="APP-2023-0587",
    response_days=3,
    contact_email="[email protected]",
    current_date=datetime.now().strftime("%Y年%m月%d日"),
)

# Substitute the values, then write the result to a new file.
doc.render(context)
doc.save("generated_document.docx")
from docxtpl import DocxTemplate
import jinja2
# 创建自定义过滤器
def currency_format(value):
    """Return *value* with thousands separators and two decimal places."""
    return f"{value:,.2f}"
def date_format(value, format="%Y年%m月%d日"):
    """Format a date for display in a template.

    Args:
        value: An object with a ``strftime`` method (``datetime``/``date``),
            or a string in ``YYYY-MM-DD`` form.
        format: ``strftime`` pattern for the output. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        The formatted string. A string that does not parse as
        ``YYYY-MM-DD`` is returned unchanged, and so is any other value
        that has no ``strftime`` method (for example ``None``).
    """
    if isinstance(value, str):
        from datetime import datetime

        try:
            value = datetime.strptime(value, "%Y-%m-%d")
        except ValueError:
            # Not an ISO date: hand the text back rather than failing the
            # whole render.
            return value
    if not hasattr(value, "strftime"):
        return value
    return value.strftime(format)
# Build a Jinja2 environment and register the two custom filters on it.
doc = DocxTemplate("invoice_template.docx")
jinja_env = jinja2.Environment(autoescape=True)
jinja_env.filters.update(
    currency=currency_format,
    date_format=date_format,
)

# Render with the custom environment, then save the result.
doc.render(context, jinja_env)
doc.save("generated_invoice.docx")
from docxtpl import DocxTemplate, RichText
doc = DocxTemplate("richtext_template.docx")

# Assemble the rich-text run by run; each entry is the text plus the
# formatting keywords passed to RichText.add for that run.
rt = RichText()
for text, formatting in (
    ('这是', {"style": "Normal"}),
    ('红色', {"color": 'FF0000'}),
    ('和', {"style": "Normal"}),
    ('粗体', {"bold": True}),
    ('文本', {"style": "Normal"}),
    (',还有', {"style": "Normal"}),
    ('斜体', {"italic": True}),
    ('效果', {"style": "Normal"}),
):
    rt.add(text, **formatting)

# Expose the rich text to the template.
context = dict(rich_paragraph=rt)

# Render and save.
doc.render(context)
doc.save("richtext_output.docx")
from docxtpl import DocxTemplate, InlineImage
from docx.shared import Mm
doc = DocxTemplate("image_template.docx")

# Wrap the image files; the width is given in millimetres.
logo_image = InlineImage(doc, "company_logo.png", width=Mm(30))
signature = InlineImage(doc, "signature.png", width=Mm(50))

# Text values and images share one context.
context = dict(
    company_name="XYZ集团",
    logo=logo_image,
    manager_name="李经理",
    signature=signature,
)

# Render and save.
doc.render(context)
doc.save("with_images.docx")
from docxtpl import DocxTemplate
doc = DocxTemplate("table_template.docx")

# One dict per table row.
products = [
    {"id": "P001", "name": "笔记本电脑", "price": 5999.00, "quantity": 2},
    {"id": "P002", "name": "无线鼠标", "price": 99.00, "quantity": 5},
    {"id": "P003", "name": "显示器", "price": 1299.00, "quantity": 1},
    {"id": "P004", "name": "机械键盘", "price": 399.00, "quantity": 3},
]

# Add a per-row subtotal, then sum the subtotals for the grand total.
for product in products:
    product.update(subtotal=product["price"] * product["quantity"])
total = sum(row["subtotal"] for row in products)

# Context for the template.
context = dict(
    customer="王先生",
    order_id="ORD-2023-0892",
    products=products,
    total=total,
)

# Render and save.
doc.render(context)
doc.save("invoice_with_table.docx")
在Word模板中使用条件语句:
尊敬的客户:
{% if membership == "premium" %}
感谢您成为我们的高级会员,您将享受专属VIP服务。
{% elif membership == "standard" %}
感谢您成为我们的标准会员,您将享受优质服务。
{% else %}
感谢您的惠顾,希望您体验愉快。
{% endif %}
{% if balance > 1000 %}
您当前账户余额充足,可以享受额外优惠。
{% endif %}
Python代码:
from docxtpl import DocxTemplate
# Two contexts that take different branches of the template.
premium_context = dict(
    customer_name="张女士",
    membership="premium",
    balance=1500.00,
)
standard_context = dict(
    customer_name="李先生",
    membership="standard",
    balance=800.00,
)

# Render one letter per context. The template is loaded afresh for each
# letter so the second render starts from an unrendered document.
doc = DocxTemplate("conditional_template.docx")
doc.render(premium_context)
doc.save("premium_letter.docx")

doc = DocxTemplate("conditional_template.docx")
doc.render(standard_context)
doc.save("standard_letter.docx")
在Word模板中使用循环:
交易记录摘要:
{% for transaction in transactions[:5] %}
- {{ transaction.date }} | {{ transaction.type }} | {{ transaction.amount }}元
{% endfor %}
{% if transactions|length > 5 %}
以上仅显示最近5笔交易,查看完整记录请登录网站。
{% endif %}
Python代码:
from docxtpl import DocxTemplate
from datetime import datetime, timedelta
doc = DocxTemplate("loop_template.docx")

# Build seven sample transactions, one per day going back from today.
# Every third one is a deposit; the rest are withdrawals.
transactions = []
today = datetime.now()
for i in range(7):
    tdate = today - timedelta(days=i)
    if i % 3 == 0:
        ttype, tamount = "存入", 1000 + i * 100
    else:
        ttype, tamount = "支出", 500 + i * 50
    transactions.append(
        dict(date=tdate.strftime("%Y-%m-%d"), type=ttype, amount=tamount)
    )

# Context for the template.
context = dict(
    customer_name="王女士",
    account_id="6225 **** **** 3752",
    transactions=transactions,
)

# Render and save.
doc.render(context)
doc.save("transaction_report.docx")
在Word模板中使用计算和表达式:
账单摘要:
基础费用:{{ base_price }}元
附加服务:{{ additional_services }}元
优惠折扣:{{ discount }}元
应付金额:{{ base_price + additional_services - discount }}元
{% if (base_price + additional_services - discount) > 2000 %}
您的消费已达到高级会员标准,可享受后续服务9折优惠。
{% endif %}
Python代码:
from docxtpl import DocxTemplate
doc = DocxTemplate("expression_template.docx")

# Raw amounts only; the arithmetic is done by expressions in the template.
context = dict(
    customer_id="CID-38291",
    base_price=1299.00,
    additional_services=899.00,
    discount=199.00,
)

# Render and save.
doc.render(context)
doc.save("bill_with_calculations.docx")
from docxtpl import DocxTemplate
doc = DocxTemplate("dynamic_table_template.docx")

# Generate ten sample students with scores derived from their index.
students = []
for i in range(1, 11):
    math_score = 70 + (i % 3) * 10
    english_score = 75 + (i % 4) * 8
    science_score = 80 + (i % 5) * 5
    total = math_score + english_score + science_score
    average = total / 3
    record = dict(
        id=f"S{2023100 + i}",
        name=f"学生{i}",
        math=math_score,
        english=english_score,
        science=science_score,
        total=total,
        average=round(average, 1),
    )
    # "pass" is a Python keyword, so it cannot be a keyword argument above.
    record["pass"] = "通过" if average >= 60 else "未通过"
    students.append(record)

# Context for the template.
context = dict(
    class_name="高一(3)班",
    term="2023年春季学期",
    exam_date="2023年6月15日",
    students=students,
)

# Render and save.
doc.render(context)
doc.save("class_report_card.docx")
from docxtpl import DocxTemplate
doc = DocxTemplate("nested_table_template.docx")

# Departments, each with its own list of employees.
departments = [
    {
        "name": "研发部",
        "manager": "张工",
        "employees": [
            {"id": "E001", "name": "李明", "position": "高级工程师", "salary": 18000},
            {"id": "E002", "name": "王芳", "position": "工程师", "salary": 15000},
            {"id": "E003", "name": "赵强", "position": "工程师", "salary": 14000},
        ],
    },
    {
        "name": "市场部",
        "manager": "刘总",
        "employees": [
            {"id": "E010", "name": "张丽", "position": "市场经理", "salary": 16000},
            {"id": "E011", "name": "周红", "position": "销售主管", "salary": 13000},
        ],
    },
]

# Add the head count and the mean salary to every department.
for dept in departments:
    head_count = len(dept["employees"])
    dept["employee_count"] = head_count
    dept["avg_salary"] = (
        sum(e["salary"] for e in dept["employees"]) / dept["employee_count"]
    )

# Context for the template.
context = dict(
    company_name="ABC科技有限公司",
    report_date="2023年7月1日",
    departments=departments,
)

# Render and save.
doc.render(context)
doc.save("company_structure_report.docx")
from docxtpl import DocxTemplate, RichText
doc = DocxTemplate("styled_table_template.docx")

# Quarterly sales per region.
sales_data = [
    {"region": "东区", "q1": 1250000, "q2": 1380000, "q3": 1420000, "q4": 1600000},
    {"region": "西区", "q1": 980000, "q2": 1040000, "q3": 1100000, "q4": 1250000},
    {"region": "南区", "q1": 1100000, "q2": 1150000, "q3": 1300000, "q4": 1450000},
    {"region": "北区", "q1": 850000, "q2": 920000, "q3": 980000, "q4": 1050000},
]

# Add the yearly total and a styled RichText version of every figure.
for region in sales_data:
    region["total"] = region["q1"] + region["q2"] + region["q3"] + region["q4"]

    for quarter in ["q1", "q2", "q3", "q4", "total"]:
        value = region[quarter]
        rt = RichText()
        if quarter == "total":
            # The yearly total is shown in bold.
            rt.add(f"{value:,}", bold=True)
        else:
            # Quarterly figures are coloured by size.
            if value >= 1300000:
                shade = "007700"  # green (good)
            elif value >= 1000000:
                shade = "000000"  # black (average)
            else:
                shade = "CC0000"  # red (poor)
            rt.add(f"{value:,}", color=shade)
        region[f"{quarter}_styled"] = rt

# Context for the template.
context = dict(
    year="2023",
    report_title="区域销售业绩分析",
    sales_data=sales_data,
)

# Render and save.
doc.render(context)
doc.save("sales_performance_report.docx")
from docxtpl import DocxTemplate, InlineImage
from docx.shared import Mm
from datetime import datetime
import os
import pandas as pd
def _format_contract_date(value):
    """Return *value* formatted as ``YYYY年MM月DD日``.

    Accepts either a ``YYYY-MM-DD`` string or any object with a
    ``strftime`` method. The second case matters because date cells read
    from a spreadsheet usually arrive as datetime-like objects rather than
    strings, and ``datetime.strptime`` rejects those.
    """
    if hasattr(value, "strftime"):
        return value.strftime("%Y年%m月%d日")
    return datetime.strptime(str(value), "%Y-%m-%d").strftime("%Y年%m月%d日")


def generate_contract(customer_data, template_path="contract_template.docx", output_dir="generated_contracts"):
    """Generate one contract document from a customer's data.

    Args:
        customer_data: Mapping with at least ``start_date``, ``end_date``,
            ``amount``, ``customer_name`` and ``contract_id``. Optional
            ``signature_path`` / ``stamp_path`` name image files to embed.
            The mapping is copied, not modified.
        template_path: Path of the contract template.
        output_dir: Directory for the generated file; created if missing.

    Returns:
        Path of the saved contract.

    Raises:
        KeyError: If a required key is missing from ``customer_data``.
        ValueError: If a date string is not in ``YYYY-MM-DD`` form.
    """
    os.makedirs(output_dir, exist_ok=True)

    doc = DocxTemplate(template_path)

    # Work on a copy so the caller's mapping is left untouched.
    context = customer_data.copy()

    # Dates: today's date for the contract, plus the reformatted term dates.
    context["contract_date"] = datetime.now().strftime("%Y年%m月%d日")
    for key in ("start_date", "end_date"):
        context[key] = _format_contract_date(context[key])

    # Amount written out in Chinese capital numerals.
    from num2chinese import num2chinese
    context["amount_chinese"] = num2chinese(context["amount"])

    # Optional images are embedded only when the file actually exists.
    if "signature_path" in context and os.path.exists(context["signature_path"]):
        context["signature"] = InlineImage(doc, context["signature_path"], width=Mm(30))
    if "stamp_path" in context and os.path.exists(context["stamp_path"]):
        context["company_stamp"] = InlineImage(doc, context["stamp_path"], width=Mm(40))

    doc.render(context)

    output_filename = f"合同_{context['customer_name']}_{context['contract_id']}.docx"
    output_path = os.path.join(output_dir, output_filename)
    doc.save(output_path)
    return output_path
# 从Excel导入客户数据
def generate_contracts_from_excel(excel_path, template_path, output_dir):
    """Generate one contract per row of an Excel sheet.

    Each row is turned into a dict and passed to ``generate_contract``.
    A progress line is printed for every file.

    Returns:
        List of the generated contract paths, in row order.
    """
    rows = pd.read_excel(excel_path).to_dict(orient='records')

    generated_files = []
    for row in rows:
        contract_path = generate_contract(row, template_path, output_dir)
        generated_files.append(contract_path)
        print(f"已生成合同: {contract_path}")
    return generated_files
# Usage example: generate every contract listed in customer_data.xlsx into
# the output_contracts directory, then report how many were written.
generated_contracts = generate_contracts_from_excel(
"customer_data.xlsx",
"contract_template.docx",
"output_contracts"
)
print(f"成功生成 {len(generated_contracts)} 份合同文档")
from docxtpl import DocxTemplate, InlineImage
from docx.shared import Mm
import os
import pandas as pd
from datetime import datetime
import qrcode
from PIL import Image
def generate_certificate(participant_data, template_path="certificate_template.docx",
                         output_dir="generated_certificates", add_qr=True):
    """Generate one participant certificate.

    Args:
        participant_data: Mapping with at least ``certificate_id``,
            ``name`` and ``course_name``. Optional ``signature_path`` /
            ``seal_path`` name image files to embed. The mapping is
            copied, not modified.
        template_path: Path of the certificate template.
        output_dir: Directory for the generated file; created if missing.
        add_qr: When true, embed a verification QR code. The QR image is
            written to a temporary PNG in ``output_dir`` and removed again
            before returning, even if rendering fails.

    Returns:
        Path of the saved certificate.
    """
    os.makedirs(output_dir, exist_ok=True)

    doc = DocxTemplate(template_path)

    # Work on a copy so the caller's mapping is left untouched.
    context = participant_data.copy()

    issue_date = datetime.now().strftime("%Y年%m月%d日")
    context["issue_date"] = issue_date

    qr_path = None
    try:
        if add_qr:
            qr_data = f"证书ID: {context['certificate_id']}\n"
            qr_data += f"姓名: {context['name']}\n"
            qr_data += f"课程: {context['course_name']}\n"
            qr_data += f"日期: {issue_date}\n"
            qr_data += f"验证网址: https://example.com/verify/{context['certificate_id']}"

            qr = qrcode.QRCode(
                version=1,
                error_correction=qrcode.constants.ERROR_CORRECT_L,
                box_size=10,
                border=4,
            )
            qr.add_data(qr_data)
            qr.make(fit=True)
            qr_img = qr.make_image(fill_color="black", back_color="white")

            # Record the path before writing so that a partially written
            # file is also cleaned up in the finally block.
            qr_path = os.path.join(output_dir, f"qr_{context['certificate_id']}.png")
            qr_img.save(qr_path)
            context["qr_code"] = InlineImage(doc, qr_path, width=Mm(25))

        # Optional images are embedded only when the file actually exists.
        if "signature_path" in context and os.path.exists(context["signature_path"]):
            context["signature"] = InlineImage(doc, context["signature_path"], width=Mm(40))
        if "seal_path" in context and os.path.exists(context["seal_path"]):
            context["seal"] = InlineImage(doc, context["seal_path"], width=Mm(35))

        doc.render(context)

        output_filename = f"证书_{context['name']}_{context['certificate_id']}.docx"
        output_path = os.path.join(output_dir, output_filename)
        doc.save(output_path)
    finally:
        # Remove the temporary QR image whether or not generation succeeded.
        if qr_path is not None and os.path.exists(qr_path):
            os.remove(qr_path)

    return output_path
# 从Excel批量生成证书
def generate_certificates_from_excel(excel_path, template_path, output_dir):
    """Generate one certificate per row of an Excel sheet.

    Each row is turned into a dict and passed to ``generate_certificate``.
    A progress line is printed for every file.

    Returns:
        List of the generated certificate paths, in row order.
    """
    rows = pd.read_excel(excel_path).to_dict(orient='records')

    generated_files = []
    for row in rows:
        certificate_path = generate_certificate(row, template_path, output_dir)
        generated_files.append(certificate_path)
        print(f"已生成证书: {certificate_path}")
    return generated_files
# Usage example: generate a certificate for every row of
# course_participants.xlsx, then report how many were written.
generated_certificates = generate_certificates_from_excel(
"course_participants.xlsx",
"certificate_template.docx",
"output_certificates"
)
print(f"成功生成 {len(generated_certificates)} 份证书")
from docxtpl import DocxTemplate, InlineImage
from docx.shared import Mm, Cm
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from datetime import datetime
def generate_sales_report(data_file, template_path, output_path, company_logo=None):
    """Generate a sales analysis report with three charts.

    Reads the sales data, computes summary figures and per-category,
    per-salesperson and per-region aggregates, draws one chart for each
    aggregate and renders everything into the template.

    Chart images are written to a private temporary directory that is
    removed when the function returns, including on error, so repeated or
    concurrent runs cannot overwrite each other's images.

    Args:
        data_file: Excel file; the code reads the columns ``amount``,
            ``category``, ``salesperson`` and ``region``.
        template_path: Path of the report template.
        output_path: Where to save the generated report.
        company_logo: Optional logo image; embedded only if the file exists.

    Returns:
        ``output_path``.
    """
    import tempfile

    def save_chart(draw, figsize, path, **savefig_kwargs):
        """Draw on a fresh figure, save it to *path*, and always close it."""
        figure = plt.figure(figsize=figsize)
        try:
            draw()
            plt.savefig(path, dpi=300, **savefig_kwargs)
        finally:
            plt.close(figure)

    sales_df = pd.read_excel(data_file)
    doc = DocxTemplate(template_path)

    context = {
        "report_title": "月度销售业绩分析报告",
        "report_date": datetime.now().strftime("%Y年%m月%d日"),
        "period": "2023年第二季度",
    }

    if company_logo and os.path.exists(company_logo):
        context["company_logo"] = InlineImage(doc, company_logo, width=Mm(40))

    # 1. Overall figures.
    total_sales = sales_df['amount'].sum()
    avg_sales = sales_df['amount'].mean()
    max_sales = sales_df['amount'].max()
    context.update({
        "total_sales": f"{total_sales:,.2f}",
        "avg_sale": f"{avg_sales:,.2f}",
        "max_sale": f"{max_sales:,.2f}",
        "transaction_count": len(sales_df),
    })

    # 2. Totals per product category, largest first.
    category_sales = sales_df.groupby('category')['amount'].sum().reset_index()
    category_sales = category_sales.sort_values('amount', ascending=False)

    # 3. Totals and transaction counts per salesperson, largest first.
    sales_by_person = sales_df.groupby('salesperson')['amount'].agg(['sum', 'count']).reset_index()
    sales_by_person = sales_by_person.sort_values('sum', ascending=False)
    sales_by_person.columns = ['salesperson', 'total_amount', 'transactions']
    sales_by_person['avg_amount'] = sales_by_person['total_amount'] / sales_by_person['transactions']

    # 4. Totals per region.
    region_sales = sales_df.groupby('region')['amount'].sum().reset_index()

    # NOTE(review): the chart titles and labels are Chinese; matplotlib's
    # default font may not contain those glyphs - confirm a CJK-capable
    # font is configured where this runs.
    def draw_category_pie():
        plt.pie(category_sales['amount'], labels=category_sales['category'],
                autopct='%1.1f%%', startangle=90)
        plt.axis('equal')
        plt.title('各产品类别销售占比')

    def draw_salesperson_bars():
        plt.bar(sales_by_person['salesperson'], sales_by_person['total_amount'])
        plt.xlabel('销售人员')
        plt.ylabel('销售额')
        plt.title('各销售人员业绩对比')
        plt.xticks(rotation=45)
        plt.tight_layout()

    def draw_region_line():
        plt.plot(region_sales['region'], region_sales['amount'], marker='o', linestyle='-')
        plt.xlabel('销售区域')
        plt.ylabel('销售额')
        plt.title('各地区销售业绩')
        plt.xticks(rotation=45)
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.tight_layout()

    with tempfile.TemporaryDirectory(prefix="sales_charts_") as charts_dir:
        category_chart_path = os.path.join(charts_dir, "category_sales.png")
        save_chart(draw_category_pie, (8, 6), category_chart_path, bbox_inches='tight')
        context["category_chart"] = InlineImage(doc, category_chart_path, width=Cm(12))
        context["categories"] = category_sales.to_dict('records')

        salesperson_chart_path = os.path.join(charts_dir, "salesperson_sales.png")
        save_chart(draw_salesperson_bars, (10, 6), salesperson_chart_path)
        context["salesperson_chart"] = InlineImage(doc, salesperson_chart_path, width=Cm(14))

        # Format the amounts as text only after the bar chart is drawn,
        # because the chart needs the numeric values.
        sales_by_person['total_amount'] = sales_by_person['total_amount'].apply(lambda x: f"{x:,.2f}")
        sales_by_person['avg_amount'] = sales_by_person['avg_amount'].apply(lambda x: f"{x:,.2f}")
        context["salespeople"] = sales_by_person.to_dict('records')

        region_chart_path = os.path.join(charts_dir, "region_sales.png")
        save_chart(draw_region_line, (10, 6), region_chart_path)
        context["region_chart"] = InlineImage(doc, region_chart_path, width=Cm(14))

        # Render and save while the chart files still exist.
        doc.render(context)
        doc.save(output_path)

    return output_path
# Usage example: build the Q2 report from sales_data.xlsx, with the company
# logo, and print where it was saved.
report_file = generate_sales_report(
"sales_data.xlsx",
"report_template.docx",
"Q2_Sales_Report.docx",
"company_logo.png"
)
print(f"报告已生成: {report_file}")
import time
import concurrent.futures
from docxtpl import DocxTemplate
import os
import pandas as pd
def generate_document(template_path, context, output_path):
    """Render one document and report the outcome as a tuple.

    A new DocxTemplate is created on every call. Nothing is raised:
    any exception is turned into a failure result.

    Returns:
        ``(True, output_path)`` on success, ``(False, error_message)``
        otherwise.
    """
    try:
        template = DocxTemplate(template_path)
        template.render(context)
        template.save(output_path)
    except Exception as exc:
        return (False, str(exc))
    return (True, output_path)
def batch_generate_documents_parallel(data_list, template_path, output_dir, max_workers=4):
    """Generate many documents in parallel worker processes.

    Each item of ``data_list`` is rendered by ``generate_document`` into
    ``output_dir``. The file is named ``document_<id>.docx`` when the item
    has an ``"id"`` key, otherwise ``document_<position>.docx`` (1-based).

    Args:
        data_list: Context dicts, one per document.
        template_path: Template used for every document.
        output_dir: Target directory; created if missing.
        max_workers: Size of the process pool.

    Returns:
        Dict with ``total``, ``successful``, ``failed``, ``elapsed_time``
        (seconds), ``documents_per_second`` and ``details``. ``details``
        has one ``{"index", "success", "result"}`` entry per document, in
        completion order.
    """
    start_time = time.time()
    os.makedirs(output_dir, exist_ok=True)

    # Pair every context with its output path.
    jobs = []
    for i, data in enumerate(data_list):
        if "id" in data:
            output_filename = f"document_{data['id']}.docx"
        else:
            output_filename = f"document_{i+1}.docx"
        jobs.append((data, os.path.join(output_dir, output_filename)))

    results = []
    if jobs:  # skip the pool entirely when there is nothing to do
        with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
            future_to_idx = {
                executor.submit(generate_document, template_path, context, output_path): i
                for i, (context, output_path) in enumerate(jobs)
            }
            for future in concurrent.futures.as_completed(future_to_idx):
                idx = future_to_idx[future]
                try:
                    success, result = future.result()
                except Exception as e:
                    # The worker itself failed (e.g. unpicklable context or
                    # a crashed process); record it as a failed document.
                    success, result = False, str(e)
                results.append({
                    "index": idx,
                    "success": success,
                    "result": result
                })

    successful = sum(1 for r in results if r["success"])
    elapsed_time = time.time() - start_time

    return {
        "total": len(data_list),
        "successful": successful,
        "failed": len(data_list) - successful,
        "elapsed_time": elapsed_time,
        # A coarse clock can report zero elapsed time; avoid dividing by it.
        "documents_per_second": len(data_list) / elapsed_time if elapsed_time > 0 else 0.0,
        "details": results
    }
# Usage example: generate one document per row of a large CSV file.
#
# The __main__ guard is required here: worker processes of a
# ProcessPoolExecutor may re-import this module, and without the guard each
# worker would run this block again and start a batch of its own.
if __name__ == "__main__":
    data_df = pd.read_csv("large_dataset.csv")
    data_list = data_df.to_dict('records')
    result = batch_generate_documents_parallel(
        data_list,
        "template.docx",
        "output_batch",
        max_workers=8  # adjust to the number of CPU cores
    )
    print(f"批量处理完成:")
    print(f"总数: {result['total']}")
    print(f"成功: {result['successful']}")
    print(f"失败: {result['failed']}")
    print(f"总耗时: {result['elapsed_time']:.2f}秒")
    print(f"处理速度: {result['documents_per_second']:.2f}文档/秒")
import os
import hashlib
from docxtpl import DocxTemplate
import pickle
import time
class TemplateManager:
    """Template loader with an in-memory cache and a pickle cache on disk.

    Cache entries are keyed by the template's file name plus the MD5 of
    its content, so an edited template gets a new entry.

    NOTE(review): the same template object is returned on every cache hit,
    so callers share one instance - confirm that suits the rendering code.
    """

    def __init__(self, cache_dir="template_cache"):
        """Create the manager and make sure ``cache_dir`` exists."""
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)
        self.templates = {}

    def get_template(self, template_path):
        """Return the template for ``template_path``, using the caches.

        Lookup order: memory cache, disk cache, then a fresh
        ``DocxTemplate``. Disk cache failures are ignored on purpose; the
        template is then simply loaded or kept without being cached on disk.
        """
        file_hash = self._get_file_hash(template_path)
        cache_key = f"{os.path.basename(template_path)}_{file_hash}"

        if cache_key in self.templates:
            return self.templates[cache_key]

        cache_path = os.path.join(self.cache_dir, f"{cache_key}.pkl")
        if os.path.exists(cache_path):
            try:
                # SECURITY: pickle.load executes code embedded in the file.
                # Only use a cache directory that untrusted parties cannot
                # write to.
                with open(cache_path, 'rb') as f:
                    template = pickle.load(f)
            except Exception:
                # Unreadable or stale cache entry: fall through and rebuild.
                pass
            else:
                self.templates[cache_key] = template
                return template

        template = DocxTemplate(template_path)
        self.templates[cache_key] = template
        try:
            with open(cache_path, 'wb') as f:
                pickle.dump(template, f)
        except Exception:
            # The template may not be picklable or the directory may not
            # be writable; it is still usable from the memory cache.
            pass
        return template

    def _get_file_hash(self, file_path):
        """Return the MD5 hex digest of the file, read in 4 KiB chunks."""
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def clear_cache(self):
        """Empty the memory cache and delete every ``.pkl`` file on disk."""
        self.templates.clear()
        for entry in os.listdir(self.cache_dir):
            if entry.endswith('.pkl'):
                os.remove(os.path.join(self.cache_dir, entry))
# Usage example: one shared manager using the default cache directory.
template_manager = TemplateManager()
# 性能测试函数
def test_template_performance(template_path, iterations=100):
    """Compare direct template loading with loading through the manager.

    Runs ``iterations`` direct ``DocxTemplate`` constructions, then the
    same number of ``template_manager.get_template`` calls, timing each
    batch with ``time.time``.

    Returns:
        Dict with ``direct_load_time``, ``managed_load_time`` and
        ``speedup_factor`` (infinite when the managed time is zero).
    """
    began = time.time()
    for _ in range(iterations):
        DocxTemplate(template_path)
    direct_time = time.time() - began

    began = time.time()
    for _ in range(iterations):
        template_manager.get_template(template_path)
    managed_time = time.time() - began

    if managed_time > 0:
        speedup = direct_time / managed_time
    else:
        speedup = float('inf')

    return {
        "direct_load_time": direct_time,
        "managed_load_time": managed_time,
        "speedup_factor": speedup,
    }
# Run the comparison against a sample template and print the timings.
perf_results = test_template_performance("complex_template.docx", iterations=100)
print(f"直接加载时间: {perf_results['direct_load_time']:.4f}秒")
print(f"管理器加载时间: {perf_results['managed_load_time']:.4f}秒")
print(f"速度提升: {perf_results['speedup_factor']:.2f}倍")
from docxtpl import DocxTemplate
import logging
import os
import traceback
# Configure logging: INFO and above go to document_generation.log.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
filename='document_generation.log'
)
# Logger used by the document generator below.
logger = logging.getLogger('docxtpl_manager')
class DocumentGenerator:
    """Document generator with retries, logging and error reports.

    Keeps running counts of successful and failed generations.
    """

    # Template used for error reports; created on demand in the current
    # working directory.
    ERROR_TEMPLATE_PATH = "error_template.docx"

    def __init__(self, template_path, output_dir="output"):
        """Remember the template and make sure ``output_dir`` exists."""
        self.template_path = template_path
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        self.error_count = 0
        self.success_count = 0

    def generate(self, context, output_filename=None, retries=2):
        """Render ``context`` into a document, retrying on failure.

        Args:
            context: Data for the template.
            output_filename: File name inside ``output_dir``. Defaults to
                ``document_<id(context)>.docx``.
            retries: Extra attempts after the first failure. Negative
                values are treated as 0, so at least one attempt is made.

        Returns:
            ``(True, output_path)`` on success, or ``(False, error_message)``
            once every attempt has failed. On failure an ``ERROR_<name>``
            report document is also written on a best-effort basis.
        """
        if output_filename is None:
            output_filename = f"document_{id(context)}.docx"
        output_path = os.path.join(self.output_dir, output_filename)

        retries = max(retries, 0)
        for attempt in range(retries + 1):
            try:
                doc = DocxTemplate(self.template_path)
                doc.render(context)
                doc.save(output_path)
            except Exception as e:
                if attempt < retries:
                    logger.warning(f"文档生成失败,正在重试({attempt+1}/{retries}): {output_path}")
                    continue

                # Every attempt failed.
                self.error_count += 1
                error_detail = traceback.format_exc()
                logger.error(f"文档生成失败: {output_path}\n错误: {e}\n{error_detail}")

                # _generate_error_report logs its own failures and never
                # raises, so no extra guard is needed here.
                self._generate_error_report(context, e, output_filename)
                return (False, str(e))
            else:
                self.success_count += 1
                logger.info(f"成功生成文档: {output_path}")
                return (True, output_path)

    def _generate_error_report(self, context, error, original_filename):
        """Write an ``ERROR_<original_filename>`` report; never raises.

        The report template is created first if it does not exist yet.
        Any failure while producing the report is logged and swallowed.
        """
        try:
            template_path = self.ERROR_TEMPLATE_PATH
            # Create the report template BEFORE opening it; opening a
            # missing file can fail, which would skip the creation step.
            if not os.path.exists(template_path):
                from docx import Document
                doc = Document()
                doc.add_heading("文档生成错误报告", 0)
                doc.add_paragraph("原始文件名: {{original_filename}}")
                doc.add_paragraph("错误信息: {{error_message}}")
                doc.add_paragraph("上下文数据:")
                doc.add_paragraph("{{context_data}}")
                doc.save(template_path)
            error_template = DocxTemplate(template_path)

            error_context = {
                "original_filename": original_filename,
                "error_message": str(error),
                "context_data": str(context)
            }
            error_template.render(error_context)

            error_filename = f"ERROR_{original_filename}"
            error_path = os.path.join(self.output_dir, error_filename)
            error_template.save(error_path)
            logger.info(f"已生成错误报告: {error_path}")
        except Exception as e:
            logger.error(f"生成错误报告失败: {str(e)}")

    def get_stats(self):
        """Return the success, error and total counts as a dict."""
        return {
            "success": self.success_count,
            "error": self.error_count,
            "total": self.success_count + self.error_count
        }
# Usage example.
generator = DocumentGenerator("invoice_template.docx")

# A small batch; the last context is deliberately broken.
contexts = [
    {"invoice_id": "INV-001", "customer": "张三", "items": [...]},
    {"invoice_id": "INV-002", "customer": "李四", "items": [...]},
    {"invoice_id": "INV-003", "customer": None, "items": None},
]

# One document per context, named after its invoice id.
for context in contexts:
    invoice_id = context.get("invoice_id", "unknown")
    generator.generate(context, f"{invoice_id}.docx")

# Print the success, failure and total counts.
stats = generator.get_stats()
print("文档生成统计:")
for label, key in (("成功", "success"), ("失败", "error"), ("总计", "total")):
    print(f"{label}: {stats[key]}")
from docxtpl import DocxTemplate, RichText, InlineImage
from docx.shared import Mm
import os
# 模板设计最佳实践 - 子模板与结构化设计
class ModularTemplateSystem:
    """Template system that renders sub-templates alongside a main one.

    Templates are looked up as ``<templates_dir>/<name>.docx`` and the
    loaded objects are cached by name.
    """

    def __init__(self, templates_dir="templates"):
        """Remember the template directory and start with an empty cache."""
        self.templates_dir = templates_dir
        self.loaded_templates = {}

    def get_template(self, template_name):
        """Return the template called ``template_name``, loading it once.

        Raises:
            FileNotFoundError: If the template is not cached and its file
                does not exist.
        """
        if template_name not in self.loaded_templates:
            template_path = os.path.join(self.templates_dir, f"{template_name}.docx")
            if not os.path.exists(template_path):
                raise FileNotFoundError(f"模板不存在: {template_path}")
            self.loaded_templates[template_name] = DocxTemplate(template_path)
        return self.loaded_templates[template_name]

    def render_subtemplate(self, template_name, context):
        """Render a sub-template to a temporary file and return its path.

        The file is written into ``templates_dir``; the caller is
        responsible for deleting it.
        """
        template = self.get_template(template_name)
        template.render(context)
        temp_file = os.path.join(self.templates_dir, f"temp_{template_name}_{id(context)}.docx")
        template.save(temp_file)
        return temp_file

    def generate_document(self, main_template, context, output_path, sections=None):
        """Generate the main document, optionally with rendered sections.

        Args:
            main_template: Name of the main template.
            context: Context for the main template; copied, not modified.
            output_path: Where to save the generated document.
            sections: Optional list of ``{"name": ..., "context": ...}``
                dicts. Entries missing either key are skipped. Each one is
                rendered to a temporary file, and the list of
                ``{"name", "file"}`` dicts is passed to the main template
                as ``subcontent``.

        Returns:
            ``output_path``.

        The temporary section files are removed before returning, even
        when rendering or saving fails.
        """
        full_context = context.copy()
        subcontent = []

        try:
            if sections:
                for section in sections:
                    if "name" in section and "context" in section:
                        section_name = section["name"]
                        rendered_section = self.render_subtemplate(section_name, section["context"])
                        subcontent.append({
                            "name": section_name,
                            "file": rendered_section
                        })
                full_context["subcontent"] = subcontent

            main = self.get_template(main_template)
            main.render(full_context)
            main.save(output_path)
        finally:
            # Best-effort cleanup: a temp file that is already gone or
            # cannot be deleted must not hide the real outcome.
            for entry in subcontent:
                try:
                    os.remove(entry["file"])
                except OSError:
                    pass

        return output_path
# Usage example: templates are looked up in the report_templates directory.
template_system = ModularTemplateSystem("report_templates")
# Generate a report from the main template plus three sections, each with
# its own context.
report = template_system.generate_document(
"main_report",
{"title": "季度业务报告", "author": "张总监", "date": "2023年8月15日"},
"quarterly_report.docx",
sections=[
{
"name": "executive_summary",
"context": {"highlights": ["销售增长22%", "新增客户45家", "产品线扩展"]}
},
{
"name": "financial_results",
"context": {"revenue": 1250000, "expenses": 780000, "profit": 470000}
},
{
"name": "market_analysis",
"context": {"market_share": "23%", "competitors": ["A公司", "B公司", "C公司"]}
}
]
)
print(f"模块化报告已生成: {report}")
from docxtpl import DocxTemplate, RichText
from docx.shared import Pt, RGBColor
def conditional_format_demo():
    """Render a financial comparison with colour-coded change figures.

    For every line item this computes the percentage change against the
    previous period and wraps it in a RichText whose colour and weight
    depend on the size of the change.

    Returns:
        File name of the generated report.
    """
    doc = DocxTemplate("conditional_format_template.docx")

    # Financial statement items: current and previous period.
    financial_items = [
        {"name": "销售收入", "current": 1250000, "previous": 980000},
        {"name": "销售成本", "current": 620000, "previous": 510000},
        {"name": "毛利润", "current": 630000, "previous": 470000},
        {"name": "营业费用", "current": 280000, "previous": 250000},
        {"name": "税前利润", "current": 350000, "previous": 220000},
        {"name": "税费", "current": 87500, "previous": 55000},
        {"name": "净利润", "current": 262500, "previous": 165000},
    ]

    for item in financial_items:
        # Percentage change; a zero baseline is reported as 100 to avoid
        # dividing by zero.
        if item["previous"] == 0:
            item["change"] = 100
        else:
            item["change"] = (item["current"] - item["previous"]) / item["previous"] * 100

        # Figures with thousands separators.
        item["current_fmt"] = f"{item['current']:,}"
        item["previous_fmt"] = f"{item['previous']:,}"

        change = item["change"]
        # Positive changes carry an explicit plus sign.
        label = f"{'+' if change > 0 else ''}{change:.1f}%"

        if change > 15:
            look = {"color": "008800", "bold": True}  # strong growth
        elif change > 5:
            look = {"color": "008800"}  # good growth
        elif change > -5:
            look = {"color": "000000"}  # roughly flat
        elif change > -15:
            look = {"color": "CC0000"}  # decline
        else:
            look = {"color": "CC0000", "bold": True}  # strong decline

        change_rt = RichText()
        change_rt.add(label, **look)
        item["change_rt"] = change_rt

    # Revenue is the first item and net profit the seventh.
    context = {
        "report_title": "财务业绩同比分析",
        "period": "2023年第二季度",
        "financial_items": financial_items,
        "total_revenue": f"{financial_items[0]['current']:,}",
        "total_profit": f"{financial_items[6]['current']:,}",
    }

    doc.render(context)
    doc.save("conditional_format_report.docx")
    return "conditional_format_report.docx"
# Run the demo and print the name of the generated report.
formatted_report = conditional_format_demo()
print(f"条件格式报告已生成: {formatted_report}")
from docxtpl import DocxTemplate
import json
import os
import re
class TemplateDebugger:
    """Debugging helper for a docx template.

    On construction the template text is scanned for ``{{ variable }}``
    placeholders, ``{% if ... %}`` conditions and ``{% for x in xs %}``
    loops. Only body paragraphs and top-level tables are scanned.
    """

    def __init__(self, template_path):
        """Load the template and scan it straight away."""
        self.template_path = template_path
        self.template = DocxTemplate(template_path)
        self.variables = []
        self.conditions = []
        self.loops = []
        self.scan_template()

    def _load_docx(self):
        """Return the underlying document, loading it first if necessary.

        Some docxtpl versions leave ``template.docx`` as ``None`` until
        ``init_docx()`` has been called.
        """
        template = self.template
        if getattr(template, "docx", None) is None and hasattr(template, "init_docx"):
            template.init_docx()
        return template.docx

    @staticmethod
    def _lookup(context, dotted_name):
        """Follow ``a.b.c`` through nested dicts; return ``(found, value)``."""
        current = context
        for part in dotted_name.split('.'):
            if isinstance(current, dict) and part in current:
                current = current[part]
            else:
                return False, None
        return True, current

    @staticmethod
    def _assign(context, dotted_name, value):
        """Store ``value`` at ``a.b.c``, creating nested dicts on the way.

        An intermediate entry that is not a dict is replaced by one, and a
        plain sample string never overwrites a nested dict that is already
        there. A name used both alone and as a dotted prefix therefore
        ends up as the dict.
        """
        *parents, leaf = dotted_name.split('.')
        current = context
        for part in parents:
            if not isinstance(current.get(part), dict):
                current[part] = {}
            current = current[part]
        if isinstance(value, str) and isinstance(current.get(leaf), dict):
            return
        current[leaf] = value

    def scan_template(self):
        """Fill ``variables``, ``conditions`` and ``loops`` from the template."""
        docx = self._load_docx()

        lines = [paragraph.text for paragraph in docx.paragraphs]
        for table in docx.tables:
            for row in table.rows:
                for cell in row.cells:
                    lines.extend(paragraph.text for paragraph in cell.paragraphs)
        text_content = "".join(line + "\n" for line in lines)

        # Plain or dotted names inside {{ ... }}.
        var_pattern = r'\{\{\s*([a-zA-Z0-9_\.]+)\s*\}\}'
        self.variables = list(set(re.findall(var_pattern, text_content)))

        # The expression of every {% if ... %}.
        if_pattern = r'\{%\s*if\s+(.+?)\s*%\}'
        self.conditions = list(set(re.findall(if_pattern, text_content)))

        # (loop variable, collection) for every {% for x in xs %}.
        for_pattern = r'\{%\s*for\s+([a-zA-Z0-9_]+)\s+in\s+([a-zA-Z0-9_\.]+)\s*%\}'
        self.loops = re.findall(for_pattern, text_content)

    def generate_sample_context(self):
        """Build a context with placeholder values for everything scanned.

        Variables get ``"Sample_<last part>"``; loop collections get three
        items of the form ``{<loop variable>: "Item_<n>"}``.
        """
        context = {}
        for var in self.variables:
            self._assign(context, var, f"Sample_{var.split('.')[-1]}")
        for loop_var, collection in self.loops:
            items = [{loop_var: f"Item_{j+1}"} for j in range(3)]
            self._assign(context, collection, items)
        return context

    def validate_context(self, context):
        """Return the names the template uses that ``context`` lacks.

        Missing variables are listed by name. A loop collection that is
        missing or not a list is listed as ``"list:<collection>"``.
        Variables rooted at a loop variable (``item.name`` inside
        ``{% for item in items %}``) are skipped, because they come from
        the loop items rather than from the context.
        """
        loop_vars = {loop_var for loop_var, _ in self.loops}
        missing = []

        for var in self.variables:
            if var.split('.')[0] in loop_vars:
                continue
            found, _ = self._lookup(context, var)
            if not found:
                missing.append(var)

        for _, collection in self.loops:
            found, value = self._lookup(context, collection)
            if not found or not isinstance(value, list):
                missing.append(f"list:{collection}")

        return missing

    def debug_render(self, context, output_path=None):
        """Render with diagnostics; return ``True`` on success.

        Missing context entries are printed as warnings first. If
        rendering fails, the error, the scan results and the context are
        written to ``error_info_<template file name>.json``.
        """
        if output_path is None:
            base_name = os.path.splitext(os.path.basename(self.template_path))[0]
            output_path = f"debug_{base_name}.docx"

        missing = self.validate_context(context)
        if missing:
            print("警告: 上下文缺少以下变量:")
            for var in missing:
                print(f" - {var}")

        try:
            self.template.render(context)
            self.template.save(output_path)
            print(f"模板渲染成功,已保存到: {output_path}")
            return True
        except Exception as e:
            print(f"模板渲染失败: {str(e)}")
            error_info = {
                "error": str(e),
                "template": self.template_path,
                "variables": self.variables,
                "conditions": self.conditions,
                "loops": self.loops,
                "context": context
            }
            error_file = f"error_info_{os.path.basename(self.template_path)}.json"
            with open(error_file, 'w', encoding='utf-8') as f:
                # default=repr is deliberate: the context may hold objects
                # JSON cannot encode, and the dump is for reading only.
                json.dump(error_info, f, indent=2, ensure_ascii=False, default=repr)
            print(f"错误信息已保存到: {error_file}")
            return False

    def generate_test_document(self):
        """Render the template with the generated sample context."""
        sample_context = self.generate_sample_context()
        base_name = os.path.splitext(os.path.basename(self.template_path))[0]
        test_output = f"test_{base_name}.docx"
        return self.debug_render(sample_context, test_output)
# Usage example: scan a template on construction.
debugger = TemplateDebugger("complex_report_template.docx")
# Print what the scan found.
print(f"模板变量: {debugger.variables}")
print(f"条件语句: {debugger.conditions}")
print(f"循环语句: {debugger.loops}")
# Render a test document from generated sample data.
debugger.generate_test_document()
# Debug a render with real data.
context = {
"report_title": "季度业务报告",
"customer": {
"name": "ABC公司",
"contact": "张总",
"address": "北京市朝阳区..."
},
"orders": [
{"id": "ORD-001", "product": "产品A", "quantity": 10, "price": 1200},
{"id": "ORD-002", "product": "产品B", "quantity": 5, "price": 2500},
{"id": "ORD-003", "product": "产品C", "quantity": 8, "price": 1800}
]
}
debugger.debug_render(context, "actual_report.docx")
from docxtpl import DocxTemplate
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
def generate_data_report(excel_file, template_path, output_path):
    """Build a Word analysis report from an Excel workbook.

    The workbook is loaded with ``pd.read_excel``. The template context
    receives summary statistics for every numeric column, a correlation
    heat map (when there are at least two numeric columns), the top five
    rows by the first numeric column, and a bar chart of the first
    object-typed column.

    Args:
        excel_file: Path of the workbook to analyse.
        template_path: Path of the docxtpl template.
        output_path: Where the rendered report is saved.

    Returns:
        ``output_path``.
    """
    # Imported locally: the surrounding snippet only imports DocxTemplate,
    # so these names would otherwise raise NameError.
    from docx.shared import Cm
    from docxtpl import InlineImage

    df = pd.read_excel(excel_file)

    charts_dir = "temp_charts"
    os.makedirs(charts_dir, exist_ok=True)
    created_charts = []  # only the files written here are cleaned up

    doc = DocxTemplate(template_path)

    context = {
        "report_title": "数据分析报告",
        "data_source": excel_file,
        "report_date": pd.Timestamp.now().strftime("%Y-%m-%d"),
        "record_count": len(df)
    }

    # Summary statistics, pre-formatted so the template needs no filters.
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    context["summary_stats"] = [
        {
            "column_name": col,
            "mean": f"{df[col].mean():.2f}",
            "median": f"{df[col].median():.2f}",
            "std": f"{df[col].std():.2f}",
            "min": f"{df[col].min():.2f}",
            "max": f"{df[col].max():.2f}"
        }
        for col in numeric_columns
    ]

    try:
        # Correlation heat map
        if len(numeric_columns) > 1:
            correlation = df[numeric_columns].corr()
            fig = plt.figure(figsize=(10, 8))
            # Draw into the figure just created instead of assuming it is
            # figure number 1.
            plt.matshow(correlation, fignum=fig.number)
            plt.title('数据相关性矩阵')
            plt.colorbar()
            # Annotate every cell with its coefficient
            for i in range(correlation.shape[0]):
                for j in range(correlation.shape[1]):
                    plt.text(j, i, f"{correlation.iloc[i, j]:.2f}",
                             ha="center", va="center")
            tick_marks = np.arange(len(correlation.columns))
            plt.xticks(tick_marks, correlation.columns, rotation=45)
            plt.yticks(tick_marks, correlation.columns)
            corr_plot_path = os.path.join(charts_dir, "correlation.png")
            plt.savefig(corr_plot_path, dpi=300, bbox_inches='tight')
            plt.close(fig)
            created_charts.append(corr_plot_path)
            context["correlation_chart"] = InlineImage(doc, corr_plot_path, width=Cm(15))

        # Top-N analysis on the first numeric column
        if len(numeric_columns) > 0:
            target_col = numeric_columns[0]
            top_5 = df.nlargest(5, target_col)
            context["top_items"] = [
                {col: str(row[col]) for col in df.columns}
                for _, row in top_5.iterrows()
            ]
            context["target_column"] = target_col

        # Distribution of the first categorical (object dtype) column
        categorical_columns = df.select_dtypes(include=['object']).columns
        if len(categorical_columns) > 0:
            cat_col = categorical_columns[0]
            cat_counts = df[cat_col].value_counts()
            cat_fig = plt.figure(figsize=(10, 6))
            cat_counts.plot(kind='bar')
            plt.title(f'{cat_col}分布')
            plt.xlabel(cat_col)
            plt.ylabel('计数')
            plt.xticks(rotation=45)
            plt.tight_layout()
            cat_plot_path = os.path.join(charts_dir, "category_dist.png")
            plt.savefig(cat_plot_path, dpi=300)
            plt.close(cat_fig)
            created_charts.append(cat_plot_path)
            context["category_chart"] = InlineImage(doc, cat_plot_path, width=Cm(15))
            context["category_column"] = cat_col

        doc.render(context)
        doc.save(output_path)
    finally:
        # Best-effort cleanup that also runs when rendering fails. Only
        # files created by this call are removed.
        for chart_path in created_charts:
            try:
                os.remove(chart_path)
            except OSError:
                pass

    return output_path
# Usage example
report_file = generate_data_report(
    excel_file="sales_data.xlsx",
    template_path="data_analysis_template.docx",
    output_path="data_analysis_report.docx",
)
print(f"数据分析报告已生成: {report_file}")
from flask import Flask, request, render_template, send_file
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired, FileAllowed
from wtforms import StringField, TextAreaField, SubmitField
from wtforms.validators import DataRequired
from werkzeug.utils import secure_filename
import os
from docxtpl import DocxTemplate
import uuid
from datetime import datetime
# Application object plus the folders used for uploads and rendered output
app = Flask(__name__)
app.config.update(
    SECRET_KEY='your-secret-key',
    UPLOAD_FOLDER='uploads',
    GENERATED_FOLDER='generated',
)

# Make sure both working directories exist before the first request
for _folder_key in ('UPLOAD_FOLDER', 'GENERATED_FOLDER'):
    os.makedirs(app.config[_folder_key], exist_ok=True)
# Form definition
class DocumentForm(FlaskForm):
    """Upload form: a .docx template plus the text values rendered into it."""

    template = FileField(
        '选择Word模板',
        validators=[
            FileRequired(),
            FileAllowed(['docx'], '只允许上传Word文档!'),
        ],
    )
    title = StringField('文档标题', validators=[DataRequired()])
    content = TextAreaField('文档内容', validators=[DataRequired()])
    author = StringField('作者', validators=[DataRequired()])
    submit = SubmitField('生成文档')
@app.route('/', methods=['GET', 'POST'])
def index():
    """Show the upload form and, on a valid POST, render the uploaded template.

    The rendered document is stored in ``GENERATED_FOLDER`` under a random
    name. That name is passed to ``index.html`` as ``result_file`` so the
    page can link to the download route. ``result_file`` is None until a
    document has been generated.
    """
    form = DocumentForm()
    result_file = None
    if form.validate_on_submit():
        template_file = form.template.data
        # Store the upload under a generated name. secure_filename() drops
        # non-ASCII characters ("模板.docx" becomes "docx"), and using the
        # client's name lets concurrent uploads overwrite each other.
        # The form has already validated the .docx extension.
        template_filename = f"{uuid.uuid4().hex}.docx"
        template_path = os.path.join(app.config['UPLOAD_FOLDER'], template_filename)
        template_file.save(template_path)

        context = {
            'title': form.title.data,
            'content': form.content.data,
            'author': form.author.data,
            'date': datetime.now().strftime('%Y-%m-%d')
        }

        # NOTE(security): the uploaded template is evaluated by Jinja2.
        # Only accept templates from trusted users, or render with a
        # sandboxed Jinja2 environment.
        doc = DocxTemplate(template_path)
        doc.render(context)

        result_filename = f"{uuid.uuid4().hex}.docx"
        result_path = os.path.join(app.config['GENERATED_FOLDER'], result_filename)
        doc.save(result_path)

        result_file = result_filename
    return render_template('index.html', form=form, result_file=result_file)
@app.route('/download/<filename>')
def download_file(filename):
    """Send a previously generated document as an attachment.

    Args:
        filename: Name of a file inside ``GENERATED_FOLDER``, taken from
            the URL.
    """
    # The URL rule must declare <filename>, otherwise Flask never passes the
    # argument this view requires.
    # secure_filename() removes path separators and "..", so the lookup
    # stays inside GENERATED_FOLDER.
    safe_name = secure_filename(filename)
    # Files are saved relative to the working directory, while send_file()
    # resolves relative paths against the application root. Passing an
    # absolute path keeps the two consistent.
    file_path = os.path.abspath(
        os.path.join(app.config['GENERATED_FOLDER'], safe_name)
    )
    return send_file(
        file_path,
        as_attachment=True,
        download_name=f"generated_document_{datetime.now().strftime('%Y%m%d')}.docx"
    )
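# HTML模板 (templates/index.html)
"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Word文档生成器</title>
</head>
<body>
    <h1>Word文档生成器</h1>
    <form method="POST" enctype="multipart/form-data">
        {{ form.hidden_tag() }}
        <p>{{ form.template.label }} {{ form.template() }}</p>
        <p>{{ form.title.label }} {{ form.title() }}</p>
        <p>{{ form.content.label }} {{ form.content() }}</p>
        <p>{{ form.author.label }} {{ form.author() }}</p>
        <p>{{ form.submit() }}</p>
    </form>
    {% if result_file %}
    <p>文档生成成功! <a href="{{ url_for('download_file', filename=result_file) }}">点击下载</a></p>
    {% endif %}
</body>
</html>
"""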
# Start Flask's built-in server only when this file is executed directly.
if __name__ == "__main__":
    app.run(debug=True)
import json
import requests
from docxtpl import DocxTemplate
import os
import base64
class DocumentIntegrationService:
    """Fetch templates and data from an HTTP API, render them, and upload the result.

    All network helpers are best-effort: they print the failure and return
    None instead of raising.
    """

    def __init__(self, api_base_url, api_key=None, templates_dir="templates", timeout=30):
        """Store connection settings and build the request headers.

        Args:
            api_base_url: Base URL that endpoint paths are appended to.
            api_key: Optional bearer token. When omitted, no Authorization
                header is sent.
            templates_dir: Local directory for downloaded templates.
            timeout: Seconds to wait on each HTTP call, so an unresponsive
                server cannot block the caller forever.
        """
        self.api_base_url = api_base_url
        self.api_key = api_key
        self.templates_dir = templates_dir
        self.timeout = timeout
        self.headers = {"Content-Type": "application/json"}
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"

    def get_template(self, template_id):
        """Download a template and save it under ``templates_dir``.

        The response JSON is expected to carry the base64-encoded file in
        its ``content`` field.

        Returns:
            The local path of the saved ``<template_id>.docx``, or None on
            any failure.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/templates/{template_id}",
                headers=self.headers,
                timeout=self.timeout
            )
            response.raise_for_status()
            template_data = response.json()
            template_content = base64.b64decode(template_data['content'])
            local_path = os.path.join(self.templates_dir, f"{template_id}.docx")
            os.makedirs(self.templates_dir, exist_ok=True)
            with open(local_path, 'wb') as f:
                f.write(template_content)
            return local_path
        except Exception as e:
            print(f"获取模板失败: {str(e)}")
            return None

    def get_data(self, data_id):
        """Fetch the rendering context for ``data_id``.

        Returns:
            The decoded JSON body, or None on any failure.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/data/{data_id}",
                headers=self.headers,
                timeout=self.timeout
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"获取数据失败: {str(e)}")
            return None

    def upload_document(self, file_path, metadata=None):
        """Upload a local file as base64 JSON to the ``/documents`` endpoint.

        Args:
            file_path: Path of the file to upload.
            metadata: Optional dict sent alongside the file. Defaults to
                an empty dict.

        Returns:
            The decoded JSON response, or None on any failure.
        """
        try:
            with open(file_path, 'rb') as f:
                file_content = base64.b64encode(f.read()).decode('utf-8')
            upload_data = {
                "filename": os.path.basename(file_path),
                "content": file_content,
                "metadata": metadata or {}
            }
            response = requests.post(
                f"{self.api_base_url}/documents",
                headers=self.headers,
                json=upload_data,
                timeout=self.timeout
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"上传文档失败: {str(e)}")
            return None

    def generate_and_upload(self, template_id, data_id, output_filename=None):
        """Render ``template_id`` with the data of ``data_id`` and upload the result.

        The rendered file is also kept locally in the ``generated``
        directory.

        Returns:
            A dict with ``success``. On success it also holds
            ``document_id`` and ``local_path``. On failure it holds
            ``error``, plus ``local_path`` when only the upload step
            failed.
        """
        # Imported locally because the surrounding snippet does not import it.
        from datetime import datetime

        template_path = self.get_template(template_id)
        if not template_path:
            return {"success": False, "error": "获取模板失败"}

        context = self.get_data(data_id)
        # get_data() signals failure with None. An empty payload is still
        # a valid (if sparse) context.
        if context is None:
            return {"success": False, "error": "获取数据失败"}

        try:
            doc = DocxTemplate(template_path)
            doc.render(context)
            if not output_filename:
                output_filename = f"generated_{template_id}_{data_id}.docx"
            output_path = os.path.join("generated", output_filename)
            os.makedirs("generated", exist_ok=True)
            doc.save(output_path)

            metadata = {
                "template_id": template_id,
                "data_id": data_id,
                "generated_at": datetime.now().isoformat()
            }
            upload_result = self.upload_document(output_path, metadata)
            if upload_result:
                return {
                    "success": True,
                    "document_id": upload_result.get("id"),
                    "local_path": output_path
                }
            return {
                "success": False,
                "error": "上传文档失败",
                "local_path": output_path
            }
        except Exception as e:
            return {"success": False, "error": str(e)}
# Usage example
integration = DocumentIntegrationService(
    "https://cms.example.com/api/v1",
    api_key="your-api-key-here",
)
result = integration.generate_and_upload(
    "contract-template-001",
    "customer-data-123",
    output_filename="Customer_Contract_2023.docx",
)
if not result["success"]:
    print(f"文档生成失败: {result['error']}")
else:
    print(f"文档生成并上传成功,文档ID: {result['document_id']}")
    print(f"本地副本保存在: {result['local_path']}")
from docxtpl import DocxTemplate
def resolve_common_issues():
    """Demonstrate common docxtpl pitfalls and a fix for each.

    Loads ``template_with_missing_vars.docx``, builds a placeholder context
    covering every ``{{ variable }}`` found in it, and saves the rendered
    result as ``resolved_issues.docx``.
    """
    import re
    from docxtpl import RichText

    # Issue 1: special characters in variable names.
    # Wrong:   {{ user-name }}   (the hyphen is parsed as subtraction)
    # Correct: {{ user_name }}

    # Issue 2: variables missing from the context.
    doc = DocxTemplate("template_with_missing_vars.docx")

    def get_template_variables(template_docx):
        """Return the distinct ``{{ name }}`` / ``{{ a.b }}`` expressions in the template.

        Only paragraphs and table cells of the document body are scanned.
        Expressions that use filters or other syntax are not matched.
        """
        # NOTE(review): recent docxtpl versions appear to load the document
        # lazily, leaving .docx as None until init_docx() runs. Confirm
        # against the installed version.
        if template_docx.docx is None:
            template_docx.init_docx()
        chunks = [paragraph.text for paragraph in template_docx.docx.paragraphs]
        for table in template_docx.docx.tables:
            for row in table.rows:
                for cell in row.cells:
                    chunks.extend(paragraph.text for paragraph in cell.paragraphs)
        text_content = "\n".join(chunks)
        var_pattern = r'\{\{\s*([a-zA-Z0-9_\.]+)\s*\}\}'
        return list(set(re.findall(var_pattern, text_content)))

    variables = get_template_variables(doc)
    print(f"模板变量: {variables}")

    # Issue 3: dotted access such as {{ user.name }} needs nested dicts.
    # A flat "user.name" key is never looked up by Jinja2.
    # Shallow names are handled first. When both "a" and "a.b" occur, the
    # placeholder stored for "a" is replaced by a dict instead of raising.
    context = {}
    for var in sorted(variables, key=lambda name: name.count(".")):
        parts = [part for part in var.split(".") if part]
        if not parts:
            continue
        current = context
        for part in parts[:-1]:
            if not isinstance(current.get(part), dict):
                current[part] = {}
            current = current[part]
        current[parts[-1]] = f"Value for {var}"

    # Issue 4: formatting is lost with plain strings, so use RichText.
    if "formatted_text" in variables:
        rt = RichText()
        rt.add("这是", style="Normal")
        rt.add("红色", color="FF0000")
        rt.add("和", style="Normal")
        rt.add("粗体", bold=True)
        rt.add("文本", style="Normal")
        context["formatted_text"] = rt

    doc.render(context)
    doc.save("resolved_issues.docx")
# Run the example
resolve_common_issues()
from docxtpl import DocxTemplate, RichText
from docx.shared import Pt, RGBColor
import re
def format_preservation_techniques():
    """Demonstrate ways to keep formatting when filling ``format_template.docx``.

    Builds RichText values (heading, hyperlink, converted HTML), renders
    the template, and saves the result as ``format_preserved.docx``.
    """
    doc = DocxTemplate("format_template.docx")

    # Technique 1: RichText keeps inline formatting.
    # RichText sizes are given in half-points, so 32 means 16 pt.
    heading_rt = RichText("重要通知", color="FF0000", bold=True, size=32)

    # Technique 2: number formatting.
    # In the template use {{ "{:,.2f}".format(value) }}. Jinja2's `format`
    # filter is printf-style and does not accept ",.2f".

    # Technique 3: table formatting.
    # Design the table in the template and only replace cell contents.

    # Technique 4: hyperlinks. RichText needs a relationship id created by
    # the template object, not a raw URL.
    link_rt = RichText()
    link_rt.add("点击访问", url_id=doc.build_url_id("https://www.example.com"))

    # Technique 5: bullets and numbering.
    # Style the list in the template and only replace the item text.

    # Technique 6: simple HTML content.
    def convert_html_to_richtext(html_text):
        """Convert a small HTML subset to RichText.

        Supported (non-nested) tags are ``<b>``, ``<i>`` and
        ``<span style="color:#RRGGBB">``. Everything else is added as
        plain text.
        """
        rt = RichText()
        token_re = re.compile(
            r'<b>(?P<bold>.*?)</b>'
            r'|<i>(?P<italic>.*?)</i>'
            r'|<span style="color:\s*#?(?P<color>[0-9A-Fa-f]{6});?">'
            r'(?P<colored>.*?)</span>',
            re.DOTALL,
        )
        position = 0
        for match in token_re.finditer(html_text):
            # Plain text between the previous tag and this one
            if match.start() > position:
                rt.add(html_text[position:match.start()])
            if match.group("bold") is not None:
                rt.add(match.group("bold"), bold=True)
            elif match.group("italic") is not None:
                rt.add(match.group("italic"), italic=True)
            else:
                # RichText expects the colour as a hex string
                rt.add(match.group("colored"), color=match.group("color").upper())
            position = match.end()
        if position < len(html_text):
            rt.add(html_text[position:])
        return rt

    # Sample HTML content
    html_content = """
这是一个<b>粗体文本</b>和<i>斜体文本</i>,
还有<span style="color:#FF0000">红色文本</span>和
<span style="color:#0000FF">蓝色文本</span>示例。
"""
    html_rt = convert_html_to_richtext(html_content)

    context = {
        "heading": heading_rt,
        "value": 1234567.89,  # template: {{ "{:,.2f}".format(value) }}
        "html_content": html_rt,
        "link": link_rt,
        "list_items": ["项目1", "项目2", "项目3"]  # used by a loop in the template
    }

    doc.render(context)
    doc.save("format_preserved.docx")
# Run the example
format_preservation_techniques()
from docxtpl import DocxTemplate
import time
import psutil
import os
import gc
def performance_optimization_demo():
    """Demonstrate performance and memory techniques for docxtpl rendering.

    Runs five sections in order, printing timings and resident-memory
    deltas: garbage collection, template reload comparison, batch
    ("streaming") rendering, image optimisation, and advice on parallel
    processing. Expects ``large_template.docx``, ``list_template.docx``,
    ``image_template.docx`` and ``large_image.jpg`` in the working
    directory, and writes its outputs there.
    """
    # A large context that simulates a complex document
    large_context = {
        "title": "性能优化测试文档",
        "items": [{"id": i, "name": f"项目 {i}", "value": i * 100} for i in range(1, 1001)]
    }

    def get_memory_usage():
        """Return the resident set size of this process in MB."""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    # Technique 1: free memory before rendering
    print(f"初始内存使用: {get_memory_usage():.2f} MB")
    gc.collect()
    print(f"垃圾回收后内存使用: {get_memory_usage():.2f} MB")

    # Technique 2: template handling in batch jobs
    print("\n技巧2: 模板重用性能比较")

    # Variant A: a fresh template object for every document
    start_time = time.time()
    memory_before = get_memory_usage()
    for i in range(5):
        doc = DocxTemplate("large_template.docx")
        doc.render(large_context)
        doc.save(f"output_no_reuse_{i}.docx")
    memory_after = get_memory_usage()
    elapsed_time = time.time() - start_time
    print(f"不重用模板 - 时间: {elapsed_time:.2f}秒, 内存增加: {memory_after - memory_before:.2f} MB")

    # Variant B: one variable, reloaded after each save
    gc.collect()  # drop the objects left over from variant A
    start_time = time.time()
    memory_before = get_memory_usage()
    doc = DocxTemplate("large_template.docx")
    for i in range(5):
        doc.render(large_context.copy())  # copy so the shared context is not mutated
        doc.save(f"output_reuse_{i}.docx")
        # Reload so the next render starts from the pristine template.
        # NOTE(review): this loads the file as often as variant A does.
        if i < 4:  # no reload needed after the last document
            doc = DocxTemplate("large_template.docx")
    memory_after = get_memory_usage()
    elapsed_time = time.time() - start_time
    print(f"重用模板 - 时间: {elapsed_time:.2f}秒, 内存增加: {memory_after - memory_before:.2f} MB")

    # Technique 3: process a large data set in batches
    print("\n技巧3: 流式处理大数据集")

    def get_items_stream(total=1000, batch_size=100):
        """Yield the items in lists of at most ``batch_size`` to bound memory use."""
        for start in range(0, total, batch_size):
            end = min(start + batch_size, total)
            yield [{"id": i, "name": f"项目 {i}", "value": i * 100} for i in range(start, end)]

    start_time = time.time()
    memory_before = get_memory_usage()
    base_context = {"title": "流式处理示例"}
    # Each batch is rendered from the original template with the shared
    # values merged in. Rendering the shared values first would consume
    # the `items` loop (an undefined variable renders as empty), leaving
    # nothing for the batches to fill.
    for batch_num, batch in enumerate(get_items_stream(total=5000, batch_size=500), start=1):
        print(f"处理批次 {batch_num}, 项目数: {len(batch)}")
        batch_doc = DocxTemplate("list_template.docx")
        batch_doc.render({**base_context, "items": batch})
        batch_doc.save(f"batch_output_{batch_num}.docx")
        # Release the document before the next batch is built
        del batch_doc
        gc.collect()
    memory_after = get_memory_usage()
    elapsed_time = time.time() - start_time
    print(f"流式处理 - 时间: {elapsed_time:.2f}秒, 内存增加: {memory_after - memory_before:.2f} MB")

    # Technique 4: shrink images before embedding them
    print("\n技巧4: 图片压缩优化")
    from PIL import Image
    from docxtpl import InlineImage
    from docx.shared import Mm

    def optimize_image(image_path, max_size=(800, 600), quality=85):
        """Write a downscaled, recompressed copy of the image and return its path."""
        filename, ext = os.path.splitext(image_path)
        optimized_path = f"{filename}_optimized{ext}"
        # The context manager closes the source file handle
        with Image.open(image_path) as img:
            img.thumbnail(max_size, Image.LANCZOS)
            img.save(optimized_path, quality=quality, optimize=True)
        return optimized_path

    start_time = time.time()
    memory_before = get_memory_usage()

    # Original image
    doc1 = DocxTemplate("image_template.docx")
    context1 = {
        "title": "原始图片文档",
        "image": InlineImage(doc1, "large_image.jpg", width=Mm(150))
    }
    doc1.render(context1)
    doc1.save("original_image_doc.docx")
    original_size = os.path.getsize("original_image_doc.docx") / 1024  # KB
    print(f"原始图片文档大小: {original_size:.2f} KB")

    # Optimised image
    optimized_image = optimize_image("large_image.jpg")
    doc2 = DocxTemplate("image_template.docx")
    context2 = {
        "title": "优化图片文档",
        "image": InlineImage(doc2, optimized_image, width=Mm(150))
    }
    doc2.render(context2)
    doc2.save("optimized_image_doc.docx")
    optimized_size = os.path.getsize("optimized_image_doc.docx") / 1024  # KB
    memory_after = get_memory_usage()
    elapsed_time = time.time() - start_time
    # Guard the ratio against an empty reference file
    reduction_pct = (1 - optimized_size / original_size) * 100 if original_size else 0.0
    print(f"优化图片文档大小: {optimized_size:.2f} KB")
    print(f"大小减少: {original_size - optimized_size:.2f} KB ({reduction_pct:.1f}%)")
    print(f"处理时间: {elapsed_time:.2f}秒, 内存增加: {memory_after - memory_before:.2f} MB")

    # Remove the temporary optimised image
    if os.path.exists(optimized_image):
        os.remove(optimized_image)

    # Technique 5: parallel processing advice
    print("\n技巧5: 并行处理建议")
    print("对于大量文档生成任务,建议使用多进程并行处理:")
    print("1. 使用multiprocessing模块创建进程池")
    print("2. 每个进程独立加载模板和生成文档")
    print("3. 避免进程间共享大量数据")
    print("4. 限制并行进程数,通常不超过CPU核心数")
# Run the example
performance_optimization_demo()
python-docx-template 是一个功能强大的库,通过结合Word文档的格式控制优势与Python的编程能力,大幅简化了文档自动化生成流程。通过本文介绍的技术,您可以:
python-docx-template 特别适合以下场景:
在数字化转型的时代,文档自动化能力已成为组织效率提升的关键要素。通过掌握 python-docx-template,您将拥有一个强大的工具,能够显著减少手动文档处理时间,提高一致性,并实现更高水平的个性化。无论是作为个人开发者还是企业解决方案提供者,这一技能都将为您的工作流程带来革命性的提升。