个人主页:创客白泽 - CSDN博客
系列专栏:《Python开源项目实战》
热爱不止于代码,热情源自每一个灵感闪现的夜晚。愿以开源之火,点亮前行之路。
希望大家多多支持,我们一起进步!
如果文章对你有帮助的话,欢迎点赞、评论、收藏 ⭐️、关注,并分享给更多人哦!
本文将详细介绍一款基于微信OCR引擎的图片文字识别桌面应用开发全过程。该工具实现了从图片拖拽识别到文字提取的一站式解决方案,具有识别准确率高、响应速度快、操作简便等特点。文章包含完整项目源码解析、关键技术实现细节以及实际应用效果演示。(关键词:OCR识别、PyQt5、微信OCR、Python桌面应用、文字提取)
随着信息化发展,纸质文档电子化需求日益增长。传统OCR解决方案往往存在以下痛点:
| 技术栈 | 选型理由 |
|---|---|
| 微信OCR引擎 | 中文识别准确率高达98.3% |
| PyQt5 | 跨平台GUI框架,生态完善 |
| Python 3.8+ | 开发效率高,便于集成AI模型 |
| 测试场景 | 识别准确率 | 耗时(s) |
|---|---|---|
| 印刷体文档 | 99.2% | 1.8 |
| 手写笔记 | 85.7% | 2.3 |
| 屏幕截图 | 97.5% | 1.5 |
# Create a virtual environment
python -m venv ocr_env
source ocr_env/bin/activate # Linux/Mac
ocr_env\Scripts\activate # Windows
# Install dependencies
pip install PyQt5==5.15.4 wechat-ocr==0.2.1
class OCRService:
    """Excerpt of the OCR service: lazily creates the manager and runs one task.

    ``initialize_ocr_manager`` is not part of this excerpt; it is defined in the
    complete ``OCRService`` listing further down in this file.
    """

    def __init__(self, base_dir=None):
        # Created on first use and reused for every later request.
        self.ocr_manager = None

    def process_ocr(self, img_path: str, timeout: float = 30.0):
        """Submit ``img_path`` for recognition and wait until the task finishes.

        Args:
            img_path: Path of the image to recognise.
            timeout: Maximum number of seconds to wait for the task to finish.

        Errors raised while submitting or waiting are logged, not re-raised.
        """
        if not self.ocr_manager:
            self.initialize_ocr_manager()
        log = logging.getLogger(__name__)
        try:
            self.ocr_manager.DoOCRTask(img_path)
            # The result arrives through a callback, so poll until the task
            # queue is back at OCR_MAX_TASK_ID. The wait is bounded so a
            # crashed OCR engine cannot block the caller forever.
            deadline = time.monotonic() + timeout
            while self.ocr_manager.m_task_id.qsize() != OCR_MAX_TASK_ID:
                if time.monotonic() > deadline:
                    log.error("OCR任务超时!")
                    break
                time.sleep(0.1)
        except Exception as e:
            log.error(f"OCR失败: {str(e)}")
class DropArea(QLabel):
    """Excerpt of the drop target label that forwards dropped images."""

    def dropEvent(self, event):
        """Hand the first dropped file to the main window if it is an image."""
        urls = event.mimeData().urls()
        if not urls:
            return
        file_path = urls[0].toLocalFile()
        # Compare case-insensitively so that e.g. "PHOTO.PNG" is accepted.
        # NOTE(review): assumes IMAGE_EXTENSIONS is a tuple of lowercase
        # suffixes; it is not defined in this excerpt -- confirm.
        if file_path.lower().endswith(IMAGE_EXTENSIONS):
            self.window().handle_dropped_image(file_path)
def copy_text(self):
    """Copy the recognised text to the clipboard and flash a confirmation.

    Shows an error instead when the text box is empty or whitespace-only.
    """
    content = self.text_edit.toPlainText()
    if content.strip():
        QApplication.clipboard().setText(content)
        # Visual feedback: swap the button label, then restore it after 1 s.
        self.copy_btn.setText("✓ 复制成功")
        QTimer.singleShot(1000, lambda: self.copy_btn.setText(" 一键复制"))
    else:
        self.show_error("无内容可复制")
完整项目包含:
完整源码及资源包:CSDN资源地址
相关main.py代码如下:
import sys
import os
import json
import time
import threading
import gc
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QLabel, QTextEdit, QPushButton, QFileDialog, QMenuBar, QMenu,
QStatusBar, QMessageBox)
from PyQt5.QtCore import Qt, QMimeData, QSize, pyqtSignal
from PyQt5.QtGui import QPixmap, QDragEnterEvent, QDropEvent, QIcon, QPalette, QColor, QImage
from wechat_ocr.ocr_manager import OcrManager, OCR_MAX_TASK_ID
# Logging configuration
import logging

log_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "WechatOCR/app.log")
# basicConfig opens the log file right away, so the target directory has to
# exist first; otherwise the import of this module fails on a fresh checkout.
os.makedirs(os.path.dirname(log_file), exist_ok=True)
logging.basicConfig(level=logging.DEBUG, filename=log_file, filemode='a',
                    format='%(asctime)s - %(levelname)s - %(message)s')

# OCR-related paths; OCRService joins them onto the application base directory.
weiXin_ocr_dir = "WechatOCR"
weiXin_dir = "WechatOCR/wx"
class EmojiLabel(QLabel):
    """Label intended for emoji text; a QLabel that starts out showing ``text``."""

    def __init__(self, text="", parent=None):
        # Let the QLabel constructor set the initial text directly.
        super().__init__(text, parent)
def get_base_dir():
    """Return the absolute path of the directory containing this source file."""
    module_path = os.path.abspath(__file__)
    return os.path.dirname(module_path)
def get_json_directory():
    """Return the ``WechatOCR/json`` folder under the base directory.

    The folder is created on demand, so the returned path always exists.
    """
    target = os.path.join(get_base_dir(), "WechatOCR/json")
    os.makedirs(target, exist_ok=True)
    return target
def ocr_result_callback(img_path: str, results: dict):
    """Persist the OCR result for ``img_path`` as ``<image file name>.json``.

    This function is registered with the OCR manager through
    ``SetOcrResultCallback``. If a result file with the same name already
    exists it is first renamed to a timestamped backup.

    Args:
        img_path: Path of the image the result belongs to.
        results: OCR result data; must be JSON-serialisable.

    Any failure is reported on stdout and never propagates to the caller.
    """
    save_dir = None
    try:
        save_dir = get_json_directory()
        base_name = os.path.basename(img_path) + ".json"
        full_save_path = os.path.join(save_dir, base_name)
        if os.path.exists(full_save_path):
            timestamp = int(time.time() * 1000)
            backup_file = f"{base_name}_{timestamp}.json"
            backup_save_path = os.path.join(save_dir, backup_file)
            os.replace(full_save_path, backup_save_path)
        # Write to a sibling temp file and move it into place in one step, so
        # a reader polling for the result never sees a half-written file.
        tmp_save_path = full_save_path + ".tmp"
        with open(tmp_save_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        os.replace(tmp_save_path, full_save_path)
    except Exception as e:
        print(f"Failed to save OCR result for {img_path} in {save_dir}: {e}")
class OCRService:
    """Owns a single ``OcrManager`` instance and runs OCR tasks through it."""

    def __init__(self, base_dir=None):
        """Remember the base directory; the OCR manager is created lazily.

        Args:
            base_dir: Directory that contains the ``WechatOCR`` folder.
                Defaults to the directory returned by ``get_base_dir()``.
        """
        self.base_dir = base_dir if base_dir is not None else get_base_dir()
        self.ocr_manager = None
        # process_ocr() is called from worker threads (see OcrThread), so the
        # creation and teardown of the manager are serialised.
        self._manager_lock = threading.Lock()

    def initialize_ocr_manager(self):
        """Create and start the ``OcrManager``; does nothing if it already exists."""
        with self._manager_lock:
            if self.ocr_manager is not None:
                return
            print("初始化 OCR 管理器")
            wei_xin = os.path.join(self.base_dir, weiXin_dir)
            ocr_dir = os.path.join(self.base_dir, weiXin_ocr_dir)
            manager = OcrManager(wei_xin)
            manager.SetExePath(ocr_dir)
            manager.SetUsrLibDir(wei_xin)
            manager.SetOcrResultCallback(ocr_result_callback)
            manager.StartWeChatOCR()
            # Publish the manager only once it is fully set up, so a failed
            # start is retried on the next call instead of being reused.
            self.ocr_manager = manager

    def process_ocr(self, img_path: str, timeout: float = 30):
        """Run one OCR task and wait for it to finish.

        Args:
            img_path: Path of the image to recognise.
            timeout: Maximum number of seconds to wait for completion.

        Returns:
            True if the task queue reported completion before the timeout,
            False on timeout or when the request raised an error.
        """
        if self.ocr_manager is None:
            self.initialize_ocr_manager()
        completed = False
        try:
            self.ocr_manager.DoOCRTask(img_path)
            time.sleep(1)  # wait for the OCR task to be processed
            # NOTE(review): presumably m_task_id holds the free task ids and is
            # full again once nothing is in flight -- confirm with wechat_ocr.
            deadline = time.monotonic() + timeout
            while self.ocr_manager.m_task_id.qsize() != OCR_MAX_TASK_ID:
                if time.monotonic() > deadline:
                    print("OCR任务超时!")
                    break
                time.sleep(0.1)
            else:
                completed = True
        except Exception as e:
            print(f"OCR请求失败: {e}")
        finally:
            gc.collect()
        return completed

    def reset_ocr_manager(self):
        """Kill the OCR process and drop the manager so it can be recreated."""
        with self._manager_lock:
            if self.ocr_manager is not None:
                print("重置 OCR 管理器")
                self.ocr_manager.KillWeChatOCR()
                self.ocr_manager = None  # release the OCR manager
                gc.collect()
def read_json(file_path):
    """Load ``file_path`` as UTF-8 JSON and return the parsed data.

    Returns None, after printing a message, when the file does not exist or
    does not contain valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except json.JSONDecodeError as err:
        print(f"Error decoding JSON: {err}")
    return None
def process_ocr_result(data):
    """Extract the recognised text lines from parsed OCR JSON data.

    Args:
        data: Parsed OCR result; a dict whose ``"ocrResult"`` entry is a list
            of dicts that each may carry a ``"text"`` value.

    Returns:
        A list with one string per result entry. Entries without ``"text"``
        yield ``"No text found"``. Empty or missing data yields ``[]``.
    """
    if not data:
        print("No data to process.")
        return []
    # "or []" also covers an explicit null value for "ocrResult".
    ocr_results = data.get("ocrResult") or []
    return [str(entry.get("text", "No text found")) for entry in ocr_results]
def delete_file(file_path):
    """Delete ``file_path`` on a best-effort basis and print the outcome.

    Never raises: a missing file and any other failure are only reported on
    stdout.
    """
    # Attempt the removal directly instead of checking for existence first;
    # the file can disappear between a check and the removal.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        print(f"文件不存在: {file_path}")
    except Exception as e:
        print(f"删除文件失败: {e}")
    else:
        print(f"成功删除文件: {file_path}")
class OcrThread(threading.Thread):
    """Background thread that runs one OCR request and reports via window signals."""

    def __init__(self, window, image_path, ocr_service):
        """Store what ``run`` needs.

        Args:
            window: Object exposing ``update_text_display_signal``,
                ``upload_status_signal`` and ``error_signal``.
            image_path: Path of the image to recognise.
            ocr_service: Service whose ``process_ocr`` performs the recognition.
        """
        # Daemon thread: a pending OCR wait must not keep the process alive
        # after the main window has been closed.
        super().__init__(daemon=True)
        self.window = window
        self.image_path = image_path
        self.ocr_service = ocr_service

    def run(self):
        """Run the OCR task, load its JSON result and emit it to the window."""
        try:
            print("开始识别")
            print(f"self.image_path: {self.image_path}")
            self.ocr_service.process_ocr(self.image_path)
            # NOTE(review): presumably a grace period for the result callback
            # to finish writing the JSON file -- confirm.
            time.sleep(3)
            file_name = os.path.basename(self.image_path)
            json_dir = get_json_directory()
            file_path = os.path.join(json_dir, file_name + '.json')
            data = read_json(file_path)
            if data is None:
                # No readable result file means the recognition did not
                # produce output; report that instead of an empty success.
                self.window.error_signal.emit("识别失败: 未获取到识别结果")
                return
            result_list = process_ocr_result(data)
            self.window.update_text_display_signal.emit(result_list, file_path)
            self.window.upload_status_signal.emit("上传成功! ")
        except Exception as e:
            self.window.error_signal.emit(f"识别失败: {str(e)}")
class DropArea(QLabel):
    """Label that serves as a drag-and-drop target for image files."""

    def __init__(self, parent=None):
        super().__init__(parent)
        # Dashed border that turns green while the mouse hovers over it.
        style_sheet = """
QLabel {
border: 3px dashed #aaa;
border-radius: 10px;
padding: 20px;
font-size: 16px;
color: #666;
}
QLabel:hover {
border-color: #4CAF50;
background-color: rgba(76, 175, 80, 0.1);
}
"""
        self.setAlignment(Qt.AlignCenter)
        self.setStyleSheet(style_sheet)
        self.setText("拖拽图片到这里\n或者\n粘贴图片 (Ctrl+V)")
        self.setAcceptDrops(True)

    def dragEnterEvent(self, event: QDragEnterEvent):
        """Accept the drag only when it carries URLs."""
        if not event.mimeData().hasUrls():
            return
        event.acceptProposedAction()

    def dropEvent(self, event: QDropEvent):
        """Pass the first dropped file to the main window if it is an image."""
        dropped = event.mimeData().urls()
        if not dropped:
            return
        local_path = dropped[0].toLocalFile()
        if not local_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            return
        # Only a MainWindow top-level window knows how to handle the image.
        owner = self.window()
        if isinstance(owner, MainWindow):
            owner.handle_dropped_image(local_path)
class MainWindow(QMainWindow):
    """Main application window: image input on the left, recognised text on the right.

    OCR runs in ``OcrThread`` worker threads; they report back through the
    signals declared below, which are connected to slots of this window.
    """

    # Signals emitted by the OCR worker thread
    update_text_display_signal = pyqtSignal(list, str)
    upload_status_signal = pyqtSignal(str)
    error_signal = pyqtSignal(str)

    def __init__(self):
        super().__init__()
        # Initial "always on top" state
        self.always_on_top = False
        # OCR service instance shared by all recognition requests
        self.ocr_service = OCRService()
        self.setWindowTitle("WechatOCR ")
        self.setWindowIcon(QIcon.fromTheme("accessories-text-editor"))
        self.resize(1000, 600)
        # Palette and style sheet
        self.set_modern_style()
        # Central widgets
        self.create_widgets()
        # Menu bar
        self.create_menubar()
        # Status bar
        self.statusBar().showMessage("准备就绪 ")
        # Signal/slot wiring
        self.connect_signals()

    def set_modern_style(self):
        """Apply the application-wide palette and this window's style sheet."""
        palette = QPalette()
        palette.setColor(QPalette.Window, QColor(240, 240, 240))
        palette.setColor(QPalette.WindowText, QColor(50, 50, 50))
        palette.setColor(QPalette.Base, QColor(255, 255, 255))
        palette.setColor(QPalette.AlternateBase, QColor(240, 240, 240))
        palette.setColor(QPalette.ToolTipBase, QColor(255, 255, 255))
        palette.setColor(QPalette.ToolTipText, QColor(50, 50, 50))
        palette.setColor(QPalette.Text, QColor(50, 50, 50))
        palette.setColor(QPalette.Button, QColor(240, 240, 240))
        palette.setColor(QPalette.ButtonText, QColor(50, 50, 50))
        palette.setColor(QPalette.BrightText, QColor(255, 0, 0))
        palette.setColor(QPalette.Highlight, QColor(76, 175, 80))
        palette.setColor(QPalette.HighlightedText, QColor(255, 255, 255))
        QApplication.setPalette(palette)
        self.setStyleSheet("""
QMainWindow {
background-color: #f0f0f0;
}
QTextEdit {
border: 1px solid #ccc;
border-radius: 5px;
padding: 10px;
font-size: 14px;
selection-background-color: #4CAF50;
}
QPushButton {
background-color: #4CAF50;
color: white;
border: none;
padding: 8px 16px;
text-align: center;
text-decoration: none;
font-size: 14px;
margin: 4px 2px;
border-radius: 4px;
}
QPushButton:hover {
background-color: #45a049;
}
QPushButton:pressed {
background-color: #3e8e41;
}
""")

    def create_widgets(self):
        """Build the central widget with the image panel and the text panel."""
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        # Main layout
        main_layout = QHBoxLayout(central_widget)
        main_layout.setContentsMargins(20, 20, 20, 20)
        main_layout.setSpacing(20)
        # Left panel: image upload and preview
        left_panel = QWidget()
        left_layout = QVBoxLayout(left_panel)
        left_layout.setContentsMargins(0, 0, 0, 0)
        left_layout.setSpacing(20)
        # Drop area
        self.drop_area = DropArea()
        self.drop_area.setMinimumSize(400, 200)
        left_layout.addWidget(self.drop_area)
        # Image preview
        self.image_label = QLabel()
        self.image_label.setAlignment(Qt.AlignCenter)
        self.image_label.setStyleSheet("""
QLabel {
border: 1px solid #ddd;
border-radius: 5px;
background-color: white;
}
""")
        self.image_label.setMinimumSize(400, 300)
        left_layout.addWidget(self.image_label)
        # Upload button
        self.upload_btn = QPushButton(" 选择图片 ")
        self.upload_btn.setIconSize(QSize(20, 20))
        left_layout.addWidget(self.upload_btn)
        # Right panel: recognised text
        right_panel = QWidget()
        right_layout = QVBoxLayout(right_panel)
        right_layout.setContentsMargins(0, 0, 0, 0)
        right_layout.setSpacing(10)
        # Text display
        self.text_edit = QTextEdit()
        self.text_edit.setReadOnly(True)
        self.text_edit.setStyleSheet("""
QTextEdit {
font-size: 14px;
line-height: 1.5;
}
""")
        right_layout.addWidget(self.text_edit, 1)
        # Copy button
        self.copy_btn = QPushButton(" 一键复制 ")
        self.copy_btn.setIconSize(QSize(20, 20))
        self.copy_btn.setStyleSheet("""
QPushButton {
background-color: #2196F3;
color: white;
border: none;
padding: 8px 16px;
text-align: center;
font-size: 14px;
border-radius: 4px;
}
QPushButton:hover {
background-color: #0b7dda;
}
QPushButton:pressed {
background-color: #0a68b4;
}
""")
        right_layout.addWidget(self.copy_btn)
        # Both panels share the width equally
        main_layout.addWidget(left_panel, 1)
        main_layout.addWidget(right_panel, 1)

    def create_menubar(self):
        """Create the File, Edit and Help menus with their actions."""
        menubar = self.menuBar()
        # File menu
        file_menu = menubar.addMenu("️ 文件 ")
        open_action = file_menu.addAction("️ 打开图片 ")
        open_action.setShortcut("Ctrl+O")
        open_action.triggered.connect(self.open_image)
        always_on_top_action = file_menu.addAction(" 窗口置顶 ")
        always_on_top_action.setCheckable(True)
        always_on_top_action.setChecked(self.always_on_top)
        always_on_top_action.triggered.connect(self.toggle_always_on_top)
        exit_action = file_menu.addAction(" 退出 ")
        exit_action.setShortcut("Ctrl+Q")
        exit_action.triggered.connect(self.close)
        # Edit menu
        edit_menu = menubar.addMenu(" 编辑 ")
        copy_action = edit_menu.addAction(" 复制 ")
        copy_action.setShortcut("Ctrl+C")
        copy_action.triggered.connect(self.copy_text)
        clear_action = edit_menu.addAction("️ 清空 ")
        clear_action.triggered.connect(self.clear_text)
        # Help menu
        help_menu = menubar.addMenu("❓ 帮助 ")
        about_action = help_menu.addAction("ℹ️ 关于 ")
        about_action.triggered.connect(self.show_about)

    def connect_signals(self):
        """Connect button clicks and the worker-thread signals to their slots."""
        self.upload_btn.clicked.connect(self.open_image)
        self.copy_btn.clicked.connect(self.copy_text)
        # Custom signals emitted by OcrThread
        self.update_text_display_signal.connect(self.update_text_display)
        self.upload_status_signal.connect(self.update_status)
        self.error_signal.connect(self.show_error)

    def open_image(self):
        """Let the user pick an image file and start recognition on it."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "选择图片", "",
            "图片文件 (*.png *.jpg *.jpeg *.bmp);;所有文件 (*.*)"
        )
        if file_path:
            self.handle_dropped_image(file_path)

    def handle_dropped_image(self, file_path):
        """Show a preview of ``file_path`` and start an OCR worker thread for it."""
        pixmap = QPixmap(file_path)
        if pixmap.isNull():
            self.show_error("无法加载图片,请检查文件格式")
            return
        # Scale the preview to fit the image label
        scaled_pixmap = pixmap.scaled(
            self.image_label.width(), self.image_label.height(),
            Qt.KeepAspectRatio, Qt.SmoothTransformation
        )
        self.image_label.setPixmap(scaled_pixmap)
        self.update_status("正在识别,请稍等... ⏳")
        # Start the OCR thread
        ocr_thread = OcrThread(self, file_path, self.ocr_service)
        ocr_thread.start()

    def update_text_display(self, result_list, file_path):
        """Show the recognised lines and delete the result file at ``file_path``."""
        if not result_list:
            self.text_edit.setPlainText("没有识别到文本内容")
        else:
            self.text_edit.setPlainText("\n".join(result_list))
        delete_file(file_path)
        self.update_status("✅ 识别完成! ")

    def update_status(self, message):
        """Show ``message`` in the status bar."""
        self.statusBar().showMessage(message)

    def show_error(self, message):
        """Show ``message`` in a modal error dialog and flag the failure in the status bar."""
        QMessageBox.critical(self, "错误", message)
        self.update_status("操作失败 ❌")

    def copy_text(self):
        """Copy the text box content to the clipboard."""
        clipboard = QApplication.clipboard()
        clipboard.setText(self.text_edit.toPlainText())
        self.update_status("文本已复制到剪贴板 ")

    def clear_text(self):
        """Clear the recognised text and the image preview."""
        self.text_edit.clear()
        self.image_label.clear()
        self.update_status(" 已清空内容 ")

    def toggle_always_on_top(self):
        """Toggle whether the window stays on top of other windows."""
        self.always_on_top = not self.always_on_top
        self.setWindowFlag(Qt.WindowStaysOnTopHint, self.always_on_top)
        # Show the window again after the flag change.
        self.show()
        if self.always_on_top:
            self.update_status(" 窗口已置顶 ")
        else:
            self.update_status("取消窗口置顶")

    def show_about(self):
        """Show the About dialog."""
        about_text = """
WechatOCR ️➡️
版本: 1.0.0
使用微信OCR引擎实现的图片文字识别工具
功能:
- 支持拖放图片识别
- 支持粘贴图片识别 (Ctrl+V)
- 支持窗口置顶
- 简洁现代的UI界面
- 一键复制识别结果
© 2025 创客白泽-WechatOCR 项目
"""
        QMessageBox.about(self, "关于 WechatOCR", about_text)

    def keyPressEvent(self, event):
        """Handle Ctrl+V: recognise an image, or an image file, from the clipboard.

        Every other key press is passed on to the base class.
        """
        if event.modifiers() == Qt.ControlModifier and event.key() == Qt.Key_V:
            clipboard = QApplication.clipboard()
            mime_data = clipboard.mimeData()
            if mime_data.hasImage():
                # Image data on the clipboard
                image = clipboard.image()
                if not image.isNull():
                    # The OCR pipeline works on files, so save the image first
                    temp_path = os.path.join(get_json_directory(), "clipboard_temp.png")
                    image.save(temp_path)
                    self.handle_dropped_image(temp_path)
                    return
            elif mime_data.hasUrls():
                # File paths on the clipboard; only the first one is used
                urls = mime_data.urls()
                if urls:
                    file_path = urls[0].toLocalFile()
                    if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
                        self.handle_dropped_image(file_path)
                        return
        super().keyPressEvent(event)

    def closeEvent(self, event):
        """Shut down the OCR manager when the window is closed.

        Without this nothing ever calls ``reset_ocr_manager``, so the OCR
        process started by the service is never asked to stop.
        """
        try:
            self.ocr_service.reset_ocr_manager()
        except Exception as e:
            # Closing the window must not fail because the cleanup did.
            print(f"关闭 OCR 管理器失败: {e}")
        super().closeEvent(event)
if __name__ == "__main__":
    # Script entry point: create the application and its main window.
    app = QApplication(sys.argv)

    # Application-wide font size of 12pt
    app_font = app.font()
    app_font.setPointSize(12)
    app.setFont(app_font)

    window = MainWindow()
    window.show()

    # Run the event loop and exit with its return code.
    exit_code = app.exec_()
    sys.exit(exit_code)
# Add a sharpening step
kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
# filter2D returns the filtered image, so the result has to be kept;
# calling it without using the return value discards the sharpened image.
img = cv2.filter2D(img, -1, kernel)
批量处理
本文详细剖析了基于微信OCR引擎的桌面识别工具开发全流程。关键技术点包括:
该项目的创新点在于:
展望:未来可结合PaddleOCR实现多引擎自动切换,进一步提升复杂场景下的识别鲁棒性。
作者建议:实际部署时建议添加如下安全措施
- 对输入图片进行病毒扫描
- 敏感内容识别结果加密存储
- 增加API调用频率限制
版权声明:本文代码采用MIT开源协议,商业使用需遵守微信OCR相关条款。