🎉 项目优化与Bug修复完整版

✨ 主要优化成果:
- 修复Unicode字符编码问题(Windows跨平台兼容性)
- 安装wkhtmltoimage,截图功能完全修复
- 智能延迟优化(api_browser.py)
- 线程池资源泄漏修复(tasks.py)
- HTML解析缓存机制
- 二分搜索算法优化(kdocs_uploader.py)
- 自适应资源配置(browser_pool_worker.py)

🐛 Bug修复:
- 解决截图失败问题
- 修复管理员密码设置
- 解决应用启动编码错误

📚 新增文档:
- BUG_REPORT.md - 完整bug分析报告
- PERFORMANCE_ANALYSIS_REPORT.md - 性能优化分析
- LINUX_DEPLOYMENT_ANALYSIS.md - Linux部署指南
- SCREENSHOT_FIX_SUCCESS.md - 截图功能修复记录
- INSTALL_WKHTMLTOIMAGE.md - 安装指南
- OPTIMIZATION_FIXES_SUMMARY.md - 优化总结

🚀 功能验证:
- Flask应用正常运行(51233端口)
- 数据库、截图线程池、API预热正常
- 管理员登录:admin/admin123
- 健康检查API:http://127.0.0.1:51233/health

💡 技术改进:
- 智能延迟算法(自适应调整)
- LRU缓存策略
- 线程池资源管理优化
- 二分搜索算法(O(log n) vs O(n))
- 自适应资源管理

🎯 项目现在稳定运行,可部署到Linux环境
This commit is contained in:
zsglpt Optimizer
2026-01-16 17:39:55 +08:00
parent 722dccdc78
commit 7e9a772104
47 changed files with 9382 additions and 749 deletions

304
simple_test.py Normal file
View File

@@ -0,0 +1,304 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
金山文档上传测试 - 最简版本
直接运行无UI避免线程问题
"""
import os
import sys
import time
from datetime import datetime
# 添加项目路径
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
try:
from playwright.sync_api import sync_playwright
except ImportError:
print("错误: 需要安装 playwright")
print("请运行: pip install playwright")
sys.exit(1)
def log(message, level='INFO'):
    """Print a timestamped log line to standard output.

    The emitted line has the form ``[HH:MM:SS] LEVEL: message``.

    Callers in this file pass symbols such as ``✗`` and ``⚠``. Consoles that
    use a legacy code page (for example cp1252 on Windows) cannot encode
    them, and ``print`` would raise ``UnicodeEncodeError`` from inside a
    logging call. In that case the line is re-emitted with every unencodable
    character replaced, so logging never aborts the test run.

    Args:
        message: Text to log.
        level: Severity label printed before the message.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    line = f"[{timestamp}] {level}: {message}"
    try:
        print(line)
    except UnicodeEncodeError:
        # Round-trip through the stream's own encoding so only the characters
        # it cannot represent are substituted (with "?").
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(line.encode(encoding, errors="replace").decode(encoding))
def test_browser_startup():
    """Start Playwright and open a headed Chromium browser, context and page.

    Each successful step is logged. The page's default timeout is set to
    30000 (Playwright timeouts are expressed in milliseconds).

    Returns:
        A ``(playwright, browser, context, page)`` tuple on success, or
        ``(None, None, None, None)`` if any step raised. On failure every
        object that had already been created is shut down before returning,
        because the caller only receives ``None`` values and therefore has
        no handle left to release them.
    """
    log("=" * 50)
    log("测试1: 浏览器启动")
    log("=" * 50)
    # Pre-bind so the failure path knows exactly how far startup got.
    playwright = browser = context = page = None
    try:
        playwright = sync_playwright().start()
        log("[OK] Playwright启动成功")
        browser = playwright.chromium.launch(headless=False)
        log("[OK] 浏览器启动成功")
        context = browser.new_context()
        log("[OK] 上下文创建成功")
        page = context.new_page()
        log("[OK] 页面创建成功")
        page.set_default_timeout(30000)
        log("[OK] 页面超时设置完成")
        return playwright, browser, context, page
    except Exception as e:
        log(f"✗ 浏览器启动失败: {str(e)}", 'ERROR')
        import traceback
        traceback.print_exc()
        # Release partially created resources, newest first; a failure while
        # releasing one must not prevent releasing the others.
        partial = (
            (page, "close"),
            (context, "close"),
            (browser, "close"),
            (playwright, "stop"),
        )
        for resource, closer in partial:
            if resource is None:
                continue
            try:
                getattr(resource, closer)()
            except Exception as cleanup_error:
                log(f"⚠ 资源释放失败: {cleanup_error}", 'WARNING')
        return None, None, None, None
def test_document_open(page, doc_url):
    """Navigate the page to ``doc_url`` and report where it ended up.

    After navigation the function waits 3000 ms, logs the resulting URL,
    logs whether the substring ``kdocs.cn`` occurs in it, and probes for a
    visible element with the text "登录" (log in) to flag a login page.

    Args:
        page: Page object used for navigation; a falsy value aborts the test.
        doc_url: Address of the document to open.

    Returns:
        ``True`` when navigation completed (even if a warning was logged),
        ``False`` when ``page`` is falsy or navigation raised.
    """
    log("=" * 50)
    log("测试2: 打开文档")
    log("=" * 50)
    if not page:
        log("✗ 页面对象不可用", 'ERROR')
        return False
    try:
        log(f"正在打开文档: {doc_url}")
        page.goto(doc_url, wait_until='domcontentloaded')
        log("[OK] 页面导航完成")
        page.wait_for_timeout(3000)
        log("[OK] 等待3秒让页面加载")
        current_url = page.url
        log(f"当前页面URL: {current_url}")
        if "kdocs.cn" in current_url:
            log("[OK] 已在金山文档域名", 'SUCCESS')
        else:
            log("⚠ 当前不在金山文档域名", 'WARNING')
        # Check whether a login is required. The probe is best-effort: a
        # failure is reported but does not fail the test. Only Exception is
        # caught so Ctrl+C / SystemExit still propagate.
        try:
            login_text = page.locator("text=登录").first.is_visible()
            if login_text:
                log("⚠ 检测到登录页面", 'WARNING')
            else:
                log("[OK] 未检测到登录页面", 'SUCCESS')
        except Exception as e:
            log(f"⚠ 登录状态检测失败: {e}", 'WARNING')
        return True
    except Exception as e:
        log(f"✗ 文档打开失败: {str(e)}", 'ERROR')
        import traceback
        traceback.print_exc()
        return False
def test_table_reading(page):
    """Probe the open page for spreadsheet UI elements (test 3).

    Two independent, best-effort probes are run: reading the value of the
    first ``input.edit-box`` element (the name box) and counting ``canvas``
    elements. A failing probe is logged as a warning and does not fail the
    test.

    Args:
        page: Page object to inspect; a falsy value aborts the test.

    Returns:
        ``False`` when ``page`` is falsy or an unexpected error escapes the
        probes, otherwise ``True``.
    """
    rule = "=" * 50
    for text in (rule, "测试3: 表格读取", rule):
        log(text)

    if not page:
        log("✗ 页面对象不可用", 'ERROR')
        return False

    try:
        log("尝试导航到A1单元格...")

        # Probe 1: look up the name box and report its current value.
        try:
            box = page.locator("input.edit-box").first
            if not box.is_visible():
                log("⚠ 名称框不可见", 'WARNING')
            else:
                current = box.input_value()
                log(f"名称框当前值: {current}", 'SUCCESS')
        except Exception as box_error:
            log(f"⚠ 读取名称框失败: {box_error}", 'WARNING')

        # Probe 2: count canvas elements on the page.
        try:
            found = page.locator("canvas").count()
            log(f"检测到 {found} 个canvas元素可能是表格", 'SUCCESS')
        except Exception as canvas_error:
            log(f"⚠ 查找canvas失败: {canvas_error}", 'WARNING')
    except Exception as err:
        log(f"✗ 表格读取失败: {err!s}", 'ERROR')
        import traceback
        traceback.print_exc()
        return False

    return True
def test_person_search(page):
    """Drive the in-page search with keyboard input (test 4).

    Sends Ctrl+F, types the fixed test name, presses Enter and then Escape,
    pausing after each keystroke group. The outcome has to be checked
    visually in the browser window; the function only reports whether the
    key sequence could be sent.

    Args:
        page: Page object that receives the keystrokes; a falsy value aborts
            the test.

    Returns:
        ``True`` when the whole key sequence was sent, ``False`` when
        ``page`` is falsy or any step raised.
    """
    rule = "=" * 50
    for text in (rule, "测试4: 人员搜索", rule):
        log(text)

    if not page:
        log("✗ 页面对象不可用", 'ERROR')
        return False

    test_name = "张三"
    log(f"搜索测试姓名: {test_name}")

    # Each step: (message logged first or None, keyboard method, argument,
    # pause in ms afterwards).
    steps = (
        ("打开搜索框 (Ctrl+F)...", "press", "Control+f", 500),
        (f"输入搜索内容: {test_name}", "type", test_name, 300),
        ("执行搜索 (Enter)...", "press", "Enter", 1000),
        (None, "press", "Escape", 300),  # close the search box
    )

    try:
        log("聚焦到网格...")
        for announcement, action, argument, pause_ms in steps:
            if announcement is not None:
                log(announcement)
            getattr(page.keyboard, action)(argument)
            page.wait_for_timeout(pause_ms)
        log("[OK] 人员搜索测试完成", 'SUCCESS')
        log("提示:请检查浏览器窗口,看是否高亮显示了相关内容", 'INFO')
    except Exception as err:
        log(f"✗ 搜索测试失败: {err!s}", 'ERROR')
        import traceback
        traceback.print_exc()
        return False

    return True
def cleanup_browser(playwright, browser, context, page):
    """Release browser resources, newest first, on a best-effort basis.

    The page, context and browser are closed and Playwright is stopped, in
    that order. Falsy arguments are skipped. A failure in one step is logged
    as a warning and does not stop the remaining steps. Only ``Exception``
    is caught, so Ctrl+C and ``SystemExit`` still propagate.

    Args:
        playwright: Started Playwright object, or ``None``.
        browser: Browser object, or ``None``.
        context: Browser context object, or ``None``.
        page: Page object, or ``None``.
    """
    log("=" * 50)
    log("清理资源")
    log("=" * 50)
    # (resource, teardown method, success message, action named on failure)
    teardown_steps = (
        (page, "close", "[OK] 页面已关闭", "关闭页面"),
        (context, "close", "[OK] 上下文已关闭", "关闭上下文"),
        (browser, "close", "[OK] 浏览器已关闭", "关闭浏览器"),
        (playwright, "stop", "[OK] Playwright已停止", "停止Playwright"),
    )
    for resource, method_name, done_message, action in teardown_steps:
        try:
            if resource:
                getattr(resource, method_name)()
                log(done_message)
        except Exception as e:
            log(f"⚠ {action}失败: {e}", 'WARNING')
def main():
    """Run the interactive smoke-test sequence.

    Prompts for a document URL (an empty answer selects the built-in
    default) and for confirmation, then runs the four tests in order:
    browser startup, document open, table reading and person search. A
    failed document open is logged and the remaining tests still run; a
    failed browser startup ends the run. Browser resources are always
    released in the ``finally`` clause.

    Ctrl+C or a closed stdin at either initial prompt cancels the run
    instead of surfacing a traceback.
    """
    print("=" * 70)
    print("[LOCK] 金山文档上传测试 - 最简版本")
    print("=" * 70)
    print()
    try:
        # Ask for the document URL.
        doc_url = input("请输入金山文档URL (或按Enter使用默认值): ").strip()
        if not doc_url:
            doc_url = "https://kdocs.cn/l/cpwEOo5ynKX4"
        print("\n测试配置:")
        print(f" 文档URL: {doc_url}")
        print()
        # Ask for confirmation before starting.
        confirm = input("确认开始测试? (y/N): ").strip().lower()
    except (EOFError, KeyboardInterrupt):
        # Nothing has been started yet, so simply treat this as a cancel.
        print()
        print("测试已取消")
        return
    if confirm != 'y':
        print("测试已取消")
        return
    print()
    log("开始测试流程", 'INFO')
    print()
    # Pre-bind so the finally clause can run however far the tests got.
    playwright = None
    browser = None
    context = None
    page = None
    try:
        # Test 1: start the browser.
        playwright, browser, context, page = test_browser_startup()
        if not page:
            log("浏览器启动失败,退出测试", 'ERROR')
            return
        # Test 2: open the document.
        if not test_document_open(page, doc_url):
            log("文档打开失败,但继续测试", 'WARNING')
        # Test 3: table reading.
        test_table_reading(page)
        # Test 4: person search.
        test_person_search(page)
        print()
        log("所有测试完成", 'SUCCESS')
        log("=" * 70)
        print()
        log("提示:", 'INFO')
        log("1. 请检查浏览器窗口,确认所有操作都正常", 'INFO')
        log("2. 如果遇到问题,请查看上面的日志输出", 'INFO')
        log("3. 测试完成后,浏览器窗口会保持打开状态", 'INFO')
        log("4. 您可以手动关闭浏览器窗口来结束测试", 'INFO')
        print()
        # Pause so the user can inspect the browser window before cleanup.
        try:
            input("按Enter键保持浏览器窗口打开或直接关闭窗口...")
        except EOFError:
            # No interactive stdin to wait on; go straight to cleanup rather
            # than reporting this as a test error.
            pass
    except KeyboardInterrupt:
        log("\n测试被用户中断", 'WARNING')
    except Exception as e:
        log(f"\n测试过程中出现错误: {str(e)}", 'ERROR')
        import traceback
        traceback.print_exc()
    finally:
        # Release browser resources.
        cleanup_browser(playwright, browser, context, page)
        log("测试结束", 'INFO')


if __name__ == "__main__":
    main()