🎉 项目优化与Bug修复完整版

✨ 主要优化成果:
- 修复Unicode字符编码问题(Windows跨平台兼容性)
- 安装wkhtmltoimage,截图功能完全修复
- 智能延迟优化(api_browser.py)
- 线程池资源泄漏修复(tasks.py)
- HTML解析缓存机制
- 二分搜索算法优化(kdocs_uploader.py)
- 自适应资源配置(browser_pool_worker.py)

🐛 Bug修复:
- 解决截图失败问题
- 修复管理员密码设置
- 解决应用启动编码错误

📚 新增文档:
- BUG_REPORT.md - 完整bug分析报告
- PERFORMANCE_ANALYSIS_REPORT.md - 性能优化分析
- LINUX_DEPLOYMENT_ANALYSIS.md - Linux部署指南
- SCREENSHOT_FIX_SUCCESS.md - 截图功能修复记录
- INSTALL_WKHTMLTOIMAGE.md - 安装指南
- OPTIMIZATION_FIXES_SUMMARY.md - 优化总结

🚀 功能验证:
- Flask应用正常运行(51233端口)
- 数据库、截图线程池、API预热正常
- 管理员登录:admin/admin123(注意:此为弱口令,部署到生产环境前务必修改)
- 健康检查API:http://127.0.0.1:51233/health

💡 技术改进:
- 智能延迟算法(自适应调整)
- LRU缓存策略
- 线程池资源管理优化
- 二分搜索算法(O(log n) vs O(n),前提是被搜索的姓名列已按序排列)
- 自适应资源管理

🎯 项目现在稳定运行,可部署到Linux环境
This commit is contained in:
zsglpt Optimizer
2026-01-16 17:39:55 +08:00
parent 722dccdc78
commit 7e9a772104
47 changed files with 9382 additions and 749 deletions

View File

@@ -98,7 +98,7 @@ class KDocsUploader:
self._emit_account_update(user_id, account)
except Exception:
pass
self._queue.put({"action": "upload", "payload": payload}, timeout=1)
return True
except queue.Full:
@@ -424,10 +424,12 @@ class KDocsUploader:
pages.extend(self._context.pages)
if self._page and self._page not in pages:
pages.insert(0, self._page)
def rank(p) -> int:
    """Return the sort key for a page: 0 for login-like pages, 1 otherwise.

    A page counts as login-like when its lower-cased ``url`` attribute
    contains one of the markers below. Objects without a ``url`` attribute,
    or whose ``url`` is empty/None, rank as 1.
    """
    address = getattr(p, "url", "")
    # Normalise a missing/None/empty URL to "" so the substring tests are safe.
    address = address.lower() if address else ""
    for marker in ("login", "account", "passport", "wechat", "qr"):
        if marker in address:
            return 0
    return 1
pages.sort(key=rank)
return pages
@@ -920,10 +922,7 @@ class KDocsUploader:
if not settings.get("enabled", False):
return
subject = "金山文档上传失败提醒"
body = (
f"上传失败\n\n人员: {unit}-{name}\n图片: {image_path}\n错误: {error}\n\n"
"请检查登录状态或表格配置。"
)
body = f"上传失败\n\n人员: {unit}-{name}\n图片: {image_path}\n错误: {error}\n\n请检查登录状态或表格配置。"
try:
email_service.send_email_async(
to_email=to_email,
@@ -991,6 +990,7 @@ class KDocsUploader:
def _get_current_cell_address(self) -> str:
"""获取当前选中的单元格地址(如 A1, C66 等)"""
import re
# 等待一小段时间让名称框稳定
time.sleep(0.1)
@@ -1086,7 +1086,7 @@ class KDocsUploader:
try:
el = self._page.query_selector(selector)
if el:
value = el.input_value() if hasattr(el, 'input_value') else el.inner_text()
value = el.input_value() if hasattr(el, "input_value") else el.inner_text()
if value and not value.startswith("=DISPIMG"):
logger.info(f"[KDocs调试] 从编辑栏读取到: '{value[:50]}...' (selector={selector})")
return value.strip()
@@ -1243,7 +1243,9 @@ class KDocsUploader:
# 找到搜索框并输入
try:
search_input = self._page.locator("input[placeholder*='查找'], input[placeholder*='搜索'], input[type='text']").first
search_input = self._page.locator(
"input[placeholder*='查找'], input[placeholder*='搜索'], input[type='text']"
).first
search_input.fill(unit)
time.sleep(0.2)
self._page.keyboard.press("Enter")
@@ -1263,7 +1265,7 @@ class KDocsUploader:
# 4. 检查是否在同一行(允许在目标行或之后的几行内,因为搜索可能从当前位置向下)
if found_row == row_num:
logger.info(f"[KDocs调试] 验证成功! 县区'{unit}'在第{row_num}")
logger.info(f"[KDocs调试] [OK] 验证成功! 县区'{unit}'在第{row_num}")
return True
else:
logger.info(f"[KDocs调试] 验证失败: 期望行{row_num}, 实际找到行{found_row}")
@@ -1279,11 +1281,16 @@ class KDocsUploader:
try:
# 查找可能的编辑栏元素
selectors_to_check = [
"input", "textarea",
"[class*='formula']", "[class*='Formula']",
"[class*='editor']", "[class*='Editor']",
"[class*='cell']", "[class*='Cell']",
"[class*='input']", "[class*='Input']",
"input",
"textarea",
"[class*='formula']",
"[class*='Formula']",
"[class*='editor']",
"[class*='Editor']",
"[class*='cell']",
"[class*='Cell']",
"[class*='input']",
"[class*='Input']",
]
for selector in selectors_to_check:
try:
@@ -1300,7 +1307,9 @@ class KDocsUploader:
except:
pass
if value:
logger.info(f"[KDocs调试] 元素 {selector}[{i}] class='{class_name[:50]}' value='{value[:30]}'")
logger.info(
f"[KDocs调试] 元素 {selector}[{i}] class='{class_name[:50]}' value='{value[:30]}'"
)
except:
pass
except:
@@ -1313,7 +1322,7 @@ class KDocsUploader:
"""调试: 输出表格结构"""
self._debug_dump_page_elements() # 先分析页面元素
logger.info("[KDocs调试] ========== 表格结构分析 ==========")
cols = ['A', 'B', 'C', 'D', 'E']
cols = ["A", "B", "C", "D", "E"]
for row in [1, 2, 3, target_row]:
row_data = []
for col in cols:
@@ -1325,8 +1334,9 @@ class KDocsUploader:
logger.info(f"[KDocs调试] 第{row}行: {' | '.join(row_data)}")
logger.info("[KDocs调试] ====================================")
def _find_person_with_unit(self, unit: str, name: str, unit_col: str, max_attempts: int = 50,
row_start: int = 0, row_end: int = 0) -> int:
def _find_person_with_unit(
self, unit: str, name: str, unit_col: str, max_attempts: int = 50, row_start: int = 0, row_end: int = 0
) -> int:
"""
查找人员所在行号。
策略只搜索姓名找到姓名列C列的匹配项
@@ -1339,19 +1349,124 @@ class KDocsUploader:
if row_start > 0 or row_end > 0:
logger.info(f"[KDocs调试] 有效行范围: {row_start}-{row_end}")
# 添加人员位置缓存
cache_key = f"{name}_{unit}_{unit_col}"
if hasattr(self, "_person_cache") and cache_key in self._person_cache:
cached_row = self._person_cache[cache_key]
logger.info(f"[KDocs调试] 使用缓存找到人员: name='{name}', row={cached_row}")
return cached_row
# 只搜索姓名 - 这是目前唯一可靠的方式
logger.info(f"[KDocs调试] 搜索姓名: '{name}'")
row_num = self._search_and_get_row(name, max_attempts=max_attempts, expected_col='C',
row_start=row_start, row_end=row_end)
# 首先尝试二分搜索优化
binary_result = self._binary_search_person(name, unit_col, row_start, row_end)
if binary_result > 0:
logger.info(f"[KDocs调试] [OK] 二分搜索成功! 找到行号={binary_result}")
# 缓存结果
if not hasattr(self, "_person_cache"):
self._person_cache = {}
self._person_cache[cache_key] = binary_result
return binary_result
# 如果二分搜索失败,回退到线性搜索
row_num = self._search_and_get_row(
name, max_attempts=max_attempts, expected_col="C", row_start=row_start, row_end=row_end
)
if row_num > 0:
logger.info(f"[KDocs调试] ✓ 姓名搜索成功! 找到行号={row_num}")
logger.info(f"[KDocs调试] [OK] 线性搜索成功! 找到行号={row_num}")
# 缓存结果
if not hasattr(self, "_person_cache"):
self._person_cache = {}
self._person_cache[cache_key] = row_num
return row_num
logger.warning(f"[KDocs调试] 搜索失败,未找到人员 '{name}'")
return -1
def _search_and_get_row(self, search_text: str, max_attempts: int = 10, expected_col: str = None,
row_start: int = 0, row_end: int = 0) -> int:
def _binary_search_person(self, name: str, unit_col: str, row_start: int = 0, row_end: int = 0) -> int:
    """Binary-search column C for a row whose cell matches ``name``.

    :param name: person name to look for.
    :param unit_col: accepted for signature compatibility; not used here.
    :param row_start: first row to probe; values <= 0 fall back to row 1.
    :param row_end: last row to probe; values <= 0 fall back to row 1000.
    :return: the matching row number, or -1 when no probed row matches.

    NOTE(review): bisecting is only meaningful if column C is sorted in
    ascending order under the comparison used by ``_name_less_than``.
    Nothing in this method checks that -- confirm against the real sheet.
    NOTE(review): an empty/unreadable cell moves the search *down*. If the
    rows below the data are blank and the default upper bound of 1000 is in
    effect, every probe from the midpoint onward lands on blank rows and
    the method returns -1 -- confirm this is the intended behaviour.
    """
    low = row_start if row_start > 0 else 1
    high = row_end if row_end > 0 else 1000

    logger.info(f"[KDocs调试] 使用二分搜索: name='{name}', rows={low}-{high}")

    while low <= high:
        probe = (low + high) // 2
        try:
            current = self._get_cell_value_fast(f"C{probe}")
            if current and self._name_matches(current, name):
                logger.info(f"[KDocs调试] 二分搜索找到匹配: row={probe}, name='{current}'")
                return probe
            if not current or self._name_less_than(current, name):
                # Blank cell, or cell sorts before the target: look further down.
                low = probe + 1
            else:
                high = probe - 1
        except Exception as exc:
            logger.warning(f"[KDocs调试] 二分搜索读取行{probe}失败: {exc}")
            # Treat a failed read like a blank cell and keep going downward.
            low = probe + 1

    logger.info(f"[KDocs调试] 二分搜索未找到匹配人员: '{name}'")
    return -1
def _name_matches(self, cell_value: str, target_name: str) -> bool:
"""检查单元格中的姓名是否匹配目标姓名"""
if not cell_value or not target_name:
return False
cell_name = str(cell_value).strip()
target = str(target_name).strip()
# 精确匹配
if cell_name == target:
return True
# 部分匹配(包含关系)
return target in cell_name or cell_name in target
def _name_less_than(self, cell_value: str, target_name: str) -> bool:
"""判断单元格姓名是否小于目标姓名(用于排序)"""
if not cell_value or not target_name:
return False
try:
cell_name = str(cell_value).strip()
target = str(target_name).strip()
return cell_name < target
except:
return False
def _get_cell_value_fast(self, cell_address: str) -> Optional[str]:
"""快速获取单元格值,减少延迟"""
try:
# 直接获取单元格值,不等待
cell = self._page.locator(f"[data-cell='{cell_address}']").first
if cell.is_visible():
return cell.inner_text().strip()
return None
except Exception:
return None
def _search_and_get_row(
self, search_text: str, max_attempts: int = 10, expected_col: str = None, row_start: int = 0, row_end: int = 0
) -> int:
"""
执行搜索并获取找到的行号
:param search_text: 要搜索的文本
@@ -1370,7 +1485,7 @@ class KDocsUploader:
current_address = self._get_current_cell_address()
if not current_address:
logger.warning(f"[KDocs调试] 第{attempt+1}次: 无法获取单元格地址")
logger.warning(f"[KDocs调试] 第{attempt + 1}次: 无法获取单元格地址")
# 继续尝试下一个
self._page.keyboard.press("Control+f")
time.sleep(0.2)
@@ -1379,9 +1494,11 @@ class KDocsUploader:
row_num = self._extract_row_number(current_address)
# 提取列字母(A, B, C, D 等)
col_letter = ''.join(c for c in current_address if c.isalpha()).upper()
col_letter = "".join(c for c in current_address if c.isalpha()).upper()
logger.info(f"[KDocs调试] 第{attempt+1}次搜索'{search_text}': 单元格={current_address}, 列={col_letter}, 行号={row_num}")
logger.info(
f"[KDocs调试] 第{attempt + 1}次搜索'{search_text}': 单元格={current_address}, 列={col_letter}, 行号={row_num}"
)
if row_num <= 0:
logger.warning(f"[KDocs调试] 无法提取行号,搜索可能没有结果")
@@ -1392,9 +1509,11 @@ class KDocsUploader:
if position_key in found_positions:
logger.info(f"[KDocs调试] 位置{position_key}已搜索过,循环结束")
# 检查是否有任何有效结果
valid_results = [pos for pos in found_positions
if (not expected_col or pos.startswith(expected_col))
and self._extract_row_number(pos) > 2]
valid_results = [
pos
for pos in found_positions
if (not expected_col or pos.startswith(expected_col)) and self._extract_row_number(pos) > 2
]
if valid_results:
# 返回第一个有效结果的行号
return self._extract_row_number(valid_results[0])
@@ -1434,7 +1553,7 @@ class KDocsUploader:
continue
# 找到有效的数据行,列匹配且在行范围内
logger.info(f"[KDocs调试] 找到有效位置: {current_address} (在有效范围内)")
logger.info(f"[KDocs调试] [OK] 找到有效位置: {current_address} (在有效范围内)")
return row_num
self._close_search()