"""HTTP-style API handlers for parsing relay ("接龙") text and generating output images.

Each handler is framework-agnostic: it takes a ``payload`` dict (the request body)
and a ``ctx`` dict of injected callables/services, and returns an
``(HTTPStatus, response_dict)`` tuple. Every response dict carries an ``"ok"``
boolean; error responses add ``"error"`` (and sometimes ``"error_code"`` /
``"message"``).

NOTE(review): the exact semantics of the ``ctx`` callables (``parse_records``,
``generate_records``, ``set_generation_progress``, …) are defined elsewhere in
the project — the descriptions below reflect only how this module uses them.
"""

from __future__ import annotations

import uuid
from http import HTTPStatus
from typing import Any

from repositories.history_repository import (
    append_generated_history,
    load_history_for_config,
    upsert_generated_history,
)


def _normalize_key_fields(value: Any) -> list[str]:
    """Coerce a configured dedup key-field list into a non-empty list of strings.

    Non-list or empty input falls back to the default key set
    ``["branch", "amount", "type"]``; every element is stringified.
    """
    key_fields = value if isinstance(value, list) else []
    if not key_fields:
        key_fields = ["branch", "amount", "type"]
    return [str(x) for x in key_fields]


def run_parse_api(payload: dict[str, Any], ctx: dict[str, Any]) -> tuple[HTTPStatus, dict[str, Any]]:
    """Parse raw relay text without generating anything.

    Flow: normalize the insurance-year inputs, load config and history, run
    ``parse_records``, and log any skipped lines with ``source="api_parse"``.

    Returns:
        ``(OK, {"ok": True, "result": ...})`` on success;
        ``(BAD_REQUEST, ...)`` for ``ValueError`` (treated as bad input);
        ``(INTERNAL_SERVER_ERROR, ...)`` for any other exception, which is
        also appended to the review log together with the raw text.
    """
    normalize_insurance_year = ctx["normalize_insurance_year"]
    normalize_insurance_year_choices = ctx["normalize_insurance_year_choices"]
    load_config = ctx["load_config"]
    parse_records = ctx["parse_records"]
    log_parse_skipped = ctx["log_parse_skipped"]
    append_review_log = ctx["append_review_log"]
    # Captured before the try-block so the error path can log it even when
    # normalization of the other payload fields raises.
    raw_text = str(payload.get("raw_text", ""))
    try:
        insurance_year_choice = normalize_insurance_year(payload.get("insurance_year"))
        insurance_year_choices = normalize_insurance_year_choices(payload.get("insurance_year_choices"))
        config = load_config()
        # Only the history content is needed here; the path is discarded.
        _, history = load_history_for_config(config, ctx)
        result = parse_records(raw_text, config, history, insurance_year_choice, insurance_year_choices)
        log_parse_skipped(result.get("skipped", []), source="api_parse")
        return HTTPStatus.OK, {"ok": True, "result": result}
    except ValueError as exc:
        return HTTPStatus.BAD_REQUEST, {"ok": False, "error": str(exc)}
    except Exception as exc:
        append_review_log(
            "parse_api_error",
            {
                "error": str(exc),
                "raw_text": raw_text,
            },
        )
        return HTTPStatus.INTERNAL_SERVER_ERROR, {"ok": False, "error": str(exc)}


def run_generate_api(payload: dict[str, Any], ctx: dict[str, Any]) -> tuple[HTTPStatus, dict[str, Any]]:
    """Parse relay text and generate output images, reporting progress.

    Progress is pushed via ``set_generation_progress`` keyed by a
    ``progress_token`` (taken from the payload or freshly minted with
    ``uuid4().hex``). When single-generation mode is active (resolved from
    config; defaults to True when the resolver is absent), a generation slot
    is acquired up front and released in ``finally``; a busy slot yields
    ``429 TOO_MANY_REQUESTS`` with ``error_code="generate_busy"``.

    Early-return paths (all include the ``progress_token``):
      * ``400`` with ``error_code="insurance_year_required"`` when the parse
        result flags ``needs_insurance_choice`` and no year was supplied;
      * ``200`` with ``generated_count=0`` when there are no new records.

    On success, generation runs with an ``on_progress`` callback, history is
    appended (unless ``save_history`` is falsy in the payload, default True)
    using the parse result's ``dedup_key_fields``, and a ``200`` response
    returns counts, generated items, download images, strategy, and history
    stats. ``ValueError`` maps to ``400``; any other exception is review-logged
    and maps to ``500`` — both error paths also mark the progress entry as
    failed.
    """
    normalize_insurance_year = ctx["normalize_insurance_year"]
    normalize_insurance_year_choices = ctx["normalize_insurance_year_choices"]
    load_config = ctx["load_config"]
    resolve_template_path = ctx["resolve_template_path"]
    resolve_output_dir = ctx["resolve_output_dir"]
    parse_records = ctx["parse_records"]
    generate_records = ctx["generate_records"]
    set_generation_progress = ctx["set_generation_progress"]
    append_review_log = ctx["append_review_log"]
    log_parse_skipped = ctx["log_parse_skipped"]
    # Optional concurrency hooks — absent entries degrade gracefully below.
    resolve_single_generation_mode = ctx.get("resolve_single_generation_mode")
    acquire_generation_slot = ctx.get("acquire_generation_slot")
    release_generation_slot = ctx.get("release_generation_slot")
    raw_text = str(payload.get("raw_text", ""))
    # Reuse the caller's token when given so the client can poll progress;
    # otherwise mint one and echo it back in every response.
    progress_token = str(payload.get("progress_token", "")).strip() or uuid.uuid4().hex
    slot_acquired = False
    try:
        template_override = str(payload.get("template_file", "")).strip() or None
        output_override = str(payload.get("output_dir", "")).strip() or None
        insurance_year_choice = normalize_insurance_year(payload.get("insurance_year"))
        insurance_year_choices = normalize_insurance_year_choices(payload.get("insurance_year_choices"))
        save_history_flag = bool(payload.get("save_history", True))
        set_generation_progress(
            progress_token,
            status="running",
            stage="接收请求",
            percent=1,
            detail="已收到生成请求",
        )
        config = load_config()
        # Single-generation mode defaults to ON when no resolver is provided.
        single_mode = bool(resolve_single_generation_mode(config)) if callable(resolve_single_generation_mode) else True
        if single_mode and callable(acquire_generation_slot):
            slot_acquired = bool(acquire_generation_slot(progress_token))
            if not slot_acquired:
                set_generation_progress(
                    progress_token,
                    status="busy",
                    stage="系统繁忙",
                    percent=0,
                    detail="已有任务在生成,请稍后重试",
                )
                return (
                    HTTPStatus.TOO_MANY_REQUESTS,
                    {
                        "ok": False,
                        "error": "generate_busy",
                        "error_code": "generate_busy",
                        "message": "已有任务在生成,请稍后再试。",
                        "progress_token": progress_token,
                    },
                )
        history_path, history = load_history_for_config(config, ctx)
        set_generation_progress(
            progress_token,
            status="running",
            stage="解析文本",
            percent=8,
            detail="正在解析接龙内容",
        )
        parse_result = parse_records(
            raw_text,
            config,
            history,
            insurance_year_choice,
            insurance_year_choices,
        )
        log_parse_skipped(parse_result.get("skipped", []), source="api_generate")
        # Insurance records need an explicit 3-year/5-year choice; bounce back
        # to the client with the parse result so it can prompt per record.
        if parse_result.get("needs_insurance_choice") and insurance_year_choice is None:
            set_generation_progress(
                progress_token,
                status="need_input",
                stage="等待选择",
                percent=15,
                detail="检测到保险记录,等待选择3年交/5年交",
            )
            return (
                HTTPStatus.BAD_REQUEST,
                {
                    "ok": False,
                    "error": "insurance_year_required",
                    "error_code": "insurance_year_required",
                    "result": parse_result,
                    "options": ["3", "5"],
                    "message": "检测到保险记录但未指定年限,请逐条选择3年交或5年交。",
                    "progress_token": progress_token,
                },
            )
        new_records = parse_result.get("new_records", [])
        if not isinstance(new_records, list):
            new_records = []
        # Nothing new to generate is a successful no-op, not an error.
        if not new_records:
            set_generation_progress(
                progress_token,
                status="done",
                stage="完成",
                percent=100,
                detail="没有可生成的新记录",
            )
            return (
                HTTPStatus.OK,
                {
                    "ok": True,
                    "message": "没有可生成的新记录",
                    "result": parse_result,
                    "generated_count": 0,
                    "progress_token": progress_token,
                },
            )
        set_generation_progress(
            progress_token,
            status="running",
            stage="准备模板",
            percent=12,
            detail=f"待生成 {len(new_records)} 条",
        )
        template_path = resolve_template_path(config, template_override)
        output_dir = resolve_output_dir(config, output_override)

        def on_progress(percent: int, stage: str, detail: str) -> None:
            # Bridge the generator's progress callback onto the shared
            # progress store under this request's token.
            set_generation_progress(
                progress_token,
                status="running",
                stage=stage,
                percent=percent,
                detail=detail,
            )

        gen_result = generate_records(
            new_records,
            config,
            template_path,
            output_dir,
            progress_cb=on_progress,
        )
        history_stat = None
        if save_history_flag:
            set_generation_progress(
                progress_token,
                status="running",
                stage="更新历史",
                percent=96,
                detail="写入历史记录",
            )
            key_fields = _normalize_key_fields(parse_result.get("dedup_key_fields", ["branch", "amount", "type"]))
            history_stat = append_generated_history(
                history_path=history_path,
                generated_items=gen_result.get("generated", []),
                key_fields=key_fields,
                ctx=ctx,
            )
        set_generation_progress(
            progress_token,
            status="done",
            stage="完成",
            percent=100,
            detail=f"已生成 {gen_result.get('generated_count', 0)} 张",
        )
        return (
            HTTPStatus.OK,
            {
                "ok": True,
                "message": "生成完成",
                "result": parse_result,
                "generated_count": gen_result.get("generated_count", 0),
                "generated": gen_result.get("generated", []),
                "download_images": gen_result.get("download_images", []),
                "generation_strategy": gen_result.get("generation_strategy", "legacy"),
                "history": history_stat,
                "progress_token": progress_token,
            },
        )
    except ValueError as exc:
        set_generation_progress(
            progress_token,
            status="error",
            stage="失败",
            percent=100,
            detail="请求参数错误",
            error=str(exc),
        )
        return HTTPStatus.BAD_REQUEST, {"ok": False, "error": str(exc)}
    except Exception as exc:
        append_review_log(
            "generate_api_error",
            {
                "error": str(exc),
                "raw_text": raw_text,
            },
        )
        set_generation_progress(
            progress_token,
            status="error",
            stage="失败",
            percent=100,
            detail="生成过程异常",
            error=str(exc),
        )
        return HTTPStatus.INTERNAL_SERVER_ERROR, {"ok": False, "error": str(exc)}
    finally:
        # Release the slot on every exit path (success, early return, error),
        # but only if this request actually acquired it.
        if slot_acquired and callable(release_generation_slot):
            release_generation_slot(progress_token)


def run_correction_apply_api(payload: dict[str, Any], ctx: dict[str, Any]) -> tuple[HTTPStatus, dict[str, Any]]:
    """Apply manual field overrides to one record, regenerate it, and learn from it.

    Steps, in order:
      1. Validate the payload: ``record`` must be a dict; ``overrides`` must be
         a dict (``None`` is treated as empty).
      2. In single-generation mode, acquire the shared slot under the fixed
         token ``"correction_apply"``; a busy slot yields ``429`` with
         ``error_code="generate_busy"``.
      3. Rebuild a normalized ``base_record`` from the submitted record fields
         (branch/amount/status normalizers, type/page trimming, page inferred
         from type when missing), then apply ``overrides`` to get ``corrected``,
         re-normalize its source/raw lines, default its output filename, and
         validate it for generation.
      4. Generate the single corrected record (no progress callback) and
         upsert it into history using the configured dedup key fields.
      5. Optionally persist a manual correction rule when ``remember_rule`` is
         set: only overridden, non-empty fields are kept; ``amount`` is dropped
         unless ``remember_amount`` is also set; the rule keyword is the
         payload's ``rule_keyword`` or an inferred one.
      6. Append a ``manual_correction_apply`` review-log entry, auto-resolve
         issue marks matching the corrected source line (logging an
         ``issue_auto_resolve`` entry when any were resolved), and return a
         ``200`` response with generation/history/rule/issue stats.

    ``ValueError`` maps to ``400``; any other exception is review-logged as
    ``manual_correction_error`` and maps to ``500``. The slot, if acquired, is
    always released in ``finally``.
    """
    load_config = ctx["load_config"]
    resolve_template_path = ctx["resolve_template_path"]
    resolve_output_dir = ctx["resolve_output_dir"]
    resolve_history_path = ctx["resolve_history_path"]
    normalize_line = ctx["normalize_line"]
    normalize_branch_value = ctx["normalize_branch_value"]
    normalize_amount_text = ctx["normalize_amount_text"]
    normalize_status_value = ctx["normalize_status_value"]
    infer_page_from_type = ctx["infer_page_from_type"]
    apply_record_overrides = ctx["apply_record_overrides"]
    render_output_filename = ctx["render_output_filename"]
    validate_record_for_generation = ctx["validate_record_for_generation"]
    generate_records = ctx["generate_records"]
    infer_correction_rule_keyword = ctx["infer_correction_rule_keyword"]
    save_or_update_manual_rule = ctx["save_or_update_manual_rule"]
    append_review_log = ctx["append_review_log"]
    resolve_issue_marks_by_source_line = ctx["resolve_issue_marks_by_source_line"]
    # Optional concurrency hooks, same contract as in run_generate_api.
    resolve_single_generation_mode = ctx.get("resolve_single_generation_mode")
    acquire_generation_slot = ctx.get("acquire_generation_slot")
    release_generation_slot = ctx.get("release_generation_slot")
    # Pre-seeded so the success response can be built even if issue resolution
    # is skipped (and keeps the shape stable for the return payload).
    issue_resolve_stat: dict[str, Any] = {"count": 0, "ids": []}
    slot_acquired = False
    try:
        record = payload.get("record")
        if not isinstance(record, dict):
            raise ValueError("record is required")
        overrides = payload.get("overrides", {})
        if overrides is None:
            overrides = {}
        if not isinstance(overrides, dict):
            raise ValueError("overrides must be an object")
        config = load_config()
        single_mode = bool(resolve_single_generation_mode(config)) if callable(resolve_single_generation_mode) else True
        if single_mode and callable(acquire_generation_slot):
            # Fixed token: all correction-apply requests contend as one owner.
            slot_acquired = bool(acquire_generation_slot("correction_apply"))
            if not slot_acquired:
                return (
                    HTTPStatus.TOO_MANY_REQUESTS,
                    {
                        "ok": False,
                        "error": "generate_busy",
                        "error_code": "generate_busy",
                        "message": "已有任务在生成,请稍后再试。",
                    },
                )
        template_override = str(payload.get("template_file", "")).strip() or None
        output_override = str(payload.get("output_dir", "")).strip() or None
        template_path = resolve_template_path(config, template_override)
        output_dir = resolve_output_dir(config, output_override)
        history_path = resolve_history_path(config)
        relay_cfg = config.get("relay_handling", {})
        parse_rules = relay_cfg.get("parse_rules", {}) if isinstance(relay_cfg, dict) else {}
        line_pattern = str(parse_rules.get("line_pattern", r"^\d+、\s*"))
        source_line = str(record.get("source_line", "")).strip()
        raw_text = str(record.get("raw_text", "")).strip()
        normalized_line = normalize_line(source_line or raw_text, line_pattern)
        # Rebuild the record through the same normalizers the parser uses, so
        # manual submissions can't bypass canonical formatting.
        base_record = {
            "source_line": source_line or raw_text,
            "raw_text": normalized_line or raw_text,
            "branch": normalize_branch_value(record.get("branch", ""), config),
            "amount": normalize_amount_text(record.get("amount", "")),
            "type": str(record.get("type", "")).strip(),
            "page": str(record.get("page", "")).strip(),
            "status": normalize_status_value(str(record.get("status", "")).strip(), config),
            "output_file": str(record.get("output_file", "")).strip(),
        }
        if not base_record["page"] and base_record["type"]:
            base_record["page"] = infer_page_from_type(base_record["type"], config)
        corrected = apply_record_overrides(base_record, overrides, config)
        # Re-anchor line fields after overrides: keep a non-empty source line
        # and re-normalize raw_text in case an override replaced it.
        corrected["source_line"] = str(corrected.get("source_line") or source_line or raw_text)
        corrected["raw_text"] = normalize_line(str(corrected.get("raw_text") or normalized_line), line_pattern)
        if not corrected.get("output_file"):
            # Sequence number 1: this endpoint always generates a single record.
            corrected["output_file"] = render_output_filename(config, corrected, 1)
        validate_record_for_generation(corrected, config)
        gen_result = generate_records(
            [corrected],
            config,
            template_path,
            output_dir,
            progress_cb=None,
        )
        relay_cfg = config.get("relay_handling", {})
        dedup_cfg = relay_cfg.get("dedup", {}) if isinstance(relay_cfg, dict) else {}
        key_fields = _normalize_key_fields(dedup_cfg.get("key_fields", ["branch", "amount", "type"]))
        # Upsert (not append): a correction replaces any prior history entry
        # with the same dedup key.
        history_stat = upsert_generated_history(
            history_path=history_path,
            generated_items=gen_result.get("generated", []),
            key_fields=key_fields,
            ctx=ctx,
        )
        remember_rule = bool(payload.get("remember_rule", False))
        remember_amount = bool(payload.get("remember_amount", False))
        rule_keyword = str(payload.get("rule_keyword", "")).strip()
        note = str(payload.get("note", "")).strip()
        applied_rule = None
        if remember_rule:
            rule_updates: dict[str, Any] = {}
            # Only remember fields the user actually overrode, and only if the
            # corrected value is non-empty.
            for field in ("branch", "type", "page", "status", "amount"):
                if field not in overrides:
                    continue
                val = str(corrected.get(field, "")).strip()
                if val:
                    rule_updates[field] = val
            if not remember_amount:
                # Amounts are record-specific; keep them out of reusable rules
                # unless explicitly requested.
                rule_updates.pop("amount", None)
            if rule_updates:
                keyword = rule_keyword or infer_correction_rule_keyword(
                    source_line=str(corrected.get("source_line", "")),
                    normalized_line=str(corrected.get("raw_text", "")),
                    corrected_record=corrected,
                )
                applied_rule = save_or_update_manual_rule(
                    keyword=keyword,
                    updates=rule_updates,
                    note=note,
                    match_mode=str(payload.get("rule_mode", "normalized")),
                )
        append_review_log(
            "manual_correction_apply",
            {
                "source_line": str(corrected.get("source_line", "")),
                "record_before": base_record,
                "record_after": corrected,
                "overrides": overrides,
                "remember_rule": remember_rule,
                "rule": applied_rule,
                "note": note,
            },
        )
        # A successful correction auto-resolves any open issue marks tied to
        # the same source line.
        issue_resolve_stat = resolve_issue_marks_by_source_line(
            str(corrected.get("source_line", "")),
            reason="manual_correction_apply",
        )
        if int(issue_resolve_stat.get("count", 0)) > 0:
            append_review_log(
                "issue_auto_resolve",
                {
                    "source_line": str(corrected.get("source_line", "")),
                    "resolved_issue_count": int(issue_resolve_stat.get("count", 0)),
                    "resolved_issue_ids": issue_resolve_stat.get("ids", []),
                },
            )
        return (
            HTTPStatus.OK,
            {
                "ok": True,
                "message": "修正已生成",
                "generated_count": gen_result.get("generated_count", 0),
                "generated": gen_result.get("generated", []),
                "download_images": gen_result.get("download_images", []),
                "generation_strategy": gen_result.get("generation_strategy", "legacy"),
                "history": history_stat,
                "rule": applied_rule,
                "resolved_issue_count": int(issue_resolve_stat.get("count", 0)),
                "resolved_issue_ids": issue_resolve_stat.get("ids", []),
            },
        )
    except ValueError as exc:
        return HTTPStatus.BAD_REQUEST, {"ok": False, "error": str(exc)}
    except Exception as exc:
        append_review_log(
            "manual_correction_error",
            {
                "error": str(exc),
                "record": payload.get("record") if isinstance(payload, dict) else {},
            },
        )
        return HTTPStatus.INTERNAL_SERVER_ERROR, {"ok": False, "error": str(exc)}
    finally:
        # Mirror of run_generate_api: always release an acquired slot.
        if slot_acquired and callable(release_generation_slot):
            release_generation_slot("correction_apply")