xb/app/server.py

#!/usr/bin/env python3
from __future__ import annotations

import argparse
from concurrent.futures import ThreadPoolExecutor, as_completed
import ctypes
import gc
import hashlib
import io
import json
import os
import re
import shutil
import subprocess
import threading
import time
import uuid
from datetime import datetime, timedelta
from http import HTTPStatus
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from typing import Any, Callable
from urllib.parse import quote, urlparse

from pptx.enum.text import PP_ALIGN
from pptx import Presentation
from api_get_routes import handle_get_routes
from api_post_routes import handle_post_routes

BASE_DIR = Path(__file__).resolve().parent
STATIC_DIR = BASE_DIR / "static"
CONFIG_PATH = BASE_DIR / "config" / "xibao_logic.json"
DATA_DIR = BASE_DIR / "data"
DEFAULT_HISTORY_PATH = DATA_DIR / "generated_history.json"
DEFAULT_OUTPUT_DIR = BASE_DIR / "output"
DEFAULT_TEMPLATE_FALLBACK = BASE_DIR.parent / "黄金三十天喜报模版（余额、保险、理财）(1).pptx"
REVIEW_LOG_DIR = DATA_DIR / "review_logs"
MANUAL_RULES_PATH = DATA_DIR / "manual_rules.json"
ISSUE_MARKS_PATH = DATA_DIR / "marked_issues.json"
SKIPPED_SUPPRESS_PATH = DATA_DIR / "skipped_suppressions.json"
CONVERTER_IMAGE = "minidocks/libreoffice"
LOCAL_LIBREOFFICE_BIN = shutil.which("libreoffice") or shutil.which("soffice")
LOCAL_PDFTOPPM_BIN = shutil.which("pdftoppm")

DATA_DIR.mkdir(parents=True, exist_ok=True)
DEFAULT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
REVIEW_LOG_DIR.mkdir(parents=True, exist_ok=True)
_HISTORY_LOCK = threading.Lock()
_DOWNLOAD_LOCK = threading.Lock()
_CONVERTER_LOCK = threading.Lock()
_LOG_LOCK = threading.Lock()
_PROGRESS_LOCK = threading.Lock()
_RULES_LOCK = threading.Lock()
_ISSUE_LOCK = threading.Lock()
_SKIP_SUPPRESS_LOCK = threading.Lock()
_ACTIVE_JOB_LOCK = threading.Lock()
_GENERATE_SLOT_LOCK = threading.Lock()
_GENERATE_STATE_LOCK = threading.Lock()
DOWNLOAD_CACHE: dict[str, dict[str, Any]] = {}
GEN_PROGRESS: dict[str, dict[str, Any]] = {}
ACTIVE_JOB_DIRS: set[str] = set()
ACTIVE_GENERATION: dict[str, Any] = {"token": "", "started_at": 0.0}
CONVERTER_IMAGE_READY = False
_MALLOC_TRIM_FN: Callable[[int], int] | None = None

TERM_DEFS = [
    {
        "type": "三个月定期",
        "page": "page_0",
        "regex": re.compile(r"(?:三|3)\s*个?月"),
    },
    {
        "type": "六个月定期",
        "page": "page_1",
        "regex": re.compile(r"(?:六|6)\s*个?月|半年"),
    },
    {
        "type": "一年期定期",
        "page": "page_2",
        "regex": re.compile(r"(?:定期)?\s*(?:存\s*)?(?:一|1)\s*年(?:期)?|定期一年|存1年"),
    },
]

CHINESE_DIGIT_MAP = {
    "零": 0,
    "〇": 0,
    "一": 1,
    "二": 2,
    "两": 2,
    "三": 3,
    "四": 4,
    "五": 5,
    "六": 6,
    "七": 7,
    "八": 8,
    "九": 9,
    "壹": 1,
    "贰": 2,
    "叁": 3,
    "肆": 4,
    "伍": 5,
    "陆": 6,
    "柒": 7,
    "捌": 8,
    "玖": 9,
}
CHINESE_UNIT_MAP = {"十": 10, "百": 100, "千": 1000, "万": 10000, "亿": 100000000}
CHINESE_NUMBER_RE = re.compile(r"[零〇一二两三四五六七八九十百千万亿壹贰叁肆伍陆柒捌玖拾佰仟点點]+")
STATUS_HEURISTIC_PATTERNS: dict[str, list[re.Pattern[str]]] = {
    "揽收现金": [
        re.compile(r"揽收[^，。,;；\n]{0,24}现金"),
    ],
    "揽收彩礼": [
        re.compile(r"揽收[^，。,;；\n]{0,24}(?:彩礼|礼金)"),
        re.compile(r"(?:彩礼|礼金)[^，。,;；\n]{0,24}揽收"),
    ],
    "微信提现": [
        re.compile(r"(?:微信|支付宝)[^，。,;；\n]{0,16}提现"),
        re.compile(r"提现[^，。,;；\n]{0,16}(?:微信|支付宝)"),
    ],
    "商户提现": [
        re.compile(r"商户[^，。,;；\n]{0,16}提现"),
        re.compile(r"提现[^，。,;；\n]{0,16}商户"),
    ],
    "揽收商户": [
        re.compile(r"揽收[^，。,;；\n]{0,16}商户"),
        re.compile(r"商户[^，。,;；\n]{0,16}揽收"),
    ],
    "揽收他行": [
        re.compile(r"揽收[^，。,;；\n]{0,20}他行"),
        re.compile(r"他行[^，。,;；\n]{0,20}揽收"),
    ],
    "他行挖转": [
        re.compile(r"挖转[^，。,;；\n]{0,20}他行"),
        re.compile(r"他行[^，。,;；\n]{0,20}挖转"),
        re.compile(r"挖他行"),
    ],
}


class ConfigError(RuntimeError):
    pass


def now_ts() -> str:
    return datetime.now().strftime("%Y%m%d_%H%M%S")


def load_config() -> dict[str, Any]:
    if not CONFIG_PATH.exists():
        raise ConfigError(f"config file not found: {CONFIG_PATH}")

    with CONFIG_PATH.open("r", encoding="utf-8") as f:
        config = json.load(f)

    if not isinstance(config, dict):
        raise ConfigError("config root must be a JSON object")

    return config


def is_windows_path(path_text: str) -> bool:
    return bool(re.match(r"^[A-Za-z]:\\", path_text))


def resolve_history_path(config: dict[str, Any]) -> Path:
    history_file = str(config.get("history_file", "")).strip()
    if history_file and not is_windows_path(history_file):
        p = Path(history_file)
        if not p.is_absolute():
            p = BASE_DIR / p
        p.parent.mkdir(parents=True, exist_ok=True)
        return p

    DEFAULT_HISTORY_PATH.parent.mkdir(parents=True, exist_ok=True)
    return DEFAULT_HISTORY_PATH


def resolve_template_path(config: dict[str, Any], override_path: str | None = None) -> Path:
    candidates: list[Path] = []

    if override_path:
        p = Path(override_path)
        if not p.is_absolute():
            p = BASE_DIR / p
        candidates.append(p)

    cfg_template = str(config.get("template_file", "")).strip()
    if cfg_template and not is_windows_path(cfg_template):
        p = Path(cfg_template)
        if not p.is_absolute():
            p = BASE_DIR / p
        candidates.append(p)

    env_template = os.environ.get("XIBAO_TEMPLATE_FILE", "").strip()
    if env_template:
        p = Path(env_template)
        if not p.is_absolute():
            p = BASE_DIR / p
        candidates.append(p)

    candidates.append(DEFAULT_TEMPLATE_FALLBACK)

    for p in candidates:
        if p.exists() and p.is_file():
            return p

    raise ConfigError(
        "template file not found. Set config.template_file to a valid local path "
        "or pass template_file in request body."
    )


def resolve_output_dir(config: dict[str, Any], override_path: str | None = None) -> Path:
    if override_path:
        p = Path(override_path)
        if not p.is_absolute():
            p = BASE_DIR / p
        p.mkdir(parents=True, exist_ok=True)
        return p

    output_dir = str(config.get("output_dir", "")).strip()
    if output_dir and not is_windows_path(output_dir):
        p = Path(output_dir)
        if not p.is_absolute():
            p = BASE_DIR / p
        p.mkdir(parents=True, exist_ok=True)
        return p

    DEFAULT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    return DEFAULT_OUTPUT_DIR


def load_history(history_path: Path) -> list[dict[str, Any]]:
    if not history_path.exists():
        return []

    with history_path.open("r", encoding="utf-8") as f:
        raw = json.load(f)

    if isinstance(raw, list):
        return [r for r in raw if isinstance(r, dict)]

    if isinstance(raw, dict) and isinstance(raw.get("records"), list):
        return [r for r in raw["records"] if isinstance(r, dict)]

    return []


def save_history(history_path: Path, records: list[dict[str, Any]]) -> None:
    with history_path.open("w", encoding="utf-8") as f:
        json.dump(records, f, ensure_ascii=False, indent=2)


def load_manual_rules() -> list[dict[str, Any]]:
    if not MANUAL_RULES_PATH.exists():
        return []
    with _RULES_LOCK:
        try:
            with MANUAL_RULES_PATH.open("r", encoding="utf-8") as f:
                raw = json.load(f)
        except Exception:
            return []
    if not isinstance(raw, list):
        return []
    return [x for x in raw if isinstance(x, dict)]


def save_manual_rules(rules: list[dict[str, Any]]) -> None:
    with _RULES_LOCK:
        with MANUAL_RULES_PATH.open("w", encoding="utf-8") as f:
            json.dump(rules, f, ensure_ascii=False, indent=2)


def _load_issue_marks_unlocked() -> list[dict[str, Any]]:
    if not ISSUE_MARKS_PATH.exists():
        return []
    try:
        with ISSUE_MARKS_PATH.open("r", encoding="utf-8") as f:
            raw = json.load(f)
    except Exception:
        return []
    if not isinstance(raw, list):
        return []
    return [x for x in raw if isinstance(x, dict)]


def _save_issue_marks_unlocked(items: list[dict[str, Any]]) -> None:
    ISSUE_MARKS_PATH.parent.mkdir(parents=True, exist_ok=True)
    with ISSUE_MARKS_PATH.open("w", encoding="utf-8") as f:
        json.dump(items, f, ensure_ascii=False, indent=2)


def list_issue_marks(status: str = "active", limit: int = 500) -> tuple[list[dict[str, Any]], int]:
    status_norm = str(status or "active").strip().lower()
    if status_norm not in {"active", "resolved", "all"}:
        status_norm = "active"
    limit = max(1, min(2000, int(limit)))

    with _ISSUE_LOCK:
        items = _load_issue_marks_unlocked()

    normalized: list[dict[str, Any]] = []
    for item in items:
        obj = dict(item)
        item_status = str(obj.get("status", "active")).strip().lower()
        if item_status not in {"active", "resolved"}:
            item_status = "active"
        obj["status"] = item_status
        normalized.append(obj)

    if status_norm == "all":
        filtered = normalized
    else:
        filtered = [x for x in normalized if str(x.get("status", "active")) == status_norm]

    filtered.sort(
        key=lambda x: str(x.get("updated_at") or x.get("created_at") or ""),
        reverse=True,
    )
    total = len(filtered)
    return filtered[:limit], total


def upsert_issue_mark(
    *,
    mark_type: str,
    source_line: str,
    note: str = "",
    record: dict[str, Any] | None = None,
) -> tuple[dict[str, Any], bool]:
    mark_type_text = str(mark_type).strip()
    if mark_type_text not in {"recognition_error", "generation_error"}:
        raise ValueError("mark_type must be recognition_error or generation_error")

    line_text = str(source_line).strip()
    if not line_text:
        raise ValueError("source_line is required")

    now = datetime.now().isoformat(timespec="seconds")
    record_obj = dict(record) if isinstance(record, dict) else {}
    note_text = str(note).strip()

    with _ISSUE_LOCK:
        items = _load_issue_marks_unlocked()
        for item in items:
            if not isinstance(item, dict):
                continue
            if str(item.get("status", "active")).strip().lower() != "active":
                continue
            if str(item.get("mark_type", "")).strip() != mark_type_text:
                continue
            if str(item.get("source_line", "")).strip() != line_text:
                continue
            item["note"] = note_text
            item["record"] = record_obj
            item["updated_at"] = now
            _save_issue_marks_unlocked(items)
            return dict(item), False

        issue = {
            "id": uuid.uuid4().hex[:12],
            "mark_type": mark_type_text,
            "source_line": line_text,
            "note": note_text,
            "record": record_obj,
            "status": "active",
            "created_at": now,
            "updated_at": now,
        }
        items.append(issue)
        _save_issue_marks_unlocked(items)
        return dict(issue), True


def update_issue_mark(
    *,
    issue_id: str,
    mark_type: str | None = None,
    source_line: str | None = None,
    note: str | None = None,
    record: dict[str, Any] | None = None,
) -> dict[str, Any] | None:
    issue_id_text = str(issue_id).strip()
    if not issue_id_text:
        raise ValueError("id is required")

    now = datetime.now().isoformat(timespec="seconds")
    with _ISSUE_LOCK:
        items = _load_issue_marks_unlocked()
        target = None
        for item in items:
            if str(item.get("id", "")).strip() == issue_id_text:
                target = item
                break
        if target is None:
            return None

        if mark_type is not None:
            mark_type_text = str(mark_type).strip()
            if mark_type_text not in {"recognition_error", "generation_error"}:
                raise ValueError("mark_type must be recognition_error or generation_error")
            target["mark_type"] = mark_type_text

        if source_line is not None:
            source_line_text = str(source_line).strip()
            if not source_line_text:
                raise ValueError("source_line is required")
            target["source_line"] = source_line_text

        if note is not None:
            target["note"] = str(note).strip()

        if record is not None:
            target["record"] = dict(record) if isinstance(record, dict) else {}

        target["updated_at"] = now
        _save_issue_marks_unlocked(items)
        return dict(target)


def delete_issue_mark(issue_id: str) -> bool:
    issue_id_text = str(issue_id).strip()
    if not issue_id_text:
        raise ValueError("id is required")

    with _ISSUE_LOCK:
        items = _load_issue_marks_unlocked()
        remain: list[dict[str, Any]] = []
        deleted = False
        for item in items:
            if str(item.get("id", "")).strip() == issue_id_text:
                deleted = True
                continue
            remain.append(item)
        if deleted:
            _save_issue_marks_unlocked(remain)
        return deleted


def resolve_issue_marks_by_source_line(source_line: str, reason: str = "") -> dict[str, Any]:
    line_text = str(source_line).strip()
    if not line_text:
        return {"count": 0, "ids": []}

    now = datetime.now().isoformat(timespec="seconds")
    resolved_ids: list[str] = []
    with _ISSUE_LOCK:
        items = _load_issue_marks_unlocked()
        changed = False
        for item in items:
            if str(item.get("status", "active")).strip().lower() != "active":
                continue
            if str(item.get("source_line", "")).strip() != line_text:
                continue
            item["status"] = "resolved"
            item["resolved_at"] = now
            item["updated_at"] = now
            if reason:
                item["resolved_reason"] = reason
            resolved_ids.append(str(item.get("id", "")).strip())
            changed = True
        if changed:
            _save_issue_marks_unlocked(items)
    return {"count": len([x for x in resolved_ids if x]), "ids": [x for x in resolved_ids if x]}


def normalize_skip_line(line: str) -> str:
    return re.sub(r"\s+", "", str(line or "").strip())


def skip_suppression_key(line: str, reason: str = "") -> str:
    normalized_line = normalize_skip_line(line)
    normalized_reason = str(reason or "").strip()
    if not normalized_line:
        return ""
    return f"{normalized_reason}|{normalized_line}"


def skip_suppression_keys_for_item(line: str, reason: str = "") -> list[str]:
    normalized_line = normalize_skip_line(line)
    if not normalized_line:
        return []
    normalized_reason = str(reason or "").strip()
    keys = [f"*|{normalized_line}"]
    if normalized_reason:
        keys.insert(0, f"{normalized_reason}|{normalized_line}")
    return keys


def _load_skip_suppressions_unlocked() -> list[dict[str, Any]]:
    if not SKIPPED_SUPPRESS_PATH.exists():
        return []
    try:
        with SKIPPED_SUPPRESS_PATH.open("r", encoding="utf-8") as f:
            raw = json.load(f)
    except Exception:
        return []
    if not isinstance(raw, list):
        return []
    return [x for x in raw if isinstance(x, dict)]


def _save_skip_suppressions_unlocked(items: list[dict[str, Any]]) -> None:
    SKIPPED_SUPPRESS_PATH.parent.mkdir(parents=True, exist_ok=True)
    with SKIPPED_SUPPRESS_PATH.open("w", encoding="utf-8") as f:
        json.dump(items, f, ensure_ascii=False, indent=2)


def list_skip_suppressions() -> list[dict[str, Any]]:
    with _SKIP_SUPPRESS_LOCK:
        return _load_skip_suppressions_unlocked()


def build_skip_suppression_lookup(items: list[dict[str, Any]]) -> set[str]:
    keys: set[str] = set()
    for item in items:
        if not isinstance(item, dict):
            continue
        key = str(item.get("key", "")).strip()
        if key:
            keys.add(key)
            continue
        line = str(item.get("line", "")).strip()
        reason = str(item.get("reason", "")).strip()
        fallback = skip_suppression_key(line, reason)
        if fallback:
            keys.add(fallback)
    return keys


def is_skip_item_suppressed(line: str, reason: str, lookup: set[str]) -> bool:
    for key in skip_suppression_keys_for_item(line, reason):
        if key in lookup:
            return True
    return False


def suppress_skip_item(line: str, reason: str = "") -> tuple[dict[str, Any], bool]:
    line_text = str(line or "").strip()
    if not line_text:
        raise ValueError("line is required")
    reason_text = str(reason or "").strip()
    # "*" means suppress the same line regardless of reason.
    normalized_reason = reason_text if reason_text else "*"
    key = skip_suppression_key(line_text, normalized_reason)
    if not key:
        raise ValueError("line is required")

    now = datetime.now().isoformat(timespec="seconds")
    with _SKIP_SUPPRESS_LOCK:
        items = _load_skip_suppressions_unlocked()
        for item in items:
            if str(item.get("key", "")).strip() != key:
                continue
            item["updated_at"] = now
            _save_skip_suppressions_unlocked(items)
            return dict(item), False

        obj = {
            "id": uuid.uuid4().hex[:12],
            "key": key,
            "line": line_text,
            "reason": normalized_reason,
            "created_at": now,
            "updated_at": now,
        }
        items.append(obj)
        _save_skip_suppressions_unlocked(items)
        return obj, True


def clear_skip_suppressions() -> int:
    with _SKIP_SUPPRESS_LOCK:
        items = _load_skip_suppressions_unlocked()
        count = len(items)
        _save_skip_suppressions_unlocked([])
        return count


def register_active_job_dir(job_dir: Path) -> None:
    with _ACTIVE_JOB_LOCK:
        ACTIVE_JOB_DIRS.add(str(job_dir.resolve()))


def unregister_active_job_dir(job_dir: Path) -> None:
    with _ACTIVE_JOB_LOCK:
        ACTIVE_JOB_DIRS.discard(str(job_dir.resolve()))


def get_active_job_dirs() -> set[str]:
    with _ACTIVE_JOB_LOCK:
        return set(ACTIVE_JOB_DIRS)


def acquire_generation_slot(token: str = "") -> bool:
    if not _GENERATE_SLOT_LOCK.acquire(blocking=False):
        return False
    with _GENERATE_STATE_LOCK:
        ACTIVE_GENERATION["token"] = str(token).strip()
        ACTIVE_GENERATION["started_at"] = time.time()
    return True


def release_generation_slot(token: str = "") -> None:
    _ = token
    with _GENERATE_STATE_LOCK:
        ACTIVE_GENERATION["token"] = ""
        ACTIVE_GENERATION["started_at"] = 0.0
    if _GENERATE_SLOT_LOCK.locked():
        try:
            _GENERATE_SLOT_LOCK.release()
        except RuntimeError:
            pass


def get_active_generation() -> dict[str, Any]:
    with _GENERATE_STATE_LOCK:
        return dict(ACTIVE_GENERATION)


def today_log_path() -> Path:
    day = datetime.now().strftime("%Y-%m-%d")
    return REVIEW_LOG_DIR / f"review_{day}.jsonl"


def count_log_lines(path: Path) -> int:
    if not path.exists():
        return 0
    n = 0
    with path.open("r", encoding="utf-8") as f:
        for _ in f:
            n += 1
    return n


def append_review_log(event: str, payload: dict[str, Any] | None = None) -> Path:
    REVIEW_LOG_DIR.mkdir(parents=True, exist_ok=True)
    log_path = today_log_path()
    entry = {
        "ts": datetime.now().isoformat(timespec="seconds"),
        "event": str(event),
    }
    if isinstance(payload, dict):
        entry.update(payload)

    with _LOG_LOCK:
        with log_path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(entry, ensure_ascii=False))
            f.write("\n")
    return log_path


def get_valid_status_list(config: dict[str, Any]) -> list[str]:
    valid_status = config.get("replace_algorithm", {}).get("status", {}).get("valid_status", [])
    if not isinstance(valid_status, list) or not valid_status:
        valid_status = config.get("status_extraction", {}).get("valid_status", [])
    if not isinstance(valid_status, list):
        return []
    return [str(x).strip() for x in valid_status if str(x).strip()]


def normalize_status_value(status: str, config: dict[str, Any]) -> str:
    fallback = str(config.get("status_extraction", {}).get("fallback", "成功营销")).strip() or "成功营销"
    valid_status = get_valid_status_list(config)
    if not valid_status:
        return str(status).strip() or fallback
    s = str(status).strip()
    if s in valid_status:
        return s
    return fallback if fallback in valid_status else valid_status[0]


def is_date_header_line(normalized_line: str) -> bool:
    return bool(re.fullmatch(r"\d{1,2}月\d{1,2}[日号]?", normalized_line))


def log_parse_skipped(skipped: list[dict[str, Any]], source: str) -> int:
    written = 0
    for item in skipped:
        if not isinstance(item, dict):
            continue
        reason = str(item.get("reason", "")).strip()
        # 说明行不记录，避免日志噪音。
        if not reason or reason == "skip_line_rule":
            continue
        source_line = str(item.get("line", "")).strip()
        append_review_log(
            "parse_skip",
            {
                "source": source,
                "reason": reason,
                "source_line": source_line,
            },
        )
        written += 1
    return written


def is_path_under(path: Path, parent: Path) -> bool:
    try:
        path.resolve().relative_to(parent.resolve())
        return True
    except Exception:
        return False


def cleanup_output_artifacts(output_dir: Path) -> dict[str, int]:
    output_dir.mkdir(parents=True, exist_ok=True)
    removed_dirs = 0
    removed_files = 0
    errors = 0
    active_dirs = get_active_job_dirs()

    for entry in output_dir.iterdir():
        try:
            if entry.is_dir() and entry.name.startswith("job_"):
                if str(entry.resolve()) in active_dirs:
                    continue
                shutil.rmtree(entry)
                removed_dirs += 1
                continue
            if entry.is_file() and entry.suffix.lower() in {".png", ".pdf", ".pptx", ".zip"}:
                entry.unlink(missing_ok=True)
                removed_files += 1
        except Exception:
            errors += 1

    with _DOWNLOAD_LOCK:
        stale_tokens: list[str] = []
        for token, meta in DOWNLOAD_CACHE.items():
            file_path = Path(str(meta.get("file_path", "")))
            if not file_path.exists() or is_path_under(file_path, output_dir):
                stale_tokens.append(token)
        for token in stale_tokens:
            DOWNLOAD_CACHE.pop(token, None)

    return {
        "removed_dirs": removed_dirs,
        "removed_files": removed_files,
        "errors": errors,
    }


def daily_cleanup_loop(stop_event: threading.Event) -> None:
    while not stop_event.is_set():
        now = datetime.now()
        next_midnight = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
        wait_seconds = max((next_midnight - now).total_seconds(), 1.0)
        if stop_event.wait(wait_seconds):
            break

        try:
            config = load_config()
            output_dir = resolve_output_dir(config)
            stat = cleanup_output_artifacts(output_dir)
            print(
                "[daily-cleanup] done at midnight, "
                f"removed_dirs={stat['removed_dirs']} removed_files={stat['removed_files']} errors={stat['errors']}"
            )
            append_review_log(
                "daily_cleanup",
                {
                    "output_dir": str(output_dir),
                    **stat,
                },
            )
        except Exception as exc:
            print(f"[daily-cleanup] failed: {exc}")
            append_review_log("daily_cleanup_error", {"error": str(exc)})


def build_history_lookup(history: list[dict[str, Any]], key_fields: list[str]) -> dict[str, dict[str, Any]]:
    lookup: dict[str, dict[str, Any]] = {}
    for item in history:
        if not isinstance(item, dict):
            continue
        k = key_for_record(item, key_fields)
        if not k:
            continue
        existing = lookup.get(k)
        if existing is None:
            lookup[k] = item
            continue
        prev_ts = str(existing.get("created_at", ""))
        curr_ts = str(item.get("created_at", ""))
        if curr_ts >= prev_ts:
            lookup[k] = item
    return lookup


def build_history_signature_lookup(history: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    lookup: dict[str, dict[str, Any]] = {}
    for item in history:
        if not isinstance(item, dict):
            continue
        k = signature_key_for_record(item)
        if not k:
            continue
        existing = lookup.get(k)
        if existing is None:
            lookup[k] = item
            continue
        prev_ts = str(existing.get("created_at", ""))
        curr_ts = str(item.get("created_at", ""))
        if curr_ts >= prev_ts:
            lookup[k] = item
    return lookup


def resolve_history_image_path(history_record: dict[str, Any], config: dict[str, Any]) -> Path | None:
    candidates: list[Path] = []

    image_path = str(history_record.get("image_path", "")).strip()
    if image_path:
        p = Path(image_path)
        if not p.is_absolute():
            p = BASE_DIR / p
        candidates.append(p)

    output_dir = resolve_output_dir(config)
    png_file = str(history_record.get("png_file", "")).strip()
    output_file = str(history_record.get("output_file", "")).strip()
    job_id = str(history_record.get("job_id", "")).strip()

    if png_file and job_id:
        candidates.append(output_dir / job_id / png_file)
    if output_file and job_id:
        candidates.append(output_dir / job_id / output_file)

    if output_file:
        # 兼容老历史记录：仅存 output_file 时尝试在 job_* 中定位。
        for p in output_dir.glob(f"job_*/{output_file}"):
            candidates.append(p)
            break

    seen: set[str] = set()
    for c in candidates:
        key = str(c)
        if key in seen:
            continue
        seen.add(key)
        if c.exists() and c.is_file():
            return c
    return None


def attach_duplicate_download_info(
    dup_record: dict[str, Any],
    history_record: dict[str, Any] | None,
    config: dict[str, Any],
) -> None:
    if not isinstance(history_record, dict):
        return

    image_path = resolve_history_image_path(history_record, config)
    if not image_path:
        return

    token = register_download(image_path, content_type="image/png")
    dup_record["download_token"] = token
    dup_record["download_url"] = f"/api/download/{token}"
    dup_record["image_name"] = image_path.name
    if not dup_record.get("output_file"):
        dup_record["output_file"] = image_path.name


def build_history_view_items(
    history: list[dict[str, Any]],
    config: dict[str, Any],
    limit: int = 300,
) -> list[dict[str, Any]]:
    items = [x for x in history if isinstance(x, dict)]
    items.sort(key=lambda x: str(x.get("created_at", "")), reverse=True)
    if limit > 0:
        items = items[:limit]

    rows: list[dict[str, Any]] = []
    for item in items:
        row = dict(item)
        image_path = resolve_history_image_path(row, config)
        if image_path:
            token = register_download(image_path, content_type="image/png")
            row["download_token"] = token
            row["download_url"] = f"/api/download/{token}"
            row["image_exists"] = True
            row["image_name"] = image_path.name
        else:
            row["image_exists"] = False
        rows.append(row)
    return rows


def _cleanup_progress_cache_locked() -> None:
    now = time.time()
    stale: list[str] = []
    for token, item in GEN_PROGRESS.items():
        updated_at = float(item.get("updated_at", 0))
        if now - updated_at > 3 * 3600:
            stale.append(token)
    for token in stale:
        GEN_PROGRESS.pop(token, None)


def cleanup_progress_cache() -> None:
    with _PROGRESS_LOCK:
        _cleanup_progress_cache_locked()


def set_generation_progress(
    token: str,
    *,
    status: str,
    stage: str,
    percent: int,
    detail: str = "",
    error: str = "",
) -> None:
    t = str(token).strip()
    if not t:
        return
    now_iso = datetime.now().isoformat(timespec="seconds")
    item = {
        "token": t,
        "status": status,
        "stage": stage,
        "percent": max(0, min(100, int(percent))),
        "detail": detail,
        "error": error,
        "updated_at": time.time(),
        "updated_at_text": now_iso,
    }
    with _PROGRESS_LOCK:
        _cleanup_progress_cache_locked()
        GEN_PROGRESS[t] = item


def get_generation_progress(token: str) -> dict[str, Any] | None:
    t = str(token).strip()
    if not t:
        return None
    with _PROGRESS_LOCK:
        _cleanup_progress_cache_locked()
        item = GEN_PROGRESS.get(t)
        if item is None:
            return None
        return dict(item)


def strip_trailing_zero(num_text: str) -> str:
    if "." not in num_text:
        return num_text
    return num_text.rstrip("0").rstrip(".")


def floor_amount_number(value: Any) -> int | None:
    s = str(value).strip()
    if not s:
        return None
    m = re.search(r"\d+(?:\.\d+)?", s)
    if not m:
        return None
    try:
        num = float(m.group(0))
    except ValueError:
        return None
    return max(0, int(num))


def normalize_amount_text(value: Any) -> str:
    v = floor_amount_number(value)
    if v is None:
        return ""
    return f"{v}万"


def normalize_line(line: str, line_pattern: str) -> str:
    line = line.strip()
    if not line:
        return ""

    try:
        line = re.sub(line_pattern, "", line)
    except re.error:
        pass

    line = re.sub(r"\s+", "", line)
    return line.strip()


def extract_line_serial(source_line: str, line_pattern: str) -> str:
    line = str(source_line).strip()
    if not line:
        return ""

    # Prefer extracting from configured line prefix regex.
    try:
        m = re.match(line_pattern, line)
        if m:
            prefix = m.group(0)
            dm = re.search(r"\d+", prefix)
            if dm:
                return str(int(dm.group(0)))
    except re.error:
        pass

    # Fallback: common numbering formats (e.g. "12、", "12.", "12)").
    dm = re.match(r"^\s*(\d+)\s*(?:[、,，.．\)）]\s*)?", line)
    if dm:
        return str(int(dm.group(1)))
    return ""


def line_signature(normalized_line: str) -> str:
    text = re.sub(r"\s+", "", str(normalized_line).strip())
    if not text:
        return ""
    digest = hashlib.sha1(text.encode("utf-8")).hexdigest()
    return digest[:16]


def extract_branch(line: str, config: dict[str, Any]) -> tuple[str | None, str | None]:
    branches_cfg = config.get("branches", {})
    allowed = branches_cfg.get("allowed", [])
    alias = branches_cfg.get("alias", {})

    candidates: list[str] = []
    if isinstance(alias, dict):
        candidates.extend(str(k) for k in alias.keys())
    if isinstance(allowed, list):
        candidates.extend(str(b) for b in allowed)

    candidates = sorted(set(candidates), key=len, reverse=True)

    raw_branch = None
    for name in candidates:
        if name and name in line:
            raw_branch = name
            break

    if raw_branch is None:
        return None, None

    normalized = str(alias.get(raw_branch, raw_branch)) if isinstance(alias, dict) else raw_branch
    return raw_branch, normalized


def match_status_heuristic(line: str, status: str) -> bool:
    patterns = STATUS_HEURISTIC_PATTERNS.get(str(status).strip(), [])
    for pattern in patterns:
        if pattern.search(line):
            return True
    return False


def extract_status(line: str, config: dict[str, Any]) -> str:
    # Normalize common wording variants before status matching.
    status_line = str(line or "").replace("外行", "他行")
    cfg = config.get("status_extraction", {})
    rules = cfg.get("extraction_rules", {})
    order = cfg.get("priority_order", [])
    fallback = str(cfg.get("fallback", "成功营销"))

    if not isinstance(rules, dict):
        return fallback

    if not isinstance(order, list) or not order:
        order = [str(k) for k in rules.keys()]

    for status in order:
        keywords = rules.get(status, [status])
        if not isinstance(keywords, list):
            continue
        for kw in keywords:
            kw_text = str(kw)
            if kw_text and kw_text in status_line:
                return normalize_status_value(str(status), config)

    valid_status = set(get_valid_status_list(config))
    for status in order:
        s = str(status).strip()
        if not s:
            continue
        if valid_status and s not in valid_status:
            continue
        if match_status_heuristic(status_line, s):
            return normalize_status_value(s, config)

    # Heuristic: plain "提现" defaults to 微信提现 unless it clearly indicates 商户.
    if "提现" in status_line:
        if "商户" in status_line and "商户提现" in valid_status:
            return "商户提现"
        if "微信提现" in valid_status:
            return "微信提现"

    # Heuristic: "揽收他行40万存一年" 这类语句不能回退到“成功营销”。
    if "他行" in status_line:
        if "揽收他行" in valid_status:
            return "揽收他行"
        if "他行挖转" in valid_status:
            return "他行挖转"

    return normalize_status_value(fallback, config)


def normalize_chinese_number_text(text: str) -> str:
    return (
        str(text)
        .strip()
        .replace("點", "点")
        .replace("拾", "十")
        .replace("佰", "百")
        .replace("仟", "千")
    )


def chinese_integer_to_float(text: str) -> float | None:
    s = normalize_chinese_number_text(text)
    if not s:
        return None

    total = 0
    section = 0
    number = 0
    seen = False

    for ch in s:
        if ch in CHINESE_DIGIT_MAP:
            number = CHINESE_DIGIT_MAP[ch]
            seen = True
            continue

        unit = CHINESE_UNIT_MAP.get(ch)
        if unit is None:
            return None

        seen = True
        if unit in {10, 100, 1000}:
            if number == 0:
                number = 1
            section += number * unit
            number = 0
            continue

        section += number
        if section == 0:
            section = 1
        total += section * unit
        section = 0
        number = 0

    if not seen:
        return None
    return float(total + section + number)


def chinese_text_to_float(text: str) -> float | None:
    s = normalize_chinese_number_text(text)
    if not s:
        return None

    if "点" in s:
        int_part, frac_part = s.split("点", 1)
        if not frac_part:
            return None
        int_value = chinese_integer_to_float(int_part) if int_part else 0.0
        if int_value is None:
            return None
        frac_digits: list[str] = []
        for ch in frac_part:
            digit = CHINESE_DIGIT_MAP.get(ch)
            if digit is None:
                return None
            frac_digits.append(str(digit))
        return float(f"{int(int_value)}.{''.join(frac_digits)}")

    return chinese_integer_to_float(s)


def chinese_digit_fraction(text: str) -> float | None:
    s = normalize_chinese_number_text(text)
    if not s:
        return None
    if not all(ch in CHINESE_DIGIT_MAP for ch in s):
        return None
    digits = "".join(str(CHINESE_DIGIT_MAP[ch]) for ch in s)
    return int(digits) / (10 ** len(digits))


def parse_wan_suffix_shorthand(text: str) -> float | None:
    s = normalize_chinese_number_text(text)
    if not s:
        return 0.0

    if any(ch in s for ch in ("十", "百", "千", "万", "亿")):
        return None

    if "点" in s:
        right_val = chinese_text_to_float(s)
        if right_val is None:
            return None
        # e.g. "五点五" after "万" => 0.55 万
        return right_val / 10.0

    return chinese_digit_fraction(s)


def parse_chinese_amount_value(raw_text: str, right_1: str, right_2: str) -> float | None:
    token = normalize_chinese_number_text(raw_text)
    if not token:
        return None

    has_digit = any(ch in CHINESE_DIGIT_MAP for ch in token)
    has_small_unit = any(ch in token for ch in ("十", "百", "千"))
    if not has_digit and not has_small_unit:
        return None

    has_wan = ("万" in token) or (right_1 == "万") or right_2.startswith("万元")
    if has_wan:
        if "万" in token:
            left, right = token.split("万", 1)
            left_text = left.strip()
            right_text = right.strip()

            left_value = chinese_text_to_float(left_text) if left_text else 0.0
            if left_value is None:
                return None

            if not right_text:
                return left_value

            right_norm = normalize_chinese_number_text(right_text)

            # Full unit expression (e.g. 两万五千、两万零五百) -> convert absolute value to "万".
            if any(ch in right_norm for ch in ("十", "百", "千", "万", "亿")):
                absolute = chinese_text_to_float(token)
                if absolute is None:
                    return None
                return absolute / 10000.0

            # Colloquial shorthand (e.g. 两万五 -> 2.5万, 一万二 -> 1.2万).
            shorthand = parse_wan_suffix_shorthand(right_norm)
            if shorthand is not None:
                return left_value + shorthand

            right_value = chinese_text_to_float(right_norm)
            if right_value is None:
                return left_value
            return left_value + (right_value / 10.0)

        # e.g. "五 万" where token may be "五"
        value = chinese_text_to_float(token)
        return value

    return chinese_text_to_float(token)


def extract_amount_candidates(text: str) -> list[dict[str, Any]]:
    candidates: list[dict[str, Any]] = []

    for m in re.finditer(r"\d+(?:\.\d+)?", text):
        num_text = m.group(0)
        start, end = m.span()

        right_2 = text[end : end + 2]
        right_1 = text[end : end + 1]

        # Exclude time/date/tenor hints: 2月23日, 5年交, 6个月 ...
        if right_1 in {"年", "月", "期", "号"}:
            continue
        if right_2.startswith("个月"):
            continue

        has_wan = False
        if right_1 in {"万", "W", "w"} or right_2.startswith("万元"):
            has_wan = True

        try:
            value = float(num_text)
        except ValueError:
            continue

        candidates.append(
            {
                "value": value,
                "num": strip_trailing_zero(num_text),
                "start": start,
                "end": end,
                "has_wan": has_wan,
            }
        )

    for m in CHINESE_NUMBER_RE.finditer(text):
        raw_text = m.group(0)
        start, end = m.span()

        right_2 = text[end : end + 2]
        right_1 = text[end : end + 1]

        if right_1 in {"年", "月", "期", "号"}:
            continue
        if right_2.startswith("个月"):
            continue

        # Avoid treating single Chinese digits in names as amount, e.g. "钱七/周八/吴九".
        # Keep unit-bearing forms (e.g. "十万", "五万元") untouched.
        if (
            len(raw_text) == 1
            and raw_text in CHINESE_DIGIT_MAP
            and right_1 not in {"万", "元"}
            and not right_2.startswith("万元")
        ):
            continue

        value = parse_chinese_amount_value(raw_text, right_1, right_2)
        if value is None:
            continue

        has_wan = ("万" in raw_text) or (right_1 == "万") or right_2.startswith("万元")
        candidates.append(
            {
                "value": value,
                "num": strip_trailing_zero(str(value)),
                "start": start,
                "end": end,
                "has_wan": has_wan,
            }
        )

    return candidates


def amount_num_to_text(num_text: str) -> str:
    v = floor_amount_number(num_text)
    if v is None:
        return ""
    return f"{v}万"


def pick_amount_from_candidates(
    text: str,
    candidates: list[dict[str, Any]],
    anchor_pos: int | None = None,
) -> str | None:
    if not candidates:
        return None

    has_wan_candidates = [c for c in candidates if bool(c.get("has_wan"))]

    if anchor_pos is None:
        # Prefer "合计/共计" amount, otherwise largest amount.
        preferred = has_wan_candidates or candidates
        for c in preferred:
            near = text[max(0, c["start"] - 2) : c["start"] + 2]
            if "合计" in near or "共计" in near:
                return amount_num_to_text(c["num"])

        best = max(preferred, key=lambda x: x["value"])
        return amount_num_to_text(best["num"])

    def score(c: dict[str, Any]) -> tuple[float, int]:
        distance = abs(c["start"] - anchor_pos)

        left = text[max(0, c["start"] - 3) : c["start"]]
        if "合计" in left or "共计" in left:
            distance -= 3

        # Prefer value on the left side if equally close.
        side = 0 if c["start"] <= anchor_pos else 1
        return (distance, side)

    preferred = has_wan_candidates or candidates
    best = min(preferred, key=score)
    return amount_num_to_text(best["num"])


def split_segments(text: str) -> list[str]:
    parts = re.split(r"[，,；;。]", text)
    return [p.strip() for p in parts if p.strip()]


def normalize_insurance_year(value: Any) -> str | None:
    if value is None:
        return None
    s = str(value).strip()
    if s in {"3", "3年", "3年交", "三年", "三年交"}:
        return "3"
    if s in {"5", "5年", "5年交", "五年", "五年交"}:
        return "5"
    return None


def normalize_insurance_year_choices(value: Any) -> dict[str, str]:
    if not isinstance(value, dict):
        return {}
    out: dict[str, str] = {}
    for k, v in value.items():
        key = str(k).strip()
        year = normalize_insurance_year(v)
        if key and year:
            out[key] = year
    return out


def build_insurance_choice_key(
    *,
    source_line: str,
    raw_text: str,
    branch: str,
    amount: str,
    page: str,
    item_index: int,
) -> str:
    seed = f"{source_line}|{raw_text}|{branch}|{amount}|{page}|{item_index}"
    return hashlib.sha1(seed.encode("utf-8")).hexdigest()[:16]


def pick_insurance_year_for_record(
    *,
    choice_key: str,
    source_line: str,
    raw_text: str,
    branch: str,
    amount: str,
    year_map: dict[str, str],
    default_year: str | None,
) -> str | None:
    candidates = [
        choice_key,
        source_line,
        raw_text,
        f"{source_line}|{amount}",
        f"{raw_text}|{amount}",
        f"{branch}|{amount}",
    ]
    for key in candidates:
        if key and key in year_map:
            y = normalize_insurance_year(year_map.get(key))
            if y in {"3", "5"}:
                return y
    return normalize_insurance_year(default_year)


def make_product(type_keyword: str, page: str, amount: str | None, **kwargs: Any) -> dict[str, Any]:
    item = {
        "type": type_keyword,
        "page": page,
        "amount": amount,
    }
    item.update(kwargs)
    return item


def detect_products(
    line: str,
    config: dict[str, Any],
    insurance_year_choice: str | None,
) -> list[dict[str, Any]]:
    insurance_cfg = config.get("insurance_handling", {})
    insurance_page = str(insurance_cfg.get("page", "page_3")) if isinstance(insurance_cfg, dict) else "page_3"

    products: list[dict[str, Any]] = []
    seen: set[str] = set()

    segments = split_segments(line)
    if not segments:
        segments = [line]
    line_candidates = extract_amount_candidates(line)
    seg_search_pos = 0

    def locate_segment_start(seg_text: str) -> int:
        nonlocal seg_search_pos
        if not seg_text:
            return 0
        pos = line.find(seg_text, seg_search_pos)
        if pos < 0:
            pos = line.find(seg_text)
        if pos >= 0:
            seg_search_pos = pos + len(seg_text)
            return pos
        return 0

    def pick_segment_amount(
        seg_text: str,
        seg_candidates: list[dict[str, Any]],
        seg_anchor: int | None,
        seg_start: int,
    ) -> str | None:
        amount = pick_amount_from_candidates(seg_text, seg_candidates, seg_anchor)
        if amount:
            return amount
        if not line_candidates:
            return None
        line_anchor = None
        if isinstance(seg_anchor, int) and seg_anchor >= 0:
            line_anchor = seg_start + seg_anchor
        return pick_amount_from_candidates(line, line_candidates, line_anchor)

    for seg in segments:
        seg_candidates = extract_amount_candidates(seg)
        seg_start = locate_segment_start(seg)

        # 1) Insurance (always mapped to 期交 page)
        if "保险" in seg or "期交" in seg or re.search(r"(?:3|5|三|五)\s*年交", seg):
            year = None
            if re.search(r"(?:3|三)\s*年交", seg):
                year = "3"
            elif re.search(r"(?:5|五)\s*年交", seg):
                year = "5"
            elif insurance_year_choice in {"3", "5"}:
                year = insurance_year_choice

            insurance_anchor = seg.find("保险")
            if insurance_anchor < 0:
                insurance_anchor = 0
            amount = pick_segment_amount(seg, seg_candidates, insurance_anchor, seg_start)

            if year == "3":
                type_keyword = "3年交"
            elif year == "5":
                type_keyword = "5年交"
            else:
                type_keyword = "保险"

            key = f"{type_keyword}|{insurance_page}|{amount}"
            if key not in seen:
                products.append(
                    make_product(
                        type_keyword,
                        insurance_page,
                        amount,
                        is_insurance=True,
                        needs_insurance_year=(year is None),
                    )
                )
                seen.add(key)

        # 2) Wealth/Fund/Asset
        if "理财" in seg:
            amount = pick_segment_amount(seg, seg_candidates, seg.find("理财"), seg_start)
            key = f"理财|page_5|{amount}"
            if key not in seen:
                products.append(make_product("理财", "page_5", amount))
                seen.add(key)

        if "基金" in seg:
            amount = pick_segment_amount(seg, seg_candidates, seg.find("基金"), seg_start)
            key = f"基金|page_7|{amount}"
            if key not in seen:
                products.append(make_product("基金", "page_7", amount))
                seen.add(key)

        if "资管" in seg:
            amount = pick_segment_amount(seg, seg_candidates, seg.find("资管"), seg_start)
            key = f"资管|page_6|{amount}"
            if key not in seen:
                products.append(make_product("资管", "page_6", amount))
                seen.add(key)

        # 3) Fixed-term deposit variants
        for term in TERM_DEFS:
            rgx = term["regex"]
            for m in rgx.finditer(seg):
                # Avoid interpreting "2年理财" as one-year term.
                if term["page"] == "page_2" and "理财" in seg and "定期" not in seg and "存" not in seg:
                    continue

                amount = pick_segment_amount(seg, seg_candidates, m.start(), seg_start)
                key = f"{term['type']}|{term['page']}|{amount}"
                if key not in seen:
                    products.append(make_product(str(term["type"]), str(term["page"]), amount))
                    seen.add(key)

    return products


def is_demand_deposit_only_line(line: str) -> bool:
    # "活期"类关键词但没有明确期限（3/6个月、半年、1年、一年）时，默认不生成。
    demand_markers = [
        "微信提现",
        "微信支付宝提现",
        "支付宝提现",
        "挖转",
        "他行",
        "揽收现金",
        "存量提升",
        "揽收商户",
        "商户提现",
        "揽收彩礼",
    ]
    explicit_product_markers = [
        "保险",
        "年交",
        "理财",
        "基金",
        "资管",
        "趸交",
        "三个月",
        "3个月",
        "六个月",
        "6个月",
        "半年",
        "一年",
        "1年",
    ]

    if not extract_amount_candidates(line):
        return False
    if not any(x in line for x in demand_markers):
        return False
    if any(x in line for x in explicit_product_markers):
        return False
    return True


def key_for_record(record: dict[str, Any], key_fields: list[str]) -> str:
    line_serial = str(record.get("line_serial", "")).strip()
    if line_serial:
        line_product_index = str(record.get("line_product_index", "")).strip() or "1"
        sig = str(record.get("line_signature", "")).strip()
        if not sig:
            sig = line_signature(str(record.get("raw_text", "")))
        parts = [f"line:{line_serial}", f"item:{line_product_index}"]
        if sig:
            parts.append(f"sig:{sig}")
        return "|".join(parts)

    return "|".join(str(record.get(k, "")).strip() for k in key_fields)


def signature_key_for_record(record: dict[str, Any]) -> str:
    line_product_index = str(record.get("line_product_index", "")).strip() or "1"
    sig = str(record.get("line_signature", "")).strip()
    if not sig:
        raw_text = str(record.get("raw_text", "")).strip()
        if raw_text:
            sig = line_signature(raw_text)
        else:
            source_line = str(record.get("source_line", "")).strip()
            if source_line:
                normalized_source = normalize_line(source_line, r"^\s*\d+\s*(?:[、,，.．\)）]\s*)?")
                sig = line_signature(normalized_source)
    if not sig:
        return ""
    return f"sig:{sig}|item:{line_product_index}"


def render_output_filename(config: dict[str, Any], record: dict[str, Any], index: int) -> str:
    settings = config.get("output_settings", {})
    pattern = "喜报_{branch}_{index}.png"
    if isinstance(settings, dict):
        pattern = str(settings.get("output_pattern", pattern))
    try:
        name = pattern.format(branch=record.get("branch", "未知网点"), index=index)
    except Exception:
        name = f"喜报_{record.get('branch', '未知网点')}_{index}.png"

    if not name.lower().endswith(".png"):
        name = f"{name}.png"
    return name


def normalize_branch_value(branch: Any, config: dict[str, Any]) -> str:
    b = str(branch).strip()
    alias = config.get("branches", {}).get("alias", {})
    if isinstance(alias, dict) and b in alias:
        return str(alias.get(b, b)).strip()
    return b


def infer_page_from_type(type_keyword: str, config: dict[str, Any]) -> str:
    t = str(type_keyword).strip()
    if not t:
        return ""

    type_map = config.get("type_matching", {})
    if isinstance(type_map, dict):
        direct = str(type_map.get(t, "")).strip()
        if direct:
            return direct

        pairs = [(str(k), str(v)) for k, v in type_map.items()]
        pairs.sort(key=lambda x: len(x[0]), reverse=True)
        for k, v in pairs:
            if k and k in t and v:
                return v.strip()

    for term in TERM_DEFS:
        if t == str(term.get("type", "")):
            return str(term.get("page", "")).strip()
    return ""


def apply_record_overrides(
    record: dict[str, Any],
    updates: dict[str, Any],
    config: dict[str, Any],
) -> dict[str, Any]:
    out = dict(record)
    if not isinstance(updates, dict):
        return out

    if "branch" in updates:
        branch = normalize_branch_value(updates.get("branch", ""), config)
        if branch:
            out["branch"] = branch

    if "amount" in updates:
        amount = normalize_amount_text(updates.get("amount", ""))
        if amount:
            out["amount"] = amount

    if "type" in updates:
        t = str(updates.get("type", "")).strip()
        if t:
            out["type"] = t
            if "page" not in updates:
                page = infer_page_from_type(t, config)
                if page:
                    out["page"] = page

    if "page" in updates:
        page = str(updates.get("page", "")).strip()
        if page:
            out["page"] = page

    if "status" in updates:
        status = str(updates.get("status", "")).strip()
        if status:
            out["status"] = normalize_status_value(status, config)

    return out


def validate_record_for_generation(record: dict[str, Any], config: dict[str, Any]) -> None:
    branch = str(record.get("branch", "")).strip()
    amount = normalize_amount_text(record.get("amount", ""))
    type_keyword = str(record.get("type", "")).strip()
    page = str(record.get("page", "")).strip()
    status = str(record.get("status", "")).strip()

    if not branch:
        raise ValueError("branch is required")
    allowed = config.get("branches", {}).get("allowed", [])
    if isinstance(allowed, list) and allowed:
        allow_set = {str(x).strip() for x in allowed}
        if branch not in allow_set:
            raise ValueError(f"branch not allowed: {branch}")

    if not amount:
        raise ValueError("amount is required")
    record["amount"] = amount

    if not type_keyword:
        raise ValueError("type is required")
    if not page:
        page = infer_page_from_type(type_keyword, config)
        if page:
            record["page"] = page
    page = str(record.get("page", "")).strip()
    if not page:
        raise ValueError("page is required")

    pages = config.get("pages", {})
    if isinstance(pages, dict) and page not in pages:
        raise ValueError(f"invalid page: {page}")

    if not status:
        status = str(config.get("status_extraction", {}).get("fallback", "成功营销")).strip() or "成功营销"
    valid_status = get_valid_status_list(config)
    if valid_status and status not in valid_status:
        raise ValueError(f"invalid status: {status}")
    record["status"] = normalize_status_value(status, config)


def match_manual_rule(rule: dict[str, Any], source_line: str, normalized_line: str) -> bool:
    if not isinstance(rule, dict) or not bool(rule.get("enabled", True)):
        return False
    keyword = str(rule.get("keyword", "")).strip()
    if not keyword:
        return False
    mode = str(rule.get("match_mode", "normalized")).strip().lower()
    if mode == "source":
        return keyword in source_line
    if mode == "both":
        return keyword in source_line or keyword in normalized_line
    return keyword in normalized_line


def apply_manual_rules_to_record(
    record: dict[str, Any],
    *,
    source_line: str,
    normalized_line: str,
    rules: list[dict[str, Any]],
    config: dict[str, Any],
) -> tuple[dict[str, Any], list[str]]:
    if not rules:
        return record, []

    out = dict(record)
    hit_ids: list[str] = []
    for rule in rules:
        if not match_manual_rule(rule, source_line, normalized_line):
            continue
        updates = rule.get("updates", {})
        if not isinstance(updates, dict):
            continue
        try:
            out = apply_record_overrides(out, updates, config)
        except Exception:
            continue
        hit_ids.append(str(rule.get("id", "")))
    return out, [x for x in hit_ids if x]


def save_or_update_manual_rule(
    *,
    keyword: str,
    updates: dict[str, Any],
    note: str = "",
    match_mode: str = "normalized",
) -> dict[str, Any]:
    kw = re.sub(r"\s+", "", str(keyword).strip())
    if not kw:
        raise ValueError("rule keyword is required")
    if not isinstance(updates, dict) or not updates:
        raise ValueError("rule updates is required")

    normalized_updates: dict[str, Any] = {}
    for k in ("branch", "type", "page", "status", "amount"):
        if k in updates:
            v = str(updates.get(k, "")).strip()
            if v:
                normalized_updates[k] = v
    if not normalized_updates:
        raise ValueError("rule updates is empty")

    mode = str(match_mode or "normalized").strip().lower()
    if mode not in {"source", "normalized", "both"}:
        mode = "normalized"

    rules = load_manual_rules()
    now = datetime.now().isoformat(timespec="seconds")
    for rule in rules:
        if not isinstance(rule, dict):
            continue
        if str(rule.get("keyword", "")).strip() != kw:
            continue
        if str(rule.get("match_mode", "normalized")).strip().lower() != mode:
            continue
        if rule.get("updates") != normalized_updates:
            continue
        rule["updated_at"] = now
        if note:
            rule["note"] = note
        save_manual_rules(rules)
        return dict(rule)

    item = {
        "id": uuid.uuid4().hex[:12],
        "keyword": kw,
        "match_mode": mode,
        "updates": normalized_updates,
        "enabled": True,
        "created_at": now,
        "updated_at": now,
    }
    if note:
        item["note"] = note
    rules.append(item)
    save_manual_rules(rules)
    return dict(item)


def infer_correction_rule_keyword(
    *,
    source_line: str,
    normalized_line: str,
    corrected_record: dict[str, Any],
) -> str:
    status = str(corrected_record.get("status", "")).strip()
    if status and (status in normalized_line or status in source_line):
        return status

    t = str(corrected_record.get("type", "")).strip()
    if t and (t in normalized_line or t in source_line) and len(t) <= 8:
        return t

    if normalized_line:
        return normalized_line[:24]
    return re.sub(r"\s+", "", source_line)[:24]


def parse_records(
    raw_text: str,
    config: dict[str, Any],
    history: list[dict[str, Any]],
    insurance_year_choice: str | None = None,
    insurance_year_choices: dict[str, str] | None = None,
) -> dict[str, Any]:
    relay_cfg = config.get("relay_handling", {})
    parse_rules = relay_cfg.get("parse_rules", {}) if isinstance(relay_cfg, dict) else {}
    trigger = str(relay_cfg.get("trigger_keyword", "#接龙"))
    line_pattern = str(parse_rules.get("line_pattern", r"^\d+、\s*"))
    skip_lines = parse_rules.get("skip_lines", [])
    skip_lines = [str(x) for x in skip_lines] if isinstance(skip_lines, list) else []

    has_trigger = trigger in raw_text
    lines = [line.strip() for line in raw_text.splitlines() if line.strip()]

    filtered_lines: list[dict[str, str]] = []
    skipped: list[dict[str, str]] = []
    for line in lines:
        source_line = line.strip()
        if trigger and trigger in source_line and source_line == trigger:
            continue

        normalized = normalize_line(source_line, line_pattern)
        if not normalized:
            continue

        if is_date_header_line(normalized):
            skipped.append({"line": source_line, "reason": "skip_line_rule"})
            continue

        if any(token and token in normalized for token in skip_lines):
            skipped.append({"line": source_line, "reason": "skip_line_rule"})
            continue

        filtered_lines.append(
            {
                "source_line": source_line,
                "normalized_line": normalized,
                "line_serial": extract_line_serial(source_line, line_pattern),
            }
        )

    if not filtered_lines and raw_text.strip():
        source_line = raw_text.strip()
        normalized_single = normalize_line(source_line, line_pattern)
        if normalized_single:
            filtered_lines = [
                {
                    "source_line": source_line,
                    "normalized_line": normalized_single,
                    "line_serial": extract_line_serial(source_line, line_pattern),
                }
            ]

    branches_cfg = config.get("branches", {})
    allowed_branches = set(str(x) for x in branches_cfg.get("allowed", [])) if isinstance(branches_cfg, dict) else set()

    dedup_cfg = relay_cfg.get("dedup", {}) if isinstance(relay_cfg, dict) else {}
    key_fields = dedup_cfg.get("key_fields", ["branch", "amount", "type"])
    if not isinstance(key_fields, list) or not key_fields:
        key_fields = ["branch", "amount", "type"]
    key_fields = [str(k) for k in key_fields]
    insurance_year_map = dict(insurance_year_choices) if isinstance(insurance_year_choices, dict) else {}

    history_lookup = build_history_lookup(history, key_fields)
    history_keys = set(history_lookup.keys())
    history_signature_lookup = build_history_signature_lookup(history)
    history_signature_keys = set(history_signature_lookup.keys())
    batch_keys: set[str] = set()
    batch_signature_keys: set[str] = set()
    suppressed_lookup = build_skip_suppression_lookup(list_skip_suppressions())
    manual_rules = load_manual_rules()

    records: list[dict[str, Any]] = []
    new_records: list[dict[str, Any]] = []
    pending_insurance_records: list[dict[str, Any]] = []
    duplicate_records: list[dict[str, Any]] = []

    for line_item in filtered_lines:
        source_line = str(line_item.get("source_line", ""))
        normalized_line = str(line_item.get("normalized_line", ""))
        line_serial = str(line_item.get("line_serial", "")).strip()
        normalized_signature = line_signature(normalized_line)

        branch_raw, branch = extract_branch(normalized_line, config)
        if not branch:
            skipped.append({"line": source_line, "reason": "branch_not_found"})
            continue

        if allowed_branches and branch not in allowed_branches:
            skipped.append({"line": source_line, "reason": f"branch_not_allowed:{branch}"})
            continue

        status = normalize_status_value(extract_status(normalized_line, config), config)
        products = detect_products(normalized_line, config, insurance_year_choice)
        if not products:
            if is_demand_deposit_only_line(normalized_line):
                skipped.append({"line": source_line, "reason": "demand_deposit_not_generate"})
            else:
                skipped.append({"line": source_line, "reason": "type_not_found"})
            continue

        for product_index, product in enumerate(products, start=1):
            amount = str(product.get("amount", "")).strip()
            if not amount:
                skipped.append({"line": source_line, "reason": "amount_not_found"})
                continue

            type_keyword = str(product.get("type", "")).strip()
            page = str(product.get("page", "")).strip()
            if not type_keyword or not page:
                skipped.append({"line": source_line, "reason": "type_not_found"})
                continue

            record = {
                "source_line": source_line,
                "raw_text": normalized_line,
                "branch_raw": branch_raw,
                "branch": branch,
                "amount": amount,
                "type": type_keyword,
                "page": page,
                "status": status,
                "needs_insurance_year": bool(product.get("needs_insurance_year", False)),
            }
            if line_serial:
                record["line_serial"] = line_serial
            if normalized_signature:
                record["line_signature"] = normalized_signature
            record["line_product_index"] = str(product_index)
            if record["needs_insurance_year"]:
                choice_key = build_insurance_choice_key(
                    source_line=source_line,
                    raw_text=normalized_line,
                    branch=branch,
                    amount=amount,
                    page=page,
                    item_index=product_index,
                )
                record["insurance_choice_key"] = choice_key
                selected_year = pick_insurance_year_for_record(
                    choice_key=choice_key,
                    source_line=source_line,
                    raw_text=normalized_line,
                    branch=branch,
                    amount=amount,
                    year_map=insurance_year_map,
                    default_year=insurance_year_choice,
                )
                if selected_year in {"3", "5"}:
                    record["type"] = f"{selected_year}年交"
                    record["needs_insurance_year"] = False
                    record["insurance_year"] = selected_year
            record, hit_rule_ids = apply_manual_rules_to_record(
                record,
                source_line=source_line,
                normalized_line=normalized_line,
                rules=manual_rules,
                config=config,
            )
            record["amount"] = normalize_amount_text(record.get("amount", ""))
            record["status"] = normalize_status_value(str(record.get("status", "")), config)
            if hit_rule_ids:
                record["applied_rule_ids"] = hit_rule_ids
            if record["needs_insurance_year"]:
                forced_year = normalize_insurance_year(record.get("type"))
                if forced_year in {"3", "5"}:
                    record["type"] = f"{forced_year}年交"
                    record["needs_insurance_year"] = False
                    record["insurance_year"] = forced_year

            if not str(record.get("amount", "")).strip():
                skipped.append({"line": source_line, "reason": "amount_not_found"})
                continue
            if not str(record.get("page", "")).strip():
                inferred_page = infer_page_from_type(str(record.get("type", "")), config)
                if inferred_page:
                    record["page"] = inferred_page
            pages = config.get("pages", {})
            if isinstance(pages, dict) and str(record.get("page", "")).strip() not in pages:
                skipped.append({"line": source_line, "reason": "type_not_found"})
                continue
            if allowed_branches and str(record.get("branch", "")).strip() not in allowed_branches:
                skipped.append({"line": source_line, "reason": f"branch_not_allowed:{record.get('branch', '')}"})
                continue

            records.append(record)

            dedup_key = key_for_record(record, key_fields)
            record["dedup_key"] = dedup_key
            signature_key = signature_key_for_record(record)
            if signature_key:
                record["signature_key"] = signature_key

            history_record = history_lookup.get(dedup_key)
            if history_record is None and signature_key:
                history_record = history_signature_lookup.get(signature_key)

            if history_record is not None or dedup_key in history_keys or (signature_key and signature_key in history_signature_keys):
                dup = dict(record)
                dup["duplicate_reason"] = "history_duplicate"
                attach_duplicate_download_info(dup, history_record, config)
                duplicate_records.append(dup)
                continue

            if dedup_key in batch_keys or (signature_key and signature_key in batch_signature_keys):
                dup = dict(record)
                dup["duplicate_reason"] = "input_duplicate"
                duplicate_records.append(dup)
                continue

            # Insurance without explicit 3/5 year should ask user only when truly pending.
            # If this line was manually suppressed in skip rules, keep it out of prompt queue.
            if record["needs_insurance_year"]:
                if is_skip_item_suppressed(source_line, "insurance_year_pending", suppressed_lookup):
                    batch_keys.add(dedup_key)
                    if signature_key:
                        batch_signature_keys.add(signature_key)
                    continue

                pending = dict(record)
                pending["insurance_options"] = ["3", "5"]
                pending_insurance_records.append(pending)
                batch_keys.add(dedup_key)
                if signature_key:
                    batch_signature_keys.add(signature_key)
                continue

            batch_keys.add(dedup_key)
            if signature_key:
                batch_signature_keys.add(signature_key)
            new_records.append(record)

    for i, rec in enumerate(new_records, start=1):
        rec["output_file"] = render_output_filename(config, rec, i)

    visible_skipped: list[dict[str, str]] = []
    for item in skipped:
        line_text = str(item.get("line", ""))
        reason_text = str(item.get("reason", ""))
        if is_skip_item_suppressed(line_text, reason_text, suppressed_lookup):
            continue
        visible_skipped.append(item)

    summary = {
        "input_lines": len(lines),
        "parsed": len(records),
        "new": len(new_records),
        "duplicate": len(duplicate_records),
        "skipped": len(visible_skipped),
        "insurance_pending": len(pending_insurance_records),
    }

    return {
        "has_trigger": has_trigger,
        "records": records,
        "new_records": new_records,
        "pending_insurance_records": pending_insurance_records,
        "duplicate_records": duplicate_records,
        "skipped": visible_skipped,
        "summary": summary,
        "dedup_key_fields": key_fields,
        "needs_insurance_choice": len(pending_insurance_records) > 0,
    }


def append_new_history(
    history_path: Path,
    current_history: list[dict[str, Any]],
    records: list[dict[str, Any]],
    key_fields: list[str],
) -> dict[str, Any]:
    existing = {key_for_record(r, key_fields) for r in current_history}
    to_add = []
    now = datetime.now().isoformat(timespec="seconds")

    for rec in records:
        if not isinstance(rec, dict):
            continue
        k = key_for_record(rec, key_fields)
        if not k or k in existing:
            continue

        item = dict(rec)
        item.pop("download_token", None)
        item.pop("download_url", None)
        item["created_at"] = now
        to_add.append(item)
        existing.add(k)

    merged = current_history + to_add
    save_history(history_path, merged)

    return {
        "added": len(to_add),
        "total": len(merged),
    }


def upsert_history_records(
    history_path: Path,
    current_history: list[dict[str, Any]],
    records: list[dict[str, Any]],
    key_fields: list[str],
) -> dict[str, Any]:
    merged = [x for x in current_history if isinstance(x, dict)]
    index_map: dict[str, int] = {}
    for i, item in enumerate(merged):
        k = key_for_record(item, key_fields)
        if k:
            index_map[k] = i

    now = datetime.now().isoformat(timespec="seconds")
    added = 0
    replaced = 0
    for rec in records:
        if not isinstance(rec, dict):
            continue
        item = dict(rec)
        item.pop("download_token", None)
        item.pop("download_url", None)
        item["created_at"] = now
        k = key_for_record(item, key_fields)
        if not k:
            continue
        if k in index_map:
            merged[index_map[k]] = item
            replaced += 1
        else:
            index_map[k] = len(merged)
            merged.append(item)
            added += 1

    save_history(history_path, merged)
    return {
        "added": added,
        "replaced": replaced,
        "total": len(merged),
    }


def page_to_slide_index(page_name: str) -> int:
    m = re.match(r"^page_(\d+)$", page_name)
    if not m:
        raise ValueError(f"invalid page name: {page_name}")
    return int(m.group(1))


def safe_filename(name: str) -> str:
    s = re.sub(r"[\\/:*?\"<>|\r\n]+", "_", name).strip()
    s = re.sub(r"\s+", "_", s)
    return s or f"file_{uuid.uuid4().hex[:8]}"


def amount_to_digits(amount: str) -> str:
    v = floor_amount_number(amount)
    if v is None:
        t = str(amount).strip()
        return t.replace("万元", "").replace("万", "")
    return str(v)


def pick_paragraph_index(paragraph_count: int, configured_index: int) -> int:
    if paragraph_count <= 0:
        return 0
    if 0 <= configured_index < paragraph_count:
        return configured_index
    # Compatible with 1-based config values.
    one_based = configured_index - 1
    if 0 <= one_based < paragraph_count:
        return one_based
    return 0


def paragraph_plain_text(para: Any) -> str:
    if getattr(para, "runs", None):
        return "".join((run.text or "") for run in para.runs)
    return str(getattr(para, "text", "") or "")


def set_paragraph_text(para: Any, text: str) -> None:
    if getattr(para, "runs", None):
        para.runs[0].text = text
        for run in para.runs[1:]:
            run.text = ""
        return
    para.text = text


def paragraph_has_branch_marker(para: Any) -> bool:
    txt = paragraph_plain_text(para).strip()
    return bool(txt) and ("营业所" in txt)


def replace_company_placeholder_text(text: str, company_replacement: str) -> str:
    company_text = str(company_replacement or "").strip()
    if not company_text:
        return str(text or "")
    return re.sub(r"([XxＸｘ*＊]+)\s*(?=分公司)", company_text, str(text or ""), count=1)


def build_branch_text(
    original_text: str,
    branch: str,
    fallback_text: str,
    company_replacement: str = "",
) -> str:
    text = str(original_text or "")
    if not text.strip():
        return fallback_text

    # Some templates use placeholder tokens like "XX/**" before "分公司".
    # Replace them first to avoid rendering "XX分公司".
    text = replace_company_placeholder_text(text, company_replacement)

    # Keep the template prefix (e.g. "永州市道县分公司"), only replace the branch part.
    m = re.search(r"(分公司)([^|，。,；;\s]*)营业所", text)
    if m:
        return f"{text[:m.start(2)]}{branch}营业所{text[m.end():]}"

    m = re.search(r"([^|，。,；;\s]*)营业所", text)
    if m:
        return f"{text[:m.start(1)]}{branch}营业所{text[m.end():]}"

    return fallback_text


def replace_branch_in_slide(slide: Any, branch: str, config: dict[str, Any]) -> None:
    branch_cfg = config.get("replace_algorithm", {}).get("branch", {})
    location_cfg = config.get("branches", {}).get("template_location", {})

    shape_index = int(branch_cfg.get("shape_index", location_cfg.get("shape_index", 3)))
    paragraph_index = int(branch_cfg.get("paragraph_index", location_cfg.get("paragraph_index", 1)))
    fmt = str(branch_cfg.get("format", location_cfg.get("display_format", "{branch}营业所")))
    company_replacement = str(
        branch_cfg.get("company_replacement", location_cfg.get("company_replacement", ""))
    ).strip()
    new_text = fmt.format(branch=branch)

    target_para = None
    fallback_para = None
    target_shape = None
    cfg_paragraphs = []

    if 0 <= shape_index < len(slide.shapes):
        cfg_shape = slide.shapes[shape_index]
        if getattr(cfg_shape, "has_text_frame", False):
            cfg_paragraphs = cfg_shape.text_frame.paragraphs
            if cfg_paragraphs:
                p_index = pick_paragraph_index(len(cfg_paragraphs), paragraph_index)
                fallback_para = cfg_paragraphs[p_index]
                if paragraph_has_branch_marker(cfg_paragraphs[p_index]):
                    target_para = cfg_paragraphs[p_index]
                    target_shape = cfg_shape
                else:
                    for para in cfg_paragraphs:
                        if paragraph_has_branch_marker(para):
                            target_para = para
                            target_shape = cfg_shape
                            break

    if target_para is None:
        for shape in slide.shapes:
            if not getattr(shape, "has_text_frame", False):
                continue
            paragraphs = shape.text_frame.paragraphs
            for para in paragraphs:
                if paragraph_has_branch_marker(para):
                    target_para = para
                    target_shape = shape
                    break
            if target_para is not None:
                break

    if target_para is not None:
        original_text = paragraph_plain_text(target_para)
        set_paragraph_text(
            target_para,
            build_branch_text(original_text, branch, new_text, company_replacement),
        )
        if company_replacement and target_shape is not None and getattr(target_shape, "has_text_frame", False):
            for para in target_shape.text_frame.paragraphs:
                old_text = paragraph_plain_text(para)
                new_company_text = replace_company_placeholder_text(old_text, company_replacement)
                if new_company_text != old_text:
                    set_paragraph_text(para, new_company_text)
        return

    if fallback_para is not None:
        set_paragraph_text(fallback_para, new_text)


def replace_amount_in_slide(slide: Any, shape_index: int, amount: str) -> None:
    if shape_index < 0 or shape_index >= len(slide.shapes):
        return

    shape = slide.shapes[shape_index]
    if not getattr(shape, "has_text_frame", False):
        return

    amount_digits = amount_to_digits(amount)
    candidates: list[tuple[Any, int, str]] = []
    paragraphs = shape.text_frame.paragraphs

    for para in paragraphs:
        runs = list(getattr(para, "runs", []) or [])
        for idx, run in enumerate(runs):
            t = str(getattr(run, "text", "") or "").strip()
            if re.fullmatch(r"\d+(?:\.\d+)?", t) or re.fullmatch(r"[\*＊]+", t):
                candidates.append((para, idx, t))

    if not candidates:
        return

    def next_nonempty_text(para: Any, start_idx: int) -> str:
        runs = list(getattr(para, "runs", []) or [])
        for j in range(start_idx + 1, len(runs)):
            txt = str(getattr(runs[j], "text", "") or "").strip()
            if txt:
                return txt
        return ""

    def prev_nonempty_text(para: Any, start_idx: int) -> str:
        runs = list(getattr(para, "runs", []) or [])
        for j in range(start_idx - 1, -1, -1):
            txt = str(getattr(runs[j], "text", "") or "").strip()
            if txt:
                return txt
        return ""

    preferred: list[tuple[Any, int, str]] = []
    for para, idx, _ in candidates:
        nxt = next_nonempty_text(para, idx)
        prv = prev_nonempty_text(para, idx)
        # Prefer the amount token that is adjacent to "万/万元".
        if ("万" in nxt) or ("万元" in nxt) or ("万" in prv) or ("万元" in prv):
            preferred.append((para, idx, ""))

    targets = preferred if preferred else [candidates[-1]]
    for para, idx, _ in targets:
        runs = list(getattr(para, "runs", []) or [])
        if 0 <= idx < len(runs):
            runs[idx].text = amount_digits


def replace_status_in_slide(
    slide: Any,
    shape_index: int,
    status: str,
    page_name: str,
    config: dict[str, Any],
) -> None:
    if shape_index < 0 or shape_index >= len(slide.shapes):
        return

    shape = slide.shapes[shape_index]
    if not getattr(shape, "has_text_frame", False):
        return

    normalized_status = normalize_status_value(status, config)
    page_cfg = config.get("pages", {}).get(page_name, {})
    has_bracket = bool(page_cfg.get("has_bracket", False)) if isinstance(page_cfg, dict) else False
    target_text = f"（{normalized_status}）" if has_bracket else normalized_status
    valid_status = get_valid_status_list(config)

    paragraphs = shape.text_frame.paragraphs
    if not paragraphs:
        shape.text_frame.text = target_text
        return

    target_para = None
    for para in paragraphs:
        para_text = "".join(run.text for run in para.runs).strip() if para.runs else para.text.strip()
        if not para_text:
            continue
        if any(s in para_text for s in valid_status) or ("成功营销" in para_text) or ("（" in para_text and "）" in para_text):
            target_para = para
            break

    if target_para is None:
        for para in paragraphs:
            para_text = "".join(run.text for run in para.runs).strip() if para.runs else para.text.strip()
            if para_text:
                target_para = para
                break
    if target_para is None:
        target_para = paragraphs[0]

    if target_para.runs:
        replaced = False

        # 优先替换已存在的状态 run，保持模板字体/字号完全不变。
        for run in target_para.runs:
            raw = run.text or ""
            trimmed = raw.strip()
            if not raw:
                continue
            if trimmed in valid_status or trimmed == "成功营销":
                run.text = raw.replace(trimmed, normalized_status, 1)
                replaced = True
                break

        if not replaced and has_bracket:
            for run in target_para.runs:
                raw = run.text or ""
                if "（" in raw and "）" in raw:
                    run.text = re.sub(r"（[^）]*）", f"（{normalized_status}）", raw, count=1)
                    replaced = True
                    break

        if not replaced:
            target_para.runs[0].text = target_text
            for run in target_para.runs[1:]:
                run.text = ""
    else:
        target_para.text = target_text

    if has_bracket:
        target_para.alignment = PP_ALIGN.CENTER
        shape.text_frame.word_wrap = False


def replace_page_display_name_in_slide(slide: Any, page_name: str, config: dict[str, Any]) -> None:
    pages = config.get("pages", {})
    if not isinstance(pages, dict):
        return
    page_cfg = pages.get(page_name, {})
    if not isinstance(page_cfg, dict):
        return
    display_name = str(page_cfg.get("display_name", "")).strip()
    if not display_name:
        return

    aliases = page_cfg.get("display_aliases", [])
    alias_list: list[str] = [display_name]
    if isinstance(aliases, list):
        alias_list.extend([str(x).strip() for x in aliases if str(x).strip()])
    if page_name == "page_5":
        # 兼容旧模板文案。
        alias_list.extend(["两三期理财", "两三年期理财"])
    alias_list = [x for x in dict.fromkeys(alias_list) if x]

    for shape in slide.shapes:
        if not getattr(shape, "has_text_frame", False):
            continue
        paragraphs = shape.text_frame.paragraphs
        for para in paragraphs:
            raw_text = paragraph_plain_text(para)
            compact_text = normalize_skip_line(raw_text)
            if not compact_text:
                continue
            for alias in alias_list:
                alias_compact = normalize_skip_line(alias)
                if not alias_compact or alias_compact not in compact_text:
                    continue

                if compact_text == alias_compact:
                    set_paragraph_text(para, display_name)
                elif alias in raw_text:
                    set_paragraph_text(para, raw_text.replace(alias, display_name, 1))
                elif len(compact_text) <= 24:
                    set_paragraph_text(para, compact_text.replace(alias_compact, display_name, 1))
                else:
                    continue

                # 避免“多一个字后自动折行”。
                shape.text_frame.word_wrap = False
                break


def keep_only_target_slide(prs: Presentation, keep_index: int) -> bool:
    try:
        sld_id_list = prs.slides._sldIdLst  # type: ignore[attr-defined]
        total = len(prs.slides)
        if keep_index < 0 or keep_index >= total:
            return False
        for idx in range(total - 1, -1, -1):
            if idx == keep_index:
                continue
            rel_id = sld_id_list[idx].rId
            prs.part.drop_rel(rel_id)
            del sld_id_list[idx]
        return True
    except Exception:
        return False


def is_single_slide_output_enabled(config: dict[str, Any]) -> bool:
    perf_cfg = config.get("performance", {})
    if isinstance(perf_cfg, dict):
        return bool(perf_cfg.get("single_slide_output", True))
    return True


def resolve_generation_strategy(config: dict[str, Any], total_records: int) -> str:
    perf_cfg = config.get("performance", {})
    if not isinstance(perf_cfg, dict):
        return "legacy"

    strategy = str(perf_cfg.get("generation_strategy", "page_template_cache")).strip().lower()
    if strategy not in {"legacy", "page_template_cache"}:
        strategy = "page_template_cache"
    if strategy == "legacy":
        return "legacy"

    min_records = perf_cfg.get("template_cache_min_records", 2)
    try:
        min_records_int = int(min_records)
    except Exception:
        min_records_int = 2
    if total_records < max(1, min_records_int):
        return "legacy"

    if not is_single_slide_output_enabled(config):
        return "legacy"

    return "page_template_cache"


def _resolve_perf_section(config: dict[str, Any]) -> dict[str, Any]:
    perf_cfg = config.get("performance", {})
    return perf_cfg if isinstance(perf_cfg, dict) else {}


def resolve_single_generation_mode(config: dict[str, Any]) -> bool:
    perf_cfg = _resolve_perf_section(config)
    return bool(perf_cfg.get("single_generation_mode", True))


def resolve_build_workers(config: dict[str, Any], total_records: int) -> int:
    perf_cfg = _resolve_perf_section(config)
    raw_workers = perf_cfg.get("max_build_workers", 1)
    try:
        workers = int(raw_workers)
    except Exception:
        workers = 1
    workers = max(1, min(6, workers))
    return max(1, min(workers, total_records, (os.cpu_count() or 2)))


def resolve_extract_workers(config: dict[str, Any]) -> int:
    perf_cfg = _resolve_perf_section(config)
    raw_workers = perf_cfg.get("max_extract_workers", 1)
    try:
        workers = int(raw_workers)
    except Exception:
        workers = 1
    return max(1, min(6, workers, (os.cpu_count() or 2)))


def resolve_memory_reclaim_options(config: dict[str, Any]) -> dict[str, bool]:
    perf_cfg = _resolve_perf_section(config)
    reclaim_cfg = perf_cfg.get("memory_reclaim", {})
    if not isinstance(reclaim_cfg, dict):
        reclaim_cfg = {}
    enabled = bool(reclaim_cfg.get("enabled", True))
    return {
        "enabled": enabled,
        "gc_collect": enabled and bool(reclaim_cfg.get("gc_collect", True)),
        "malloc_trim": enabled and bool(reclaim_cfg.get("malloc_trim", True)),
    }


def _try_malloc_trim() -> bool:
    global _MALLOC_TRIM_FN
    if os.name != "posix":
        return False
    try:
        if _MALLOC_TRIM_FN is None:
            libc = ctypes.CDLL("libc.so.6")
            fn = libc.malloc_trim
            fn.argtypes = [ctypes.c_size_t]
            fn.restype = ctypes.c_int
            _MALLOC_TRIM_FN = fn
        return bool(_MALLOC_TRIM_FN(0))
    except Exception:
        return False


def reclaim_runtime_memory(config: dict[str, Any]) -> None:
    opts = resolve_memory_reclaim_options(config)
    if not opts["enabled"]:
        return
    if opts["gc_collect"]:
        gc.collect()
    if opts["malloc_trim"]:
        _try_malloc_trim()


def build_page_template_cache(
    template_path: Path,
    page_names: set[str],
) -> dict[str, bytes]:
    cache: dict[str, bytes] = {}
    for page_name in sorted(page_names):
        prs = Presentation(str(template_path))
        slide_index = page_to_slide_index(page_name)
        if slide_index < 0 or slide_index >= len(prs.slides):
            raise RuntimeError(f"slide index out of range for page={page_name}")
        if not keep_only_target_slide(prs, slide_index):
            raise RuntimeError(f"failed to prepare single-slide template for page={page_name}")
        buffer = io.BytesIO()
        prs.save(buffer)
        cache[page_name] = buffer.getvalue()
    return cache


def resolve_image_delivery_options(config: dict[str, Any]) -> dict[str, Any]:
    perf_cfg = config.get("performance", {})
    delivery_cfg = perf_cfg.get("image_delivery", {}) if isinstance(perf_cfg, dict) else {}
    if not isinstance(delivery_cfg, dict):
        delivery_cfg = {}

    enabled = bool(delivery_cfg.get("enabled", True))

    raw_max_kbps = delivery_cfg.get("max_kbps", 300)
    try:
        max_kbps = int(raw_max_kbps)
    except Exception:
        max_kbps = 300
    max_kbps = max(0, max_kbps)

    raw_chunk_kb = delivery_cfg.get("chunk_kb", 16)
    try:
        chunk_kb = int(raw_chunk_kb)
    except Exception:
        chunk_kb = 16
    chunk_kb = max(4, min(256, chunk_kb))

    if not enabled:
        max_kbps = 0

    return {
        "enabled": enabled,
        "max_kbps": max_kbps,
        "chunk_size": chunk_kb * 1024,
    }


def build_ppt_for_record(
    template_path: Path,
    config: dict[str, Any],
    record: dict[str, Any],
    out_pptx: Path,
    page_template_cache: dict[str, bytes] | None = None,
) -> int:
    page_name = str(record.get("page", ""))
    cached_payload = page_template_cache.get(page_name) if isinstance(page_template_cache, dict) else None
    if cached_payload:
        prs = Presentation(io.BytesIO(cached_payload))
        slide_index = 0
    else:
        prs = Presentation(str(template_path))
        slide_index = page_to_slide_index(page_name)
        if slide_index < 0 or slide_index >= len(prs.slides):
            raise RuntimeError(f"slide index out of range for page={record.get('page')}")

    slide = prs.slides[slide_index]
    page_cfg = config.get("pages", {}).get(page_name, {})
    amount_shape = int(page_cfg.get("amount_shape", 2))
    status_shape = int(page_cfg.get("status_shape", 3))
    normalized_status = normalize_status_value(str(record.get("status", "")), config)

    replace_branch_in_slide(slide, str(record.get("branch", "")), config)
    replace_amount_in_slide(slide, amount_shape, str(record.get("amount", "")))
    replace_status_in_slide(slide, status_shape, normalized_status, page_name, config)
    replace_page_display_name_in_slide(slide, page_name, config)

    single_slide_output = is_single_slide_output_enabled(config)
    export_page_number = slide_index + 1
    if not cached_payload and single_slide_output and keep_only_target_slide(prs, slide_index):
        export_page_number = 1

    prs.save(str(out_pptx))
    return export_page_number


def run_subprocess(args: list[str], timeout: int = 300) -> subprocess.CompletedProcess[str]:
    return subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
        timeout=timeout,
    )


def use_local_office_converter() -> bool:
    return bool(LOCAL_LIBREOFFICE_BIN and Path(str(LOCAL_LIBREOFFICE_BIN)).exists())


def use_local_pdftoppm() -> bool:
    return bool(LOCAL_PDFTOPPM_BIN and Path(str(LOCAL_PDFTOPPM_BIN)).exists())


def ensure_converter_image() -> None:
    global CONVERTER_IMAGE_READY
    if use_local_office_converter():
        CONVERTER_IMAGE_READY = True
        return
    if CONVERTER_IMAGE_READY:
        return

    with _CONVERTER_LOCK:
        if CONVERTER_IMAGE_READY:
            return

        inspect = run_subprocess(["docker", "image", "inspect", CONVERTER_IMAGE], timeout=60)
        if inspect.returncode != 0:
            pull = run_subprocess(["docker", "pull", CONVERTER_IMAGE], timeout=900)
            if pull.returncode != 0:
                raise RuntimeError(
                    f"failed to pull docker image {CONVERTER_IMAGE}: {pull.stderr.strip() or pull.stdout.strip()}"
                )

        CONVERTER_IMAGE_READY = True


def prewarm_converter_image(background: bool = True) -> None:
    def _task() -> None:
        try:
            ensure_converter_image()
            print(f"Converter image ready: {CONVERTER_IMAGE}")
        except Exception as exc:
            print(f"Converter image prewarm failed: {exc}")

    if background:
        t = threading.Thread(target=_task, name="converter-prewarm", daemon=True)
        t.start()
    else:
        _task()


def convert_pptx_to_pdf_batch(
    job_dir: Path,
    pptx_paths: list[Path],
    progress_cb: Callable[[int, str, str], None] | None = None,
) -> list[Path]:
    if not pptx_paths:
        return []

    pdf_paths = [p.with_suffix(".pdf") for p in pptx_paths]
    total = len(pdf_paths)
    start_ts = time.time()
    last_percent = -1
    last_done = -1

    if use_local_office_converter():
        args = [
            str(LOCAL_LIBREOFFICE_BIN),
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(job_dir),
        ]
        args.extend([str(p) for p in pptx_paths])
    else:
        ensure_converter_image()
        uid_gid = f"{os.getuid()}:{os.getgid()}"
        workdir = str(job_dir)
        args = [
            "docker",
            "run",
            "--rm",
            "--user",
            uid_gid,
            "-v",
            f"{workdir}:/work",
            CONVERTER_IMAGE,
            "libreoffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            "/work",
        ]
        args.extend([f"/work/{p.name}" for p in pptx_paths])

    proc = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    while True:
        ret = proc.poll()

        done = 0
        for p in pdf_paths:
            if p.exists():
                done += 1

        # 70~84 之间滚动，避免长时间停在 70% 不动。
        percent_by_done = int((done / max(total, 1)) * 14)
        percent_by_time = min(13, int((time.time() - start_ts) / 2.0))
        percent = min(84, 70 + max(percent_by_done, percent_by_time))
        if progress_cb and (done != last_done or percent != last_percent):
            progress_cb(percent, "PPT转PDF", f"已转 {done}/{total}")
            last_done = done
            last_percent = percent

        if ret is not None:
            break
        time.sleep(0.35)

    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(f"pptx->pdf batch convert failed: {stderr.strip() or stdout.strip()}")

    if progress_cb:
        progress_cb(84, "PPT转PDF", f"已转 {total}/{total}")

    missing = [str(p.name) for p in pdf_paths if not p.exists()]
    if missing:
        raise RuntimeError(f"pptx->pdf batch convert failed, missing outputs: {', '.join(missing[:5])}")

    return pdf_paths


def extract_pdf_pages_to_png_batch(
    tasks: list[dict[str, Any]],
    config: dict[str, Any] | None = None,
    progress_cb: Callable[[int, str, str], None] | None = None,
) -> None:
    if not tasks:
        return

    first_pdf = Path(str(tasks[0]["pdf_path"]))
    job_dir = first_pdf.parent

    local_pdftoppm = use_local_pdftoppm()
    map_file = job_dir / "_extract_map.nul"
    with map_file.open("wb") as f:
        for t in tasks:
            if local_pdftoppm:
                pdf_field = str(Path(str(t["pdf_path"])).resolve())
                out_field = str(Path(str(t["png_path"])).with_suffix("").resolve())
            else:
                pdf_field = Path(str(t["pdf_path"])).name
                out_field = Path(str(t["png_path"])).with_suffix("").name
            pdf_name = pdf_field.encode("utf-8")
            page_number = str(int(t["page_number"])).encode("utf-8")
            out_prefix = out_field.encode("utf-8")
            f.write(pdf_name + b"\0" + page_number + b"\0" + out_prefix + b"\0")

    parallel_jobs = resolve_extract_workers(config or {})
    if local_pdftoppm:
        script = (
            "set -e; "
            f"xargs -0 -P {parallel_jobs} -n 3 sh -c "
            "'pdf=\"$1\"; page=\"$2\"; out=\"$3\"; "
            f"\"{LOCAL_PDFTOPPM_BIN}\" -f \"$page\" -singlefile -png \"$pdf\" \"$out\"' _ "
            f"< {str(map_file)}"
        )
        args = ["sh", "-lc", script]
    else:
        ensure_converter_image()
        uid_gid = f"{os.getuid()}:{os.getgid()}"
        script = (
            "set -e; "
            f"xargs -0 -P {parallel_jobs} -n 3 sh -c "
            "'pdf=\"$1\"; page=\"$2\"; out=\"$3\"; "
            "pdftoppm -f \"$page\" -singlefile -png \"/work/$pdf\" \"/work/$out\"' _ "
            "< /work/_extract_map.nul"
        )
        args = [
            "docker",
            "run",
            "--rm",
            "--user",
            uid_gid,
            "-v",
            f"{str(job_dir)}:/work",
            CONVERTER_IMAGE,
            "sh",
            "-lc",
            script,
        ]
    proc = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    total = len(tasks)
    last_done = -1
    last_percent = -1
    while True:
        ret = proc.poll()
        done = 0
        for t in tasks:
            if Path(str(t["png_path"])).exists():
                done += 1
        percent = min(94, 85 + int((done / max(total, 1)) * 9))
        if progress_cb and (done != last_done or percent != last_percent):
            progress_cb(percent, "PDF转PNG", f"已导出 {done}/{total}")
            last_done = done
            last_percent = percent
        if ret is not None:
            break
        time.sleep(0.35)

    stdout, stderr = proc.communicate()
    map_file.unlink(missing_ok=True)
    if proc.returncode != 0:
        raise RuntimeError(f"pdf->png batch extract failed: {stderr.strip() or stdout.strip()}")

    missing_png = [str(Path(str(t["png_path"])).name) for t in tasks if not Path(str(t["png_path"])).exists()]
    if missing_png:
        raise RuntimeError(f"pdf->png batch extract failed, missing outputs: {', '.join(missing_png[:5])}")


def cleanup_download_cache() -> None:
    now = time.time()
    to_delete: list[str] = []

    for token, meta in DOWNLOAD_CACHE.items():
        created_at = float(meta.get("created_at", 0))
        if now - created_at > 3600:  # 1 hour
            to_delete.append(token)

    for token in to_delete:
        DOWNLOAD_CACHE.pop(token, None)


def register_download(file_path: Path, content_type: str = "application/octet-stream") -> str:
    token = uuid.uuid4().hex
    with _DOWNLOAD_LOCK:
        cleanup_download_cache()
        DOWNLOAD_CACHE[token] = {
            "file_path": str(file_path),
            "filename": file_path.name,
            "content_type": content_type,
            "created_at": time.time(),
        }
    return token


def generate_records(
    records: list[dict[str, Any]],
    config: dict[str, Any],
    template_path: Path,
    output_dir: Path,
    progress_cb: Callable[[int, str, str], None] | None = None,
) -> dict[str, Any]:
    output_cfg = config.get("output_settings", {})
    auto_cleanup = bool(output_cfg.get("auto_cleanup_pptx", True)) if isinstance(output_cfg, dict) else True

    job_id = f"job_{now_ts()}_{uuid.uuid4().hex[:6]}"
    job_dir = output_dir / job_id
    job_dir.mkdir(parents=True, exist_ok=True)
    register_active_job_dir(job_dir)

    try:
        generated_items: list[dict[str, Any]] = []
        download_images: list[dict[str, Any]] = []
        tasks: list[dict[str, Any]] = []
        page_template_cache: dict[str, bytes] | None = None

        used_names: set[str] = set()
        total_records = len(records)

        generation_strategy = resolve_generation_strategy(config, total_records)
        if generation_strategy == "page_template_cache" and total_records > 0:
            page_names = {str(r.get("page", "")).strip() for r in records if str(r.get("page", "")).strip()}
            if page_names:
                if progress_cb:
                    progress_cb(14, "准备模板", "构建单页模板缓存")
                page_template_cache = build_page_template_cache(template_path, page_names)
                if progress_cb:
                    progress_cb(18, "准备模板", f"已缓存 {len(page_template_cache)} 个页面")

        if progress_cb:
            progress_cb(20, "生成PPT", f"0/{total_records}")

        for i, rec in enumerate(records, start=1):
            base_name = safe_filename(str(rec.get("output_file") or f"喜报_{rec.get('branch', '未知网点')}_{i}.png"))
            if not base_name.lower().endswith(".png"):
                base_name += ".png"

            final_name = base_name
            seq = 1
            while final_name in used_names:
                final_name = f"{Path(base_name).stem}_{seq}.png"
                seq += 1
            used_names.add(final_name)

            ppt_name = f"{Path(final_name).stem}.pptx"
            ppt_path = job_dir / ppt_name
            pdf_path = job_dir / f"{Path(final_name).stem}.pdf"
            png_path = job_dir / final_name

            tasks.append(
                {
                    "record": rec,
                    "ppt_path": ppt_path,
                    "pdf_path": pdf_path,
                    "png_path": png_path,
                    "page_number": 1,
                    "png_name": final_name,
                }
            )

        def _build_one(task: dict[str, Any]) -> None:
            rec = dict(task.get("record", {}))
            ppt_path = Path(str(task.get("ppt_path")))
            try:
                export_page_number = build_ppt_for_record(
                    template_path,
                    config,
                    rec,
                    ppt_path,
                    page_template_cache=page_template_cache,
                )
                task["page_number"] = int(export_page_number)
            except Exception as exc:
                append_review_log(
                    "generation_error",
                    {
                        "reason": "build_ppt_failed",
                        "source_line": str(rec.get("source_line") or rec.get("raw_text") or ""),
                        "record": rec,
                        "error": str(exc),
                    },
                )
                raise

        max_workers = resolve_build_workers(config, total_records)
        if total_records <= 1 or max_workers <= 1:
            done = 0
            for task in tasks:
                _build_one(task)
                done += 1
                if progress_cb:
                    pct = 20 + int((done / max(total_records, 1)) * 45)
                    progress_cb(pct, "生成PPT", f"{done}/{total_records}")
        else:
            done = 0
            with ThreadPoolExecutor(max_workers=max_workers) as ex:
                futs = [ex.submit(_build_one, t) for t in tasks]
                for fut in as_completed(futs):
                    fut.result()
                    done += 1
                    if progress_cb:
                        pct = 20 + int((done / max(total_records, 1)) * 45)
                        progress_cb(pct, "生成PPT", f"{done}/{total_records}")

        if progress_cb:
            progress_cb(70, "PPT转PDF", f"已转 0/{total_records}")
        convert_pptx_to_pdf_batch(
            job_dir,
            [Path(str(t["ppt_path"])) for t in tasks],
            progress_cb=progress_cb,
        )
        if progress_cb:
            progress_cb(85, "PDF转PNG", "已导出 0/{}".format(total_records))
        extract_pdf_pages_to_png_batch(tasks, config=config, progress_cb=progress_cb)

        for i, t in enumerate(tasks, start=1):
            rec = dict(t["record"])
            ppt_path = Path(str(t["ppt_path"]))
            pdf_path = Path(str(t["pdf_path"]))
            png_path = Path(str(t["png_path"]))

            if auto_cleanup:
                ppt_path.unlink(missing_ok=True)
                pdf_path.unlink(missing_ok=True)

            item = dict(rec)
            item["png_file"] = str(t["png_name"])
            item["job_id"] = job_id
            item["image_path"] = str(png_path)
            token = register_download(png_path, content_type="image/png")
            item["download_token"] = token
            item["download_url"] = f"/api/download/{token}"
            generated_items.append(item)
            download_images.append(
                {
                    "name": str(t["png_name"]),
                    "download_token": token,
                    "download_url": f"/api/download/{token}",
                }
            )
            if progress_cb:
                pct = 95 + int((i / max(total_records, 1)) * 5)
                progress_cb(pct, "整理下载", f"{i}/{total_records}")

        return {
            "job_id": job_id,
            "job_dir": str(job_dir),
            "generation_strategy": generation_strategy,
            "generated": generated_items,
            "generated_count": len(generated_items),
            "download_images": download_images,
        }
    finally:
        unregister_active_job_dir(job_dir)
        reclaim_runtime_memory(config)


class XibaoHandler(SimpleHTTPRequestHandler):
    def __init__(self, *args: Any, **kwargs: Any):
        super().__init__(*args, directory=str(STATIC_DIR), **kwargs)

    def _send_json(self, payload: dict[str, Any], status: HTTPStatus = HTTPStatus.OK) -> None:
        data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def _send_bytes(
        self,
        content: bytes,
        content_type: str,
        filename: str | None = None,
        status: HTTPStatus = HTTPStatus.OK,
    ) -> None:
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(content)))
        if filename:
            encoded = quote(filename)
            self.send_header("Content-Disposition", f"attachment; filename*=UTF-8''{encoded}")
        self.end_headers()
        self.wfile.write(content)

    def _send_file(
        self,
        file_path: Path,
        content_type: str,
        filename: str | None = None,
        status: HTTPStatus = HTTPStatus.OK,
        max_kbps: int = 0,
        chunk_size: int = 16 * 1024,
    ) -> None:
        total_size = int(file_path.stat().st_size)
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(total_size))
        if filename:
            encoded = quote(filename)
            self.send_header("Content-Disposition", f"attachment; filename*=UTF-8''{encoded}")
        self.end_headers()

        max_bytes_per_sec = max(0, int(max_kbps)) * 1024
        block_size = max(4096, int(chunk_size))
        sent = 0
        start_ts = time.perf_counter()

        with file_path.open("rb") as f:
            while True:
                chunk = f.read(block_size)
                if not chunk:
                    break
                self.wfile.write(chunk)
                sent += len(chunk)

                if max_bytes_per_sec > 0:
                    elapsed = time.perf_counter() - start_ts
                    expected_elapsed = sent / max_bytes_per_sec
                    if expected_elapsed > elapsed:
                        time.sleep(min(expected_elapsed - elapsed, 0.2))

    def _read_json_body(self) -> dict[str, Any]:
        try:
            content_len = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            content_len = 0

        body = self.rfile.read(content_len) if content_len > 0 else b"{}"
        if not body:
            return {}

        try:
            return json.loads(body.decode("utf-8"))
        except json.JSONDecodeError:
            raise ValueError("invalid JSON body")

    def do_GET(self) -> None:  # noqa: N802
        parsed = urlparse(self.path)

        if handle_get_routes(self, parsed, globals()):
            return

        if parsed.path == "/":
            self.path = "/index.html"

        super().do_GET()

    def do_POST(self) -> None:  # noqa: N802
        parsed = urlparse(self.path)
        if handle_post_routes(self, parsed, globals()):
            return

        self._send_json({"ok": False, "error": "not found"}, HTTPStatus.NOT_FOUND)


def run_server(
    host: str = "0.0.0.0",
    port: int = 8787,
    prewarm: bool = True,
    prewarm_blocking: bool = False,
) -> None:
    if prewarm:
        prewarm_converter_image(background=not prewarm_blocking)

    cleanup_stop = threading.Event()
    cleanup_thread = threading.Thread(
        target=daily_cleanup_loop,
        args=(cleanup_stop,),
        name="daily-cleanup",
        daemon=True,
    )
    cleanup_thread.start()

    server = ThreadingHTTPServer((host, port), XibaoHandler)
    print(f"Xibao Web running on http://{host}:{port}")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        cleanup_stop.set()
        cleanup_thread.join(timeout=1.0)
        server.server_close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Xibao Web server")
    parser.add_argument("--host", default="0.0.0.0", help="bind host, e.g. 0.0.0.0 or 127.0.0.1")
    parser.add_argument("--port", type=int, default=8787, help="bind port")
    parser.add_argument(
        "--skip-prewarm",
        action="store_true",
        help="disable docker converter image prewarm on startup",
    )
    parser.add_argument(
        "--prewarm-blocking",
        action="store_true",
        help="prewarm converter image before serving (blocks startup until ready)",
    )
    args = parser.parse_args()
    run_server(
        host=args.host,
        port=args.port,
        prewarm=not args.skip_prewarm,
        prewarm_blocking=args.prewarm_blocking,
    )