""" 邮件解析和验证码提取 """ import logging import re from typing import Optional, List, Dict, Any from ...config.constants import ( OTP_CODE_SIMPLE_PATTERN, OTP_CODE_SEMANTIC_PATTERN, OPENAI_EMAIL_SENDERS, OPENAI_VERIFICATION_KEYWORDS, ) from .base import EmailMessage logger = logging.getLogger(__name__) class EmailParser: """ 邮件解析器 用于识别 OpenAI 验证邮件并提取验证码 """ def __init__(self): # 编译正则表达式 self._simple_pattern = re.compile(OTP_CODE_SIMPLE_PATTERN) self._semantic_pattern = re.compile(OTP_CODE_SEMANTIC_PATTERN, re.IGNORECASE) def is_openai_verification_email( self, email: EmailMessage, target_email: Optional[str] = None, ) -> bool: """ 判断是否为 OpenAI 验证邮件 Args: email: 邮件对象 target_email: 目标邮箱地址(用于验证收件人) Returns: 是否为 OpenAI 验证邮件 """ sender = email.sender.lower() # 1. 发件人必须是 OpenAI if not any(s in sender for s in OPENAI_EMAIL_SENDERS): logger.debug(f"邮件发件人非 OpenAI: {sender}") return False # 2. 主题或正文包含验证关键词 subject = email.subject.lower() body = email.body.lower() combined = f"{subject} {body}" if not any(kw in combined for kw in OPENAI_VERIFICATION_KEYWORDS): logger.debug(f"邮件未包含验证关键词: {subject[:50]}") return False # 3. 收件人检查已移除:别名邮件的 IMAP 头中收件人可能不匹配,只靠发件人+关键词判断 logger.debug(f"识别为 OpenAI 验证邮件: {subject[:50]}") return True def extract_verification_code( self, email: EmailMessage, ) -> Optional[str]: """ 从邮件中提取验证码 优先级: 1. 从主题提取(6位数字) 2. 从正文用语义正则提取(如 "code is 123456") 3. 兜底:任意 6 位数字 Args: email: 邮件对象 Returns: 验证码字符串,如果未找到返回 None """ # 1. 主题优先 code = self._extract_from_subject(email.subject) if code: logger.debug(f"从主题提取验证码: {code}") return code # 2. 正文语义匹配 code = self._extract_semantic(email.body) if code: logger.debug(f"从正文语义提取验证码: {code}") return code # 3. 兜底:正文任意 6 位数字 code = self._extract_simple(email.body) if code: logger.debug(f"从正文兜底提取验证码: {code}") return code return None def _extract_from_subject(self, subject: str) -> Optional[str]: """从主题提取验证码""" match = self._simple_pattern.search(subject) if match: return match.group(1) return None def _extract_semantic(self, body: str) -> Optional[str]: """语义匹配提取验证码""" match = self._semantic_pattern.search(body) if match: return match.group(1) return None def _extract_simple(self, body: str) -> Optional[str]: """简单匹配提取验证码""" match = self._simple_pattern.search(body) if match: return match.group(1) return None def find_verification_code_in_emails( self, emails: List[EmailMessage], target_email: Optional[str] = None, min_timestamp: int = 0, used_codes: Optional[set] = None, ) -> Optional[str]: """ 从邮件列表中查找验证码 Args: emails: 邮件列表 target_email: 目标邮箱地址 min_timestamp: 最小时间戳(用于过滤旧邮件) used_codes: 已使用的验证码集合(用于去重) Returns: 验证码字符串,如果未找到返回 None """ used_codes = used_codes or set() for email in emails: # 时间戳过滤 if min_timestamp > 0 and email.received_timestamp > 0: if email.received_timestamp < min_timestamp: logger.debug(f"跳过旧邮件: {email.subject[:50]}") continue # 检查是否是 OpenAI 验证邮件 if not self.is_openai_verification_email(email, target_email): continue # 提取验证码 code = self.extract_verification_code(email) if code: # 去重检查 if code in used_codes: logger.debug(f"跳过已使用的验证码: {code}") continue logger.info( f"[{target_email or 'unknown'}] 找到验证码: {code}, " f"邮件主题: {email.subject[:30]}" ) return code return None def filter_emails_by_sender( self, emails: List[EmailMessage], sender_patterns: List[str], ) -> List[EmailMessage]: """ 按发件人过滤邮件 Args: emails: 邮件列表 sender_patterns: 发件人匹配模式列表 Returns: 过滤后的邮件列表 """ filtered = [] for email in emails: sender = email.sender.lower() if any(pattern.lower() in sender for pattern in sender_patterns): filtered.append(email) return filtered def filter_emails_by_subject( self, emails: List[EmailMessage], keywords: List[str], ) -> List[EmailMessage]: """ 按主题关键词过滤邮件 Args: emails: 邮件列表 keywords: 关键词列表 Returns: 过滤后的邮件列表 """ filtered = [] for email in emails: subject = email.subject.lower() if any(kw.lower() in subject for kw in keywords): filtered.append(email) return filtered # 全局解析器实例 _parser: Optional[EmailParser] = None def get_email_parser() -> EmailParser: """获取全局邮件解析器实例""" global _parser if _parser is None: _parser = EmailParser() return _parser