Files
codex-register/src/services/outlook/email_parser.py
237899745 0f9948ffc3
Some checks are pending
Docker Image CI / build-and-push-image (push) Waiting to run
feat: codex-register with Sub2API增强 + Playwright引擎
2026-03-22 00:24:16 +08:00

229 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Email parsing and verification-code extraction.
"""
import logging
import re
from typing import Optional, List, Dict, Any
from ...config.constants import (
OTP_CODE_SIMPLE_PATTERN,
OTP_CODE_SEMANTIC_PATTERN,
OPENAI_EMAIL_SENDERS,
OPENAI_VERIFICATION_KEYWORDS,
)
from .base import EmailMessage
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class EmailParser:
    """
    Email parser.

    Recognises OpenAI verification emails and extracts the verification
    code from them, using two regular expressions compiled once in
    ``__init__`` (a "simple" pattern and a case-insensitive "semantic" one).
    """

    def __init__(self):
        # Compile the regular expressions once so every lookup reuses them.
        self._simple_pattern = re.compile(OTP_CODE_SIMPLE_PATTERN)
        self._semantic_pattern = re.compile(OTP_CODE_SEMANTIC_PATTERN, re.IGNORECASE)

    @staticmethod
    def _first_group(pattern, text: Optional[str]) -> Optional[str]:
        """
        Search ``text`` with a compiled ``pattern`` and return the captured code.

        Args:
            pattern: Compiled regular expression.
            text: Text to search; ``None`` or an empty string yields ``None``.

        Returns:
            Capture group 1 of the first match (or the whole match when the
            pattern defines no capture group), or ``None`` when nothing matches.
        """
        if not text:
            return None
        match = pattern.search(text)
        if match is None:
            return None
        # The configured patterns are expected to capture the code in group 1;
        # falling back to group 0 avoids an IndexError for group-less patterns.
        return match.group(1) if pattern.groups else match.group(0)

    def is_openai_verification_email(
        self,
        email: "EmailMessage",
        target_email: Optional[str] = None,
    ) -> bool:
        """
        Decide whether ``email`` is an OpenAI verification email.

        Args:
            email: Email object (``sender``, ``subject`` and ``body`` are read).
            target_email: Target mailbox address. Kept for interface
                compatibility; it is not used for filtering here.

        Returns:
            ``True`` when the sender matches one of ``OPENAI_EMAIL_SENDERS``
            and the subject or body contains one of
            ``OPENAI_VERIFICATION_KEYWORDS``; ``False`` otherwise.
        """
        # Missing header fields are treated as empty strings.
        sender = (email.sender or "").lower()

        # 1. The sender must be OpenAI. Both sides are lowercased so the
        #    comparison is case-insensitive, like filter_emails_by_sender.
        if not any(s.lower() in sender for s in OPENAI_EMAIL_SENDERS):
            logger.debug(f"邮件发件人非 OpenAI: {sender}")
            return False

        # 2. The subject or body must contain a verification keyword.
        subject = (email.subject or "").lower()
        body = (email.body or "").lower()
        combined = f"{subject} {body}"
        if not any(kw.lower() in combined for kw in OPENAI_VERIFICATION_KEYWORDS):
            logger.debug(f"邮件未包含验证关键词: {subject[:50]}")
            return False

        # 3. The recipient check was removed: for alias mailboxes the recipient
        #    in the IMAP headers may not match, so only sender + keywords decide.
        logger.debug(f"识别为 OpenAI 验证邮件: {subject[:50]}")
        return True

    def extract_verification_code(
        self,
        email: "EmailMessage",
    ) -> Optional[str]:
        """
        Extract the verification code from an email.

        Priority:
            1. Simple pattern applied to the subject.
            2. Semantic pattern applied to the body (e.g. "code is 123456").
            3. Fallback: simple pattern applied to the body.

        Args:
            email: Email object.

        Returns:
            The verification code string, or ``None`` if none was found.
        """
        # Each entry: (debug-log label, extractor, text it is applied to).
        attempts = (
            ("从主题提取验证码", self._extract_from_subject, email.subject),
            ("从正文语义提取验证码", self._extract_semantic, email.body),
            ("从正文兜底提取验证码", self._extract_simple, email.body),
        )
        for label, extract, text in attempts:
            code = extract(text)
            if code:
                logger.debug(f"{label}: {code}")
                return code
        return None

    def _extract_from_subject(self, subject: str) -> Optional[str]:
        """Extract a code from the subject using the simple pattern."""
        return self._first_group(self._simple_pattern, subject)

    def _extract_semantic(self, body: str) -> Optional[str]:
        """Extract a code from the body using the semantic pattern."""
        return self._first_group(self._semantic_pattern, body)

    def _extract_simple(self, body: str) -> Optional[str]:
        """Extract a code from the body using the simple pattern."""
        return self._first_group(self._simple_pattern, body)

    def find_verification_code_in_emails(
        self,
        emails: List["EmailMessage"],
        target_email: Optional[str] = None,
        min_timestamp: int = 0,
        used_codes: Optional[set] = None,
    ) -> Optional[str]:
        """
        Find a verification code in a list of emails.

        Emails are examined in the order given; the first code that has not
        already been used is returned.

        Args:
            emails: List of emails.
            target_email: Target mailbox address (used in the log message and
                forwarded to ``is_openai_verification_email``).
            min_timestamp: Minimum timestamp; emails whose positive
                ``received_timestamp`` is lower are skipped. ``0`` disables
                the filter.
            used_codes: Set of codes that were already used (for de-duplication).

        Returns:
            The verification code string, or ``None`` if none was found.
        """
        seen_codes = used_codes if used_codes is not None else set()

        for email in emails:
            # Timestamp filter: only applied when both timestamps are positive.
            if 0 < email.received_timestamp < min_timestamp:
                logger.debug(f"跳过旧邮件: {(email.subject or '')[:50]}")
                continue

            # Only OpenAI verification emails are considered.
            if not self.is_openai_verification_email(email, target_email):
                continue

            code = self.extract_verification_code(email)
            if not code:
                continue

            # De-duplication: skip codes the caller has already consumed.
            if code in seen_codes:
                logger.debug(f"跳过已使用的验证码: {code}")
                continue

            logger.info(
                f"[{target_email or 'unknown'}] 找到验证码: {code}, "
                f"邮件主题: {(email.subject or '')[:30]}"
            )
            return code

        return None

    def filter_emails_by_sender(
        self,
        emails: List["EmailMessage"],
        sender_patterns: List[str],
    ) -> List["EmailMessage"]:
        """
        Filter emails by sender.

        Args:
            emails: List of emails.
            sender_patterns: Substrings to look for in the sender
                (case-insensitive).

        Returns:
            The emails whose sender contains at least one pattern, in their
            original order.
        """
        # Lowercase the patterns once instead of once per email.
        needles = [pattern.lower() for pattern in sender_patterns]
        return [
            email
            for email in emails
            if any(needle in (email.sender or "").lower() for needle in needles)
        ]

    def filter_emails_by_subject(
        self,
        emails: List["EmailMessage"],
        keywords: List[str],
    ) -> List["EmailMessage"]:
        """
        Filter emails by subject keywords.

        Args:
            emails: List of emails.
            keywords: Keywords to look for in the subject (case-insensitive).

        Returns:
            The emails whose subject contains at least one keyword, in their
            original order.
        """
        # Lowercase the keywords once instead of once per email.
        needles = [kw.lower() for kw in keywords]
        return [
            email
            for email in emails
            if any(needle in (email.subject or "").lower() for needle in needles)
        ]
# Module-wide parser instance; stays None until the first request for it.
_parser: Optional[EmailParser] = None


def get_email_parser() -> EmailParser:
    """Return the module-wide EmailParser, constructing it on first call."""
    global _parser
    if _parser is not None:
        return _parser
    _parser = EmailParser()
    return _parser