import os
import shutil
import argparse
import email
from email import policy
from email.parser import BytesParser
from datetime import datetime  

def load_keywords(keyword_path):
    """Load the keyword list from a UTF-8 text file, one keyword per line.

    Every line is stripped of surrounding whitespace and lower-cased;
    blank lines are skipped.

    Args:
        keyword_path: Path of the keyword configuration file.

    Returns:
        A list of lower-cased keywords. An empty list is returned (after
        printing an error message) when the file does not exist.
    """
    if not os.path.exists(keyword_path):
        print(f"錯誤: 找不到關鍵字檔案: {keyword_path}")
        return []

    # 'utf-8-sig' drops a leading byte-order mark if one is present (Windows
    # editors commonly write one). With plain 'utf-8' the BOM character stays
    # attached to the first keyword, which then never matches anything.
    with open(keyword_path, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return [kw.lower() for kw in stripped_lines if kw]

def _decode_text_payload(part):
    """Return the decoded text of one non-multipart part ('' if it is empty)."""
    raw = part.get_payload(decode=True)
    if not raw:
        return ""

    charset = part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The declared charset is not a codec Python knows (bogus or
        # misspelled label). Fall back to UTF-8 with replacement characters
        # instead of discarding the whole body.
        return raw.decode('utf-8', errors='replace')


def get_email_body(msg):
    """Extract the text/plain and text/html content of an email message.

    For a multipart message every part is visited via ``walk()``; parts whose
    Content-Disposition is ``attachment`` and parts of any other content type
    are skipped, and the remaining texts are concatenated in walk order.
    A non-multipart message is decoded as a whole.

    Args:
        msg: A parsed ``email.message`` object.

    Returns:
        The decoded body text, or an empty string when nothing could be
        extracted. Decoding problems are printed and otherwise ignored.
    """
    if not msg.is_multipart():
        try:
            return _decode_text_payload(msg)
        except Exception as e:  # best effort: a bad message must not abort the run
            print(f"解析內文時發生錯誤: {e}")
            return ""

    chunks = []
    for part in msg.walk():
        # get_content_disposition() returns the lower-cased disposition type
        # only, so "Attachment" is recognised and a file merely *named*
        # "attachment..." on an inline part is not mistaken for one.
        if part.get_content_disposition() == "attachment":
            continue
        if part.get_content_type() not in ("text/plain", "text/html"):
            continue

        try:
            chunks.append(_decode_text_payload(part))
        except Exception as e:  # best effort: skip the broken part, keep the rest
            print(f"解析部分內文時發生錯誤: {e}")

    return "".join(chunks)

def write_log(target_dir, eml_path, keyword):
    """Append one match record to the daily log file inside *target_dir*.

    The log file is named ``YYYYMMDD.log`` after the current local date and
    each record has the form ``[HH:MM:SS] <absolute eml path> | <keyword>``.
    The directory is created when it does not exist yet. Failures are
    printed and never raised, so logging cannot abort the caller.

    Args:
        target_dir: Directory that holds the log files.
        eml_path: Path of the .eml file to record (stored as an absolute path).
        keyword: The keyword that triggered the record.
    """
    # Take a single timestamp so the date in the file name and the time in
    # the record cannot disagree when the call straddles midnight.
    now = datetime.now()
    log_path = os.path.join(target_dir, f"{now:%Y%m%d}.log")

    # Absolute path makes the record unambiguous regardless of the cwd.
    abs_eml_path = os.path.abspath(eml_path)
    record = f"[{now:%H:%M:%S}] 檔案: {abs_eml_path} | 關鍵字: {keyword}\n"

    try:
        # An empty target_dir means "current directory"; makedirs('') would fail.
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        # Append mode keeps the earlier records of the same day.
        with open(log_path, 'a', encoding='utf-8') as f:
            f.write(record)
    except (OSError, UnicodeError) as e:
        print(f"寫入紀錄檔失敗: {e}")

def _unique_destination(target_dir, filename):
    """Return a path inside *target_dir* for *filename* that does not exist yet."""
    import time

    candidate = os.path.join(target_dir, filename)
    if not os.path.exists(candidate):
        return candidate

    # Name clash: append the current epoch second, then a running counter in
    # case that name is taken as well (e.g. two clashes within one second),
    # so an already-filed message is never overwritten by the move.
    base, ext = os.path.splitext(filename)
    stamp = int(time.time())
    candidate = os.path.join(target_dir, f"{base}_{stamp}{ext}")
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(target_dir, f"{base}_{stamp}_{counter}{ext}")
        counter += 1
    return candidate


def process_eml(eml_path, keywords, target_dir, log_dir):
    """Move an .eml file into *target_dir* when it contains one of *keywords*.

    The subject and the text body are lower-cased and searched for the first
    keyword (in list order) that occurs as a substring. On a match the file
    is moved to *target_dir* (created if necessary, renamed on a name clash)
    and, when *log_dir* is given, a record is appended to the daily log.
    Progress and errors are reported with ``print``; nothing is returned.

    Args:
        eml_path: Path of the .eml file to inspect.
        keywords: Iterable of keywords, expected to be lower-case already.
        target_dir: Directory that receives matching files.
        log_dir: Directory for log files, or None to disable logging.
    """
    if not os.path.exists(eml_path):
        print(f"錯誤: 找不到 EML 檔案: {eml_path}")
        return

    # 1. Parse the EML file.
    try:
        with open(eml_path, 'rb') as f:
            msg = BytesParser(policy=policy.default).parse(f)
    except Exception as e:
        print(f"讀取 EML 失敗: {e}")
        return

    # 2. Build the lower-cased text to search: subject plus body.
    body_text = get_email_body(msg).lower()
    subject = str(msg.get('subject', '')).lower()
    search_content = f"{subject}\n{body_text}"

    # 3. Find the first keyword that occurs in the text.
    found_keyword = next((kw for kw in keywords if kw in search_content), None)
    if not found_keyword:
        print("未發現任何關鍵字，檔案保留原處。")
        return

    print(f"[發現關鍵字 '{found_keyword}'] 正在移動檔案...")

    # Make sure the destination directory exists.
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)
        print(f"已建立目錄: {target_dir}")

    # 4. Move the file to a destination name that is not in use.
    filename = os.path.basename(eml_path)
    dest_path = _unique_destination(target_dir, filename)

    try:
        shutil.move(eml_path, dest_path)
    except Exception as e:
        print(f"移動檔案失敗: {e}")
        return

    print(f"成功將 {filename} 移動至 {target_dir}")

    # 5. Log only after a successful move; the record carries the original path.
    if log_dir is not None:
        write_log(log_dir, eml_path, found_keyword)

def main():
    """Parse the command-line arguments and run the keyword filter once.

    Positional arguments: the .eml file, the keyword file, the target
    directory for matching mails and, optionally, a log directory. The
    program stops early when the keyword file yields no keywords.
    """
    parser = argparse.ArgumentParser(description="EML 關鍵字過濾工具 (含 Log 紀錄)")
    parser.add_argument("eml_path", help="[必要]要檢查的 .eml 檔案路徑")
    parser.add_argument("keywords_file", help="[必要]關鍵字設定檔路徑 (txt)")
    # Backslashes in the example Windows paths are escaped explicitly:
    # "\e", "\m" and "\l" are invalid escape sequences that newer Python
    # versions warn about. The displayed help text is unchanged.
    parser.add_argument(
        "target_dir",
        help="[必要]符合關鍵字時要存放的目標目錄, 例: D:\\email_filter\\mails",
    )
    parser.add_argument(
        "log_dir",
        nargs='?',
        help="[可選]紀錄檔目錄, 例: D:\\email_filter\\logs, 檔名格式為 YYYYMMDD.log, 不給參數即不產生紀錄檔",
    )

    args = parser.parse_args()

    keywords = load_keywords(args.keywords_file)
    if not keywords:
        print("沒有有效的關鍵字，程式結束。")
        return

    process_eml(args.eml_path, keywords, args.target_dir, args.log_dir)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()