import requests
from pathlib import Path
import re
import csv
import sys

# ---------------------------
# Configuration
# ---------------------------
# OAuth access token; sent on every request as "Authorization: Zoho-oauthtoken <token>".
# Replace the placeholder before running.
ACCESS_TOKEN = "YOUR_ACCESS_TOKEN_HERE"
# URL prefix for the CRM endpoints used below (Contacts search and Contacts create).
CRM_API_BASE = "https://www.zohoapis.com/crm/v2"
# Folder scanned (non-recursively) for .pdf/.doc/.docx CV files.
CV_FOLDER = Path(r"D:\zohocv\batch")
# CSV log that main() appends to; a relative path is resolved against the
# current working directory.
LOG_FILE = Path("upload_log.csv")
# ---------------------------

# Known email providers and common TLD tokens.
# Filename tokens are compared against these sets case-insensitively.
KNOWN_PROVIDERS = {
    "gmail", "yahoo", "hotmail", "outlook", "icloud", "live", "yandex", "protonmail",
    "msn", "aol"
}
KNOWN_TLDS = {
    "com", "net", "org", "co", "uk", "eg", "io", "dev", "edu", "gov", "info"
}

def extract_email_from_stem(stem: str):
    """Rebuild an email address from an underscore-separated filename stem.

    The stem is split on "_" and trailing all-digit tokens (e.g. a "_2"
    duplicate counter) are dropped. Two strategies are then tried in order:

    1. Provider match: the first token found in KNOWN_PROVIDERS becomes the
       domain name, and every token before it is joined with "." to form the
       local part (original casing kept). The domain suffix is taken from the
       tokens right after the provider:
         * "co" followed by "uk" or "eg"  -> "co.uk" / "co.eg"
         * a single KNOWN_TLDS token      -> that token
         * anything else                  -> no suffix ("user@provider")
       Tokens after the suffix are ignored.
    2. Fallback: the first token is the local part and the remaining tokens,
       joined with ".", are the domain. Accepted only if the local part is
       non-empty and the domain contains a dot.

    Examples:
        "john_doe_gmail_com_2" -> "john.doe@gmail.com"
        "jane_yahoo_co_uk"     -> "jane@yahoo.co.uk"
        "info_acme_org"        -> "info@acme.org"

    Args:
        stem: Filename without its extension.

    Returns:
        The reconstructed address, or None when nothing usable is found
        (empty stem, only numeric tokens, provider with no local part, or a
        fallback candidate without a dotted domain).
    """
    if not stem:
        return None

    parts = stem.split("_")

    # Trailing numeric tokens are treated as counters, not part of the address.
    while parts and parts[-1].isdigit():
        parts.pop()

    if not parts:
        return None

    # Country codes accepted after a "co" token as a two-label suffix.
    compound_country_codes = {"uk", "eg"}

    lowered = [p.lower() for p in parts]
    for i, token in enumerate(lowered):
        if token not in KNOWN_PROVIDERS:
            continue

        user = ".".join(parts[:i])
        if not user:
            # Provider token with nothing usable in front of it.
            return None

        following = lowered[i + 1:i + 3]
        # The two-label suffix must be checked before the single-token case,
        # otherwise "yahoo_co_uk" is cut short to "yahoo.co".
        if (
            len(following) == 2
            and following[0] == "co"
            and following[1] in compound_country_codes
        ):
            return f"{user}@{token}.co.{following[1]}"
        if following and following[0] in KNOWN_TLDS:
            return f"{user}@{token}.{following[0]}"
        # No recognizable suffix: keep the bare provider as the domain.
        return f"{user}@{token}"

    # Fallback works on the counter-stripped tokens so a trailing "_3" does
    # not end up as a bogus numeric top-level label.
    simple = "_".join(parts).replace("_", "@", 1).replace("_", ".")
    local, _, domain = simple.partition("@")
    if local and "." in domain:
        return simple

    return None

def search_contact_by_email(access_token, email):
    """Look up a contact in Zoho CRM by email and return its record id.

    Args:
        access_token: OAuth token sent as "Zoho-oauthtoken <token>".
        email: Address to search for; a falsy value short-circuits to None.

    Returns:
        The "id" of the first record in the response's "data" list, or None.
        None is returned both when no contact matches (HTTP 204, or 200 with
        empty data) and when the lookup itself fails (network error,
        unexpected status, unparseable body), so callers cannot tell
        "not found" from "lookup failed" by the return value alone; failures
        are reported on stdout.
    """
    if not email:
        return None
    headers = {"Authorization": f"Zoho-oauthtoken {access_token}"}
    url = f"{CRM_API_BASE}/Contacts/search?email={requests.utils.quote(email)}"
    try:
        r = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as e:
        print(f"[ERROR] search request failed for {email}: {e}")
        return None

    if r.status_code == 204:
        # No content: the search matched nothing.
        return None
    if r.status_code != 200:
        print(f"[WARN] search API returned {r.status_code} for {email}: {r.text}")
        return None

    try:
        data = r.json().get("data")
    except (ValueError, AttributeError) as e:
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        # Either way a malformed 200 response must not abort the whole batch.
        print(f"[WARN] search API returned an unexpected body for {email}: {e}")
        return None

    if data:
        return data[0].get("id")
    return None

def create_contact(access_token, email):
    """POST a new contact for ``email`` to Zoho CRM and return the raw response.

    The part of the address before the first "@" is used as Last_Name.
    No status checking is done here; the caller inspects the returned
    ``requests`` response. Exceptions raised by ``requests.post`` propagate.
    """
    # Last_Name is mandatory in Zoho CRM, so derive one from the address.
    local_part = email.split("@")[0]
    record = {"Last_Name": local_part, "Email": email}
    return requests.post(
        f"{CRM_API_BASE}/Contacts",
        headers={
            "Authorization": f"Zoho-oauthtoken {access_token}",
            "Content-Type": "application/json",
        },
        json={"data": [record]},
        timeout=30,
    )

def load_already_ok_entries(log_path: Path):
    """Collect the emails that a previous run logged with Status CREATED.

    Reads ``log_path`` as a CSV with a header row. A row counts when its
    "Status" column equals "CREATED" (after trimming, case-insensitive) and
    its "Email" column is non-empty after trimming. Rows missing either
    column are skipped. A missing file yields an empty set; a read error
    prints a warning and returns whatever was collected up to that point.
    """
    created = set()
    if log_path.exists():
        try:
            with open(log_path, newline="", encoding="utf-8") as handle:
                for record in csv.DictReader(handle):
                    # Short rows carry None for absent columns; treat as blank.
                    state = (record.get("Status") or "").strip().upper()
                    address = (record.get("Email") or "").strip()
                    if address and state == "CREATED":
                        created.add(address)
        except Exception as e:
            print(f"[WARN] Could not read existing log file: {e}")
    return created

def append_log_row(writer, log_file_obj, row):
    """Write one row to the CSV log and flush the file immediately.

    Args:
        writer: Object whose ``writerow`` accepts ``row``; main() passes a
            ``csv.DictWriter``.
        log_file_obj: Open file object to flush after the write; main()
            passes the file the writer is bound to.
        row: The row to write; main() passes a dict keyed by the log's
            column names.
    """
    writer.writerow(row)
    # Flush per row — presumably so rows already logged reach the file even
    # if the run is interrupted later.
    log_file_obj.flush()

def main():
    """Create a Zoho CRM contact for every CV file whose name encodes an email.

    For each .pdf/.doc/.docx file in CV_FOLDER (sorted by path):
      1. derive an email from the filename stem; log PARSE_ERROR if none;
      2. skip (SKIP) emails already logged as CREATED, or created earlier in
         this same run;
      3. search the CRM for the email and skip (SKIP) when a record exists;
      4. otherwise create the contact and log CREATED or ERROR.

    One row per file is appended to LOG_FILE and flushed immediately. A
    missing CV folder or a failed create request is reported instead of
    aborting the run with a traceback.
    """
    access_token = ACCESS_TOKEN

    if not CV_FOLDER.is_dir():
        print(f"[!] CV folder not found: {CV_FOLDER}")
        return

    cv_suffixes = {".pdf", ".doc", ".docx"}
    files = sorted(
        f for f in CV_FOLDER.iterdir()
        if f.is_file() and f.suffix.lower() in cv_suffixes
    )
    if not files:
        print("[!] No CV files found in folder.")
        return

    already_ok = load_already_ok_entries(LOG_FILE)
    print(f"[INFO] Files found: {len(files)}. Already created contacts: {len(already_ok)}")

    # Write the header for a new log and also for one that exists but is empty.
    needs_header = not LOG_FILE.exists() or LOG_FILE.stat().st_size == 0
    with open(LOG_FILE, "a", newline="", encoding="utf-8") as log_file:
        fieldnames = ["Email", "FileName", "Status", "Message"]
        writer = csv.DictWriter(log_file, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
            log_file.flush()

        def log_row(email, file_name, status, message):
            """Append one result row to the log."""
            append_log_row(
                writer,
                log_file,
                {"Email": email, "FileName": file_name, "Status": status, "Message": message},
            )

        for f in files:
            email = extract_email_from_stem(f.stem)

            if not email:
                msg = "PARSE_ERROR: could not extract email from filename"
                print(f"[ERROR] {f.name} -> {msg}")
                log_row("", f.name, "PARSE_ERROR", msg)
                continue

            if email in already_ok:
                print(f"[SKIP] {email} already created previously.")
                log_row(email, f.name, "SKIP", "Already created earlier")
                continue

            record_id = search_contact_by_email(access_token, email)
            if record_id:
                print(f"[SKIP] Contact already exists: {email}")
                log_row(email, f.name, "SKIP", "Contact already exists")
                continue

            try:
                resp = create_contact(access_token, email)
            except requests.RequestException as e:
                # A network failure on one file must not stop the rest.
                print(f"[ERROR] Create request failed for {email}: {e}")
                log_row(email, f.name, "ERROR", f"Request failed: {e}")
                continue

            if resp.status_code == 201:
                print(f"[OK] Created contact for {email}")
                log_row(email, f.name, "CREATED", "Contact created")
                # Remember it so a second file with the same address in this
                # run is skipped without relying on the remote search.
                already_ok.add(email)
            else:
                print(f"[ERROR] Failed to create contact {email}: {resp.text}")
                log_row(email, f.name, "ERROR", resp.text)

    print(f"[DONE] Processing completed. Check {LOG_FILE} for details.")

# Script entry point: run the batch only when executed directly, not on import.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C: print a short notice instead of a traceback and exit with status 1.
        print("\n[ABORT] Interrupted by user.")
        sys.exit(1)