#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Migration script — نقل الجداول من SOURCES إلى TARGET_DB (محسّن)
ملاحظات:
- يفرغ جداول التارجت مرة واحدة قبل النقل (فقط الجداول المشتركة مع أي source).
- يتجاهل أعمدة AUTO_INCREMENT في التارجت.
- يقصّ تلقائياً نصوص VARCHAR الطويلة لتتناسب مع طول العمود في التارجت بدلاً من الفشل.
- يحاول تحويل القيم الرقمية إلى النوع المناسب، ويسجل الصفوف الفاشلة مع سبب الفشل.
- يستخدم دفعات ويفك الدفعات إلى إدخال صف صف عند حدوث خطأ.
- احتياطي: اعمل backup للـ target قبل التشغيل.
"""
import re
import pymysql
import traceback
import csv
from datetime import datetime

# ================= CONFIG =================
# Database connection settings shared by every connection this script opens.
# NOTE(review): credentials are hard-coded in source — move them to
# environment variables or a secrets file before sharing/committing.
DB_HOST = "localhost"
DB_USER = "rizk"
DB_PASS = "Mostafarizk27@"

# Database that receives all migrated rows.
TARGET_DB = "kalamcx"

# Source databases to migrate; when a target table has a `tenant_id`
# column, rows copied from each source are tagged with its tenant_id.
SOURCES = [
    {"db": "futuredb", "tenant_id": 1},
    {"db": "LLKllk", "tenant_id": 3}
   
]

# Number of rows fetched from the source / inserted into the target per batch.
BATCH_SIZE = 1000
# ==========================================

def log(msg):
    """Print *msg* to stdout, prefixed with a human-readable timestamp."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{stamp}] {msg}")

def connect(db, cursorclass=None):
    """Connect and return connection object. cursorclass can be provided."""
    try:
        kwargs = dict(
            host=DB_HOST,
            user=DB_USER,
            password=DB_PASS,
            database=db,
            autocommit=False,
            charset='utf8mb4',
            use_unicode=True,
        )
        if cursorclass:
            kwargs['cursorclass'] = cursorclass
        return pymysql.connect(**kwargs)
    except Exception as e:
        log(f"❌ Cannot connect to {db}: {e}")
        raise

def get_tables(conn):
    """Return the list of table names in the connected database.

    Handles both tuple rows (default cursor) and dict rows (DictCursor).
    """
    with conn.cursor() as cur:
        cur.execute("SHOW TABLES")
        names = []
        for record in cur.fetchall():
            if isinstance(record, dict):
                names.append(next(iter(record.values())))
            else:
                names.append(record[0])
        return names
    
def get_columns_info(conn, table):
    """Return the SHOW COLUMNS rows for *table* as a list of dicts."""
    query = f"SHOW COLUMNS FROM `{table}`"
    with conn.cursor(pymysql.cursors.DictCursor) as cur:
        cur.execute(query)
        return cur.fetchall()

def get_auto_increment_cols(conn, table):
    """List the names of *table*'s AUTO_INCREMENT columns."""
    result = []
    for col in get_columns_info(conn, table):
        extra = (col.get('Extra') or '').lower()
        if 'auto_increment' in extra:
            result.append(col['Field'])
    return result

# Exact base-type names, used instead of substring matching: the original
# `'int' in t` test wrongly classified e.g. the geometry type POINT as numeric.
_INT_BASES = frozenset(('int', 'bigint', 'smallint', 'mediumint', 'tinyint'))
_NUMERIC_BASES = _INT_BASES | frozenset(('decimal', 'float', 'double', 'numeric'))


def parse_col_type(type_str):
    """
    Parse a MySQL column type string (from SHOW COLUMNS) into a spec dict.

    Examples of inputs: ``varchar(255)``, ``int(11) unsigned``,
    ``decimal(10,2)``, ``text``.

    Args:
        type_str: the raw `Type` value reported by SHOW COLUMNS.

    Returns:
        dict with keys:
            base (str): lowercased base type (e.g. 'varchar', 'int'); the
                full lowercased string when the type has no length suffix
                and contains spaces/extras.
            max_len (int | None): declared length/precision, if present.
            is_text (bool): True for char/varchar and the *text family.
            is_numeric (bool): True for integer/decimal/float/double types.
            is_unsigned (bool): True when 'unsigned' appears in the type.
    """
    t = type_str.lower()
    res = {'base': t, 'max_len': None, 'is_text': False, 'is_numeric': False, 'is_unsigned': False}
    if 'unsigned' in t:
        res['is_unsigned'] = True
        t = t.replace('unsigned', '').strip()
    m = re.match(r'([a-z]+)\((\d+)(?:,\s*\d+)?\)', t)
    if m:
        base = m.group(1)
        res['base'] = base
        res['max_len'] = int(m.group(2))
        if base in ('varchar', 'char'):
            res['is_text'] = True
        # Bug fix: compare against exact base names (also covers 'numeric',
        # which the original parenthesized branch omitted).
        if base in _NUMERIC_BASES:
            res['is_numeric'] = True
    else:
        # Bare types without a length suffix: text, longtext, datetime, ...
        base = t.split()[0] if t else t
        if base.endswith('text'):
            res['is_text'] = True
        # Bug fix: 'point'/'multipoint' etc. no longer match as numeric.
        if base in _NUMERIC_BASES:
            res['is_numeric'] = True
    return res

def build_target_col_specs(tgt_cols_info):
    """Map each target column name to its parsed type spec (see parse_col_type)."""
    return {col['Field']: parse_col_type(col['Type']) for col in tgt_cols_info}

def write_failed_row(table, row_values, reason):
    """Append a failed row (prefixed with the failure reason) to a per-table CSV."""
    out_path = f"failed_inserts_{table}.csv"
    try:
        with open(out_path, mode='a', newline='', encoding='utf-8') as fh:
            csv.writer(fh).writerow([reason, *row_values])
    except Exception as exc:
        log(f"⚠️ Could not write failed row to {out_path}: {exc}")

def safe_prepare_row(raw_row, common_cols, tgt_specs):
    """
    Coerce one source row into values safe to insert into the target table.

    Handling per column:
      - None passes through as NULL;
      - bytes are decoded to str (utf-8, bad bytes replaced);
      - text columns are truncated to the target's declared max length;
      - numeric columns get int/float conversion, empty strings become NULL,
        and unconvertible values are kept as short strings or set to NULL;
      - everything else (dates, etc.) is passed through unchanged.

    Args:
        raw_row: mapping of column name -> value (a dict is expected; any
            other object must support `raw_row[col]` by column name).
        common_cols: ordered list of column names to emit.
        tgt_specs: column name -> spec dict from parse_col_type().

    Returns:
        (values, issues): prepared value list in common_cols order, and a
        list of human-readable problem tags (each truncation tag starts
        with 'truncated' — transfer_table relies on that prefix).
    """
    vals = []
    issues = []
    for col in common_cols:
        val = raw_row.get(col) if isinstance(raw_row, dict) else raw_row[col]
        spec = tgt_specs.get(col, {})

        # NULLs pass straight through.
        if val is None:
            vals.append(None)
            continue

        # Decode raw bytes so truncation/len() below operate on text.
        if isinstance(val, (bytes, bytearray)):
            try:
                val = val.decode('utf-8', errors='replace')
            except Exception:
                val = str(val)

        # CHAR/VARCHAR/TEXT: trim to the declared column length.
        if spec.get('is_text'):
            s = str(val)
            max_len = spec.get('max_len')
            if max_len and len(s) > max_len:
                s = s[:max_len]
                # Bug fix: report the actual target length instead of the
                # hard-coded placeholder "33,546" the original logged.
                issues.append(f"truncated {col} to {max_len}")
            vals.append(s)
            continue

        # Numeric columns: coerce, treating empty strings as NULL.
        if spec.get('is_numeric'):
            try:
                base = spec.get('base', '')
                if str(val).strip() == "":
                    vals.append(None)
                elif base in ('int', 'bigint', 'smallint', 'mediumint', 'tinyint'):
                    # int(float(...)) also accepts strings like "7.9".
                    vals.append(int(float(val)))
                else:
                    # decimal / float / double
                    vals.append(float(val))
            except Exception:
                # Conversion failed: keep short originals as text, drop huge ones.
                sval = str(val)
                if len(sval) <= 255:
                    vals.append(sval)
                    issues.append(f"numeric_conv_failed_{col}")
                else:
                    vals.append(None)
                    issues.append(f"numeric_conv_failed_and_set_null_{col}")
            continue

        # Default: keep as-is (strings, dates, etc.).
        vals.append(val)
    return vals, issues

def transfer_table(src_conn, tgt_conn, table, tenant_id, target_col_specs):
    """
    Copy every row of *table* from the source DB into the target DB.

    Column handling:
      - only columns that exist in BOTH databases are transferred;
      - target AUTO_INCREMENT columns are never written;
      - a source `tenant_id` column is ignored; when the target has a
        `tenant_id` column it is filled with *tenant_id* instead.

    Rows are streamed with a server-side cursor and inserted in batches of
    BATCH_SIZE. If a batch insert fails, the batch is rolled back and
    retried one row at a time; rows that still fail are appended to a
    per-table CSV via write_failed_row().

    Returns:
        stats dict ({'selected', 'inserted', 'failed', 'truncated',
        'conversion_issues'}) on completion, or None when the table is
        skipped early (column read error, no common columns, select error).
    """
    log(f"📦 Start transfer: `{table}` (tenant={tenant_id})")

    try:
        src_cols_info = get_columns_info(src_conn, table)
        tgt_cols_info = get_columns_info(tgt_conn, table)
    except Exception as e:
        log(f"❌ Skip `{table}` — cannot read columns: {e}")
        return

    src_cols = [c['Field'] for c in src_cols_info]
    tgt_cols = [c['Field'] for c in tgt_cols_info]

    auto_cols = get_auto_increment_cols(tgt_conn, table)
    if auto_cols:
        log(f"ℹ️ Skipping auto-increment columns for `{table}`: {auto_cols}")

    common_cols = [col for col in src_cols if col in tgt_cols and col not in auto_cols and col != "tenant_id"]

    if not common_cols:
        log(f"⚠️ No matching non-auto-increment columns to transfer for `{table}` — skipping")
        return

    include_tenant = "tenant_id" in tgt_cols

    insert_cols = common_cols.copy()
    if include_tenant:
        insert_cols.append("tenant_id")

    insert_placeholders = ", ".join(["%s"] * len(insert_cols))
    insert_query = f"INSERT INTO `{table}` ({', '.join(['`'+c+'`' for c in insert_cols])}) VALUES ({insert_placeholders})"
    select_query = f"SELECT {', '.join(['`'+c+'`' for c in common_cols])} FROM `{table}`"

    # Stats accumulated over the whole table.
    selected_count = 0
    inserted_count = 0
    failed_count = 0
    truncated_count = 0
    conversion_issues = 0

    # Server-side cursor streams rows instead of loading the whole table
    # into memory; SSCursor yields plain tuples (handled below).
    try:
        src_cur = src_conn.cursor(pymysql.cursors.SSCursor)
    except Exception:
        src_cur = src_conn.cursor()

    tgt_cur = tgt_conn.cursor()
    try:
        src_cur.execute(select_query)
    except Exception as e:
        log(f"❌ Select error in `{table}`: {e}")
        # Bug fix: the original used bare `except:` here, which would also
        # swallow KeyboardInterrupt/SystemExit during cleanup.
        try:
            src_cur.close()
        except Exception:
            pass
        try:
            tgt_cur.close()
        except Exception:
            pass
        return

    batch = []
    batch_raw_rows = []  # (raw_row, issues) kept for per-row fallback logging
    tgt_specs = {col: target_col_specs.get(col, {}) for col in common_cols}

    while True:
        rows = src_cur.fetchmany(BATCH_SIZE)
        if not rows:
            break
        selected_count += len(rows)

        # SSCursor returns tuples; map them onto common_cols for convenience.
        for r in rows:
            if isinstance(r, (list, tuple)):
                raw_row = {common_cols[i]: r[i] for i in range(len(common_cols))}
            else:
                raw_row = r  # already a dict

            prepared_vals, issues = safe_prepare_row(raw_row, common_cols, tgt_specs)
            if include_tenant:
                prepared_vals.append(tenant_id)
            batch.append(prepared_vals)
            batch_raw_rows.append((raw_row, issues))

            # NOTE(review): issues are counted even for rows whose insert
            # later fails, so truncated/conversion stats are upper bounds.
            for it in issues:
                if it.startswith('truncated'):
                    truncated_count += 1
                else:
                    conversion_issues += 1

        # Try the whole batch first.
        if batch:
            try:
                tgt_cur.executemany(insert_query, batch)
                tgt_conn.commit()
                inserted_count += len(batch)
                log(f"✅ `{table}`: inserted total {inserted_count} rows so far")
                batch = []
                batch_raw_rows = []
            except Exception as e:
                log(f"❌ Batch error in `{table}`: {e}")
                log(traceback.format_exc())
                try:
                    tgt_conn.rollback()
                except Exception:
                    pass

                # Fallback: insert rows one at a time and log each failure.
                for idx, single in enumerate(batch):
                    raw_row, issues = batch_raw_rows[idx]
                    try:
                        tgt_cur.execute(insert_query, single)
                        tgt_conn.commit()
                        inserted_count += 1
                    except Exception as e2:
                        try:
                            tgt_conn.rollback()
                        except Exception:
                            pass
                        failed_count += 1
                        reason = f"single_insert_failed: {e2}"
                        log(f"⚠️ Failed single insert in `{table}`: {e2} — values count {len(single)}")
                        # Human-readable row goes to CSV with reason + issues.
                        write_failed_row(table, list(single), reason + " | issues: " + (", ".join(issues) if issues else "none"))
                batch = []
                batch_raw_rows = []

    try:
        src_cur.close()
    except Exception:
        pass
    try:
        tgt_cur.close()
    except Exception:
        pass

    log(f"📊 `{table}` — selected from source: {selected_count}")
    log(f"📊 `{table}` — successfully inserted into target: {inserted_count}")
    log(f"📊 `{table}` — failed rows: {failed_count}, truncated fields: {truncated_count}, conversion issues: {conversion_issues}")

    log(f"🏁 Finished `{table}`")
    return {'selected': selected_count, 'inserted': inserted_count, 'failed': failed_count, 'truncated': truncated_count, 'conversion_issues': conversion_issues}

def main():
    """Run the full migration: truncate shared target tables, then copy
    every common table from each source DB into the target, tagging rows
    with the source's tenant_id where the target has that column."""
    log("🚀 Migration started — improved safe mode")

    try:
        tgt_conn = connect(TARGET_DB)
    except Exception as e:
        log(f"❌ Cannot connect to target DB `{TARGET_DB}`: {e}")
        return

    try:
        target_tables = set(get_tables(tgt_conn))
        log(f"📋 Target `{TARGET_DB}` tables count: {len(target_tables)}")
    except Exception as e:
        log(f"❌ Could not list tables in target: {e}")
        tgt_conn.close()
        return

    # Collect all source tables first so we can TRUNCATE the shared tables
    # only once, before any data is moved.
    all_source_tables = set()
    for src in SOURCES:
        src_db = src["db"]
        try:
            tmp_conn = connect(src_db)
            src_tbls = get_tables(tmp_conn)
            all_source_tables.update(src_tbls)
            tmp_conn.close()
        except Exception as e:
            log(f"⚠️ Could not connect/list tables for source `{src_db}` while collecting tables: {e}")
            # Continue with the remaining sources.

    # Tables shared between the target and at least one source.
    to_truncate = sorted(list(target_tables.intersection(all_source_tables)))
    if to_truncate:
        log(f"⚠️ Will truncate {len(to_truncate)} target tables that exist in sources (once before migration)")
        try:
            # Disable FK/unique checks so truncation order doesn't matter.
            with tgt_conn.cursor() as c:
                c.execute("SET FOREIGN_KEY_CHECKS = 0")
                c.execute("SET UNIQUE_CHECKS = 0")
            tgt_conn.commit()
            for table in to_truncate:
                try:
                    with tgt_conn.cursor() as c:
                        c.execute(f"TRUNCATE TABLE `{table}`")
                    tgt_conn.commit()
                    log(f"🧹 Truncated `{table}` in target")
                except Exception as e:
                    log(f"❌ Could not truncate `{table}`: {e}")
                    try:
                        tgt_conn.rollback()
                    except Exception:
                        pass
        except Exception as e:
            log(f"❌ Error while disabling checks / truncating: {e}")
    else:
        log("ℹ️ No tables to truncate (no common tables with sources)")

    # Build the target column specs once per table, shared by all sources.
    target_col_specs_map = {}
    for table in to_truncate:
        try:
            tgt_cols_info = get_columns_info(tgt_conn, table)
            target_col_specs_map[table] = build_target_col_specs(tgt_cols_info)
        except Exception as e:
            log(f"⚠️ Could not get column specs for `{table}`: {e}")
            target_col_specs_map[table] = {}

    # Migrate each source in turn.
    for src in SOURCES:
        src_db = src["db"]
        tenant_id = src.get("tenant_id", None)
        log("====================================")
        log(f"🔄 Processing source DB: `{src_db}` (tenant={tenant_id})")

        try:
            src_conn = connect(src_db)
        except Exception as e:
            log(f"❌ Skipping source `{src_db}` due to connection error: {e}")
            continue

        try:
            src_tables = get_tables(src_conn)
            log(f"📋 Found {len(src_tables)} tables in source `{src_db}`")
        except Exception as e:
            log(f"❌ Could not list tables in source `{src_db}`: {e}")
            try:
                src_conn.close()
            except Exception:
                pass
            continue

        for table in src_tables:
            if table not in target_tables:
                log(f"⏭️ Skipped (not in target): `{table}`")
                continue

            try:
                # Use cached column specs if available, otherwise build now
                # (covers source tables that were not in `to_truncate`).
                if table not in target_col_specs_map:
                    try:
                        tgt_cols_info = get_columns_info(tgt_conn, table)
                        target_col_specs_map[table] = build_target_col_specs(tgt_cols_info)
                    except Exception as e:
                        target_col_specs_map[table] = {}
                transfer_table(src_conn, tgt_conn, table, tenant_id, target_col_specs_map[table])
            except Exception as e:
                log(f"❌ Unexpected error for `{table}`: {e}")
                log(traceback.format_exc())

        try:
            src_conn.close()
        except Exception:
            pass

    # Re-enable the constraints disabled before truncation.
    try:
        with tgt_conn.cursor() as c:
            c.execute("SET FOREIGN_KEY_CHECKS = 1")
            c.execute("SET UNIQUE_CHECKS = 1")
        tgt_conn.commit()
        log("🔁 Re-enabled FOREIGN_KEY_CHECKS and UNIQUE_CHECKS on target")
    except Exception as e:
        log(f"❌ Could not re-enable checks: {e}")
        try:
            tgt_conn.rollback()
        except Exception:
            pass

    try:
        tgt_conn.close()
    except Exception:
        pass

    log("🎉 Migration finished")

if __name__ == "__main__":
    main()