import csv
import mysql.connector
from mysql.connector import Error
from datetime import datetime, date
import re

# --- CONFIGURE ---
host = 'localhost'
database = 'webhooks_db'           # Replace with your DB name
username = 'admin'          # Replace with your username
password = 'PhpmyAdmin@2025'              # Replace with your password
# csv_file_path = 'bbbb.csv'  # Path to your CSV
csv_file_path = 'Beneficiaries.csv'  # Path to your CSV

# -----------------

# Database column names (actual column names in the database table)
db_columns = ['id', 'name', 'last_name', 'job_title', 'work', 'mobile', 'last_seen_on_chat', 'locale', 'total_chat_sessions', 'first_seen_on_chat', 'ivf_id', 'time_zone', 'permanent_address', 'location_current_address', 'state', 'zipcode', 'location_permanent_address', 'facebook', 'twitter', 'linkedin', 'medium', 'keyword', 'last_contacted_time', 'last_contacted_mode', 'last_activity_type', 'last_activity_date', 'last_seen_on_web', 'score', 'subscription_status', 'unsubscribe_reason', 'other_unsubscribe_reasons', 'whatsapp_subscription_status', 'sms_subscription_status', 'recent_note', 'created_at', 'updated_at', 'source', 'campaign', 'territory', 'record_type', 'lifecycle_stage', 'status', 'lost_reason', 'original_campaign', 'original_medium', 'original_source', 'created_through_campaign', 'created_from_medium', 'created_from_source', 'most_recent_campaign', 'most_recent_medium', 'most_recent_source', 'work_email', 'active_sales_sequences', 'completed_sales_sequences', 'customer_fit', 'web_forms', 'last_assigned_at', 'location_id', 'location', 'data_owner_id', 'data_owner', 'created_by_id', 'created_by', 'updated_by_id', 'updated_by', 'import_id', 'emails', 'unique_id', 'age', 'parent_of_child', 'gender', 'child_moved_to_cvf', 'date_of_birth', 'date_of_birth_certificate_received', 'ivf_joining_date', 'reoffender', 'previous_ivf_id', 'social_category', 'caste_certificate_received', 'religion', 'nationality', 'state_permanent_address', 'current_address', 'state_current_address', 'cluster', 'convicted_undertrial', 'duration_if_convicted', 'case_text', 'ipc_section_no', 'education_qualification', 'marital_status', 'project_location', 'project', 'active_status', 'bail_out_date', 'release_date', 'drop_out_date', 'deceased_date', 'pre_existing_disease_yes_no', 'pre_existing_disease_1', 'pre_existing_disease_2', 'is_person_differently_abled', 'differently_abled', 'earning_member', 'occupation_of_earning_member', 'father_husband_name', 'avg_monthly_income', 'name_of_counsellor', 'number_of_children', 'no_of_children_below_18_years', 'ivf_support', 'type_of_support_family_details', 'family_member_total', 'amenities_available', 'source_of_meals', 'area_living', 'friends', 'awareness_on_community_resources', 'accomodation', 'availabledocuments', 'currently_incarcerated_or_not', 'case_type', 'current_incarceration_status', 'incarcerated_in_which_prison', 'custom_app', 'associated_activities', 'certified', 'if_not_certified', 'course_completed', 'role', 'child_in_creche', 'released_date', 'connected_with_foundation', 'smart_phone', 'employment_status', 'if_employed', 'income', 'post_released_rehab', 'rehab_project_year', 'rehab_project_name', 'rehab_support', 'rehab_documents_created', 'rehab_intervention', 'rehab_status', 'child_code', 'supported_by', 'child_born_in_creche', 'school_type', 'grade', 'financial_criteria', 'vulnerability_criterias', 'primary_contact_relation_with_child', 'neighbor_mobile_number', 'fathers_name', 'mothers_name', 'caregivers_name', 'caregivers_relationship_with_child', 'average_monthly_income_of_family_in_rs', 'earning_member_cvf', 'occupational_status_of_earning_member', 'living_condition_of_cvf_child', 'family_size', 'no_of_offspring_of_mother_of_child', 'no_of_children_18_years_amongst_offspring_of_mother', 'incarceration_status_history', 'govt_issued_documents_available_in_family', 'sibling_in_prison_creche', 'creche_ivf_id', 'sibling_cvf_id', 'sibling_profile_cvf', 'alumni', 'alumni_category', 'alumni_status', 'year_of_moving_to_alumni', 'alumni_working', 'if_working_alumni_sector', 'emotional_symptoms_scale_headaches', 'emotional_symptoms_scale_worry_a_lot', 'emotional_symptoms_scale_unhappy_tearful', 'emotional_symptoms_scale_nervous_new_situations', 'emotional_symptoms_scale_many_fears', 'eq_scale_score', 'conduct_problems_scale_angry_lose_temper', 'conduct_problems_scale_do_as_told', 'conduct_problems_scale_fight_a_lot', 'conduct_problems_scale_lying_cheating', 'conduct_problems_scale_take_things_not_mine', 'hyperactivity_inattention_scale_restless', 'hyperactivity_inattention_scale_fidgeting', 'hyperactivity_inattention_scale_distracted_concentration', 'hyperactivity_inattention_scale_think_before_doing', 'hyperactivity_inattention_scale_finish_work_attention_good', 'hyperactivity_inattention_score', 'peer_relationship_problems_scale_alone', 'peer_relationship_problems_scale_good_friend', 'peer_relationship_problems_scale_others_like_me', 'peer_relationship_problems_scale_better_with_adults', 'peer_relationship_problems_scale_bullied', 'peer_relationship_problems_score', 'prosocial_behaviour_scale_nice_care_feelings', 'prosocial_behaviour_scale_share_with_others', 'prosocial_behaviour_scale_helpful_if_hurt', 'prosocial_behaviour_scale_kind_to_younger_children', 'prosocial_behaviour_scale_offer_to_help_others', 'prosocial_behaviour_score', 'current_incarceration_status_father', 'incarceration_location_father', 'type_of_crime_father', 'working_status_father', 'working_sector_father', 'average_monthly_income_father', 'incarceration_status_mother', 'incarceration_location_mother', 'type_of_crime_mother', 'working_status_mother', 'working_sector_mother', 'average_monthly_income_mother', 'cvf_average_family_monthly_income', 'rehab_projects_year', 'prior_occupation', 'neighbourhood_communication', 'next_birth_date', 'tags']

# CSV column names (as they appear in the CSV file)
csv_columns = ['Id', 'Name', 'Last name', 'Job title', 'Work', 'Mobile', 'Last seen on chat', 'Locale', 'Total chat sessions', 'First seen on chat', 'IVF ID', 'Time zone', 'Permanent Address', 'Location - Current Address', 'State', 'Zipcode', 'Location - Permanent Address', 'Facebook', 'Twitter', 'LinkedIn', 'Medium', 'Keyword', 'Last contacted time', 'Last contacted mode', 'Last activity type', 'Last activity date', 'Last seen on web', 'Score', 'Subscription status', 'Unsubscribe reason', 'Other unsubscribe reasons', 'WhatsApp subscription status', 'SMS subscription status', 'Recent note', 'Created at', 'Updated at', 'Source', 'Campaign', 'Territory', 'Record type', 'Lifecycle stage', 'Status', 'Lost reason', 'Original campaign', 'Original medium', 'Original source', 'Created through campaign', 'Created from medium', 'Created from source', 'Most recent campaign', 'Most recent medium', 'Most recent source', 'Work email', 'Active sales sequences', 'Completed sales sequences', 'Customer fit', 'Web forms', 'Last assigned at', 'Location id', 'Location', 'Data owner id', 'Data owner', 'Created by id', 'Created by', 'Updated by id', 'Updated by', 'Import ID', 'Emails', 'Unique ID', 'Age', 'Parent of Child', 'Gender', 'Child Moved To CVF', 'Date of Birth', 'Date of birth Certificate received ?', 'IVF Joining date', 'Reoffender', 'Previous IVF - ID', 'Social Category', 'Caste Certificate Received?', 'Religion', 'Nationality', 'State - Permanent Address', 'Current Address', 'State - Current Address', 'Cluster', 'Convicted / Undertrial', 'Duration (If Convicted)', 'Case', 'IPC Section No', 'Education & Qualification', 'Marital Status', 'Project Location', 'Project', 'Active Status', 'Bail Out Date', 'Release Date', 'Drop Out Date', 'Deceased Date', 'Pre-existing Disease - Yes/No?', 'Pre Existing Disease', 'Pre-existing Disease', 'Is Person Differently abled ?', 'Differentially Abled', 'Earning Member', 'Occupation of Earning Member', 'Father/ Husband Name', 'Avg Monthly Income', 'Name of Counsellor', 'Number of Children', 'No of Children Below 18 Years', 'IVF Support', 'Type of Support - Family Details', 'Family Member Total', 'Amenities Available', 'Source of Meals', 'Area living', 'Friends', 'Awareness on the community resources ', 'Accomodation', 'AvailableDocuments', 'Currently incarcerated or not?', 'Case type', 'Current incarceration status', 'Incarcerated in which prison ?', 'Custom App', 'Associated Activities', 'Certified', 'If Not Certified', 'Course Completed', 'Role', "Child In Cre'che", 'Released Date', 'Connected With Foundation', 'Smart Phone', 'Employment status', 'If Employed', 'Income', 'Post Released Rehab', 'Rehab Project Year', 'Rehab Project Name', 'Rehab Support', 'Rehab Documents Created', 'Rehab Intervention', 'Rehab Status', 'Child Code', 'Supported By', "Child Born in Cre'che", 'School Type', 'Grade', 'Financial Criteria', 'Vulnerability Criterias', 'Primary contact relation with the child', 'Neighbor Mobile Number', "Father's Name", "Mother's Name", "Caregiver's Name", "Caregiver's relationship with Child", 'Average monthly income of the family (in Rs.)', 'Earning-Member', 'Occupational status of the earning member', 'Living condition of CVF Child', 'Family Size', 'No.of offspring of mother of the child', 'No.of children <18 Years amongst the offspring of the mother', 'Incarceration status/history', 'Govt. issued documents currently available in the family', 'Sibling in Prison Creche?', 'Creche IVF ID', 'Sibling CVF ID', 'Sibling profile (CVF)', 'Alumni', 'Alumni Category', 'Alumni Status', 'Year of Moving to Alumni', 'Alumni Working', 'If Working Alumni - Sector', 'Emotional Symptoms Scale.I get a lot of headaches, stomach-aches, or sickness', 'Emotional Symptoms Scale.I worry a lot', 'Emotional Symptoms Scale.I am often unhappy, down-hearted or tearful', 'Emotional Symptoms Scale.I am nervous in new situations. I easily lose confidence', 'Emotional Symptoms Scale.I have many fears, I am easily scared', 'EQ Scale Score', 'Conduct Problems Scale.I get very angry and often lose my temper', 'Conduct Problems Scale.I usually do as I am told', 'Conduct Problems Scale.I fight a lot. I can make other people do what I want', 'Conduct Problems Scale.I am often accused of lying or cheating', 'Conduct Problems Scale.I take things that are not mine', 'Conduct Problems Scale', 'Hyperactivity/Inattention Scale.I am restless, I cannot stay still for long', 'Hyperactivity/Inattention Scale.I am constantly fidgeting or squirming', 'Hyperactivity/Inattention Scale.I am easily distracted, I find it difficult to concentrate', 'Hyperactivity/Inattention Scale.I think before I do things', "Hyperactivity/Inattention Scale.I finish the work I'm doing. My attention is good", 'Hyperactivity/Inattention Score', 'Peer Relationship Problems Scale.I am usually on my own. I generally play alone', 'Peer Relationship Problems Scale.I have one good friend or more', 'Peer Relationship Problems Scale.Other people my age generally like me', 'Peer Relationship Problems Scale.I get on better with adults than with people my own age', 'Peer Relationship Problems Scale.Other children or young people pick on me or bully me', 'Peer Relationship Problems Score', 'Prosocial Behaviour Scale.I try to be nice to other people. I care about their feelings', 'Prosocial Behaviour Scale.I usually share with others (e.g., food, games, pens)', 'Prosocial Behaviour Scale.I am helpful if someone is hurt, upset or feeling ill', 'Prosocial Behaviour Scale.I am kind to younger children', 'Prosocial Behaviour Scale.I often offer to help others (parents, teachers, children)', 'Prosocial Behaviour Score', 'Current Incarceration Status-Father', 'Incarceration Location - Father', 'Type of Crime_Father', 'Working Status- Father', 'Working Sector - Father', 'Average Monthly Income - Father', 'Incarceration Status - Mother', 'Incarceration Location - Mother', 'Type of Crime - Mother', 'Working - Status - Mother', 'Working Sector - Mother', 'Average Monthly Income- Mother', 'Earning Member - CVF', 'CVF-Average Family Monthly Income', 'Rehab Projects Year', 'Prior Occupation', 'Neighbourhood communication', 'Next Birth date', 'Tags']

# Manual mapping for problematic columns that don't match automatically
manual_mapping = {
    'Earning Member - CVF': 'earning_member_cvf',
    'CVF-Average Family Monthly Income': 'cvf_average_family_monthly_income',
    'Earning-Member': 'earning_member',
    'Average monthly income of the family (in Rs.)': 'average_monthly_income_of_family_in_rs',
    'Father/ Husband Name': 'father_husband_name',
    'Date of birth Certificate received ?': 'date_of_birth_certificate_received',
    'Caste Certificate Received?': 'caste_certificate_received',
    'Convicted / Undertrial': 'convicted_undertrial',
    'Duration (If Convicted)': 'duration_if_convicted',
    'IPC Section No': 'ipc_section_no',
    'Education & Qualification': 'education_qualification',
    'Pre-existing Disease - Yes/No?': 'pre_existing_disease_yes_no',
    'Pre Existing Disease': 'pre_existing_disease_1',
    'Pre-existing Disease': 'pre_existing_disease_2',
    'Is Person Differently abled ?': 'is_person_differently_abled',
    'Occupation of Earning Member': 'occupation_of_earning_member',
    'Avg Monthly Income': 'avg_monthly_income',
    'Name of Counsellor': 'name_of_counsellor',
    'Number of Children': 'number_of_children',
    'No of Children Below 18 Years': 'no_of_children_below_18_years',
    'Type of Support - Family Details': 'type_of_support_family_details',
    'Family Member Total': 'family_member_total',
    'Amenities Available': 'amenities_available',
    'Source of Meals': 'source_of_meals',
    'Area living': 'area_living',
    'Friends': 'friends',
    'Awareness on the community resources ': 'awareness_on_community_resources',
    'Accomodation': 'accomodation',
    'AvailableDocuments': 'availabledocuments',
    'Currently incarcerated or not?': 'currently_incarcerated_or_not',
    'Incarcerated in which prison ?': 'incarcerated_in_which_prison',
    'If Not Certified': 'if_not_certified',
    'Course Completed': 'course_completed',
    "Child In Cre'che": 'child_in_creche',
    'Connected With Foundation': 'connected_with_foundation',
    'Smart Phone': 'smart_phone',
    'Employment status': 'employment_status',
    'If Employed': 'if_employed',
    'Post Released Rehab': 'post_released_rehab',
    'Rehab Project Year': 'rehab_project_year',
    'Rehab Project Name': 'rehab_project_name',
    'Rehab Support': 'rehab_support',
    'Rehab Documents Created': 'rehab_documents_created',
    'Rehab Intervention': 'rehab_intervention',
    'Rehab Status': 'rehab_status',
    'Child Code': 'child_code',
    'Supported By': 'supported_by',
    "Child Born in Cre'che": 'child_born_in_creche',
    'School Type': 'school_type',
    'Grade': 'grade',
    'Financial Criteria': 'financial_criteria',
    'Vulnerability Criterias': 'vulnerability_criterias',
    'Primary contact relation with the child': 'primary_contact_relation_with_child',
    'Neighbor Mobile Number': 'neighbor_mobile_number',
    "Father's Name": 'fathers_name',
    "Mother's Name": 'mothers_name',
    "Caregiver's Name": 'caregivers_name',
    "Caregiver's relationship with Child": 'caregivers_relationship_with_child',
    'Average monthly income of the family (in Rs.)': 'average_monthly_income_of_family_in_rs',
    'Earning-Member': 'earning_member',
    'Occupational status of the earning member': 'occupational_status_of_earning_member',
    'Living condition of CVF Child': 'living_condition_of_cvf_child',
    'Family Size': 'family_size',
    'No.of offspring of mother of the child': 'no_of_offspring_of_mother_of_child',
    'No.of children <18 Years amongst the offspring of the mother': 'no_of_children_18_years_amongst_offspring_of_mother',
    'Incarceration status/history': 'incarceration_status_history',
    'Govt. issued documents currently available in the family': 'govt_issued_documents_available_in_family',
    'Sibling in Prison Creche?': 'sibling_in_prison_creche',
    'Creche IVF ID': 'creche_ivf_id',
    'Sibling CVF ID': 'sibling_cvf_id',
    'Sibling profile (CVF)': 'sibling_profile_cvf',
    'Alumni Category': 'alumni_category',
    'Alumni Status': 'alumni_status',
    'Year of Moving to Alumni': 'year_of_moving_to_alumni',
    'Alumni Working': 'alumni_working',
    'If Working Alumni - Sector': 'if_working_alumni_sector',
    'Current Incarceration Status-Father': 'current_incarceration_status_father',
    'Incarceration Location - Father': 'incarceration_location_father',
    'Type of Crime_Father': 'type_of_crime_father',
    'Working Status- Father': 'working_status_father',
    'Working Sector - Father': 'working_sector_father',
    'Average Monthly Income - Father': 'average_monthly_income_father',
    'Incarceration Status - Mother': 'incarceration_status_mother',
    'Incarceration Location - Mother': 'incarceration_location_mother',
    'Type of Crime - Mother': 'type_of_crime_mother',
    'Working - Status - Mother': 'working_status_mother',
    'Working Sector - Mother': 'working_sector_mother',
    'Average Monthly Income- Mother': 'average_monthly_income_mother',
    'Rehab Projects Year': 'rehab_projects_year',
    'Prior Occupation': 'prior_occupation',
    'Neighbourhood communication': 'neighbourhood_communication',
    'Next Birth date': 'next_birth_date',
    # Additional mappings for unmatched columns
    'Case': 'case_text',
    'Differentially Abled': 'differently_abled',
    'Emotional Symptoms Scale.I get a lot of headaches, stomach-aches, or sickness': 'emotional_symptoms_scale_headaches',
    'Emotional Symptoms Scale.I worry a lot': 'emotional_symptoms_scale_worry_a_lot',
    'Emotional Symptoms Scale.I am often unhappy, down-hearted or tearful': 'emotional_symptoms_scale_unhappy_tearful',
    'Emotional Symptoms Scale.I am nervous in new situations. I easily lose confidence': 'emotional_symptoms_scale_nervous_new_situations',
    'Emotional Symptoms Scale.I have many fears, I am easily scared': 'emotional_symptoms_scale_many_fears',
    'Conduct Problems Scale.I get very angry and often lose my temper': 'conduct_problems_scale_angry_lose_temper',
    'Conduct Problems Scale.I usually do as I am told': 'conduct_problems_scale_do_as_told',
    'Conduct Problems Scale.I fight a lot. I can make other people do what I want': 'conduct_problems_scale_fight_a_lot',
    'Conduct Problems Scale.I am often accused of lying or cheating': 'conduct_problems_scale_lying_cheating',
    'Conduct Problems Scale.I take things that are not mine': 'conduct_problems_scale_take_things_not_mine',
    'Conduct Problems Scale': 'conduct_problems_scale',
    'Hyperactivity/Inattention Scale.I am restless, I cannot stay still for long': 'hyperactivity_inattention_scale_restless',
    'Hyperactivity/Inattention Scale.I am constantly fidgeting or squirming': 'hyperactivity_inattention_scale_fidgeting',
    'Hyperactivity/Inattention Scale.I am easily distracted, I find it difficult to concentrate': 'hyperactivity_inattention_scale_distracted_concentration',
    'Hyperactivity/Inattention Scale.I think before I do things': 'hyperactivity_inattention_scale_think_before_doing',
    "Hyperactivity/Inattention Scale.I finish the work I'm doing. My attention is good": 'hyperactivity_inattention_scale_finish_work_attention_good',
    'Peer Relationship Problems Scale.I am usually on my own. I generally play alone': 'peer_relationship_problems_scale_alone',
    'Peer Relationship Problems Scale.I have one good friend or more': 'peer_relationship_problems_scale_good_friend',
    'Peer Relationship Problems Scale.Other people my age generally like me': 'peer_relationship_problems_scale_others_like_me',
    'Peer Relationship Problems Scale.I get on better with adults than with people my own age': 'peer_relationship_problems_scale_better_with_adults',
    'Peer Relationship Problems Scale.Other children or young people pick on me or bully me': 'peer_relationship_problems_scale_bullied',
    'Prosocial Behaviour Scale.I try to be nice to other people. I care about their feelings': 'prosocial_behaviour_scale_nice_care_feelings',
    'Prosocial Behaviour Scale.I usually share with others (e.g., food, games, pens)': 'prosocial_behaviour_scale_share_with_others',
    'Prosocial Behaviour Scale.I am helpful if someone is hurt, upset or feeling ill': 'prosocial_behaviour_scale_helpful_if_hurt',
    'Prosocial Behaviour Scale.I am kind to younger children': 'prosocial_behaviour_scale_kind_to_younger_children',
    'Prosocial Behaviour Scale.I often offer to help others (parents, teachers, children)': 'prosocial_behaviour_scale_offer_to_help_others'
}

# Define income-related columns that need special handling
income_columns = {
    'avg_monthly_income', 'average_monthly_income_father', 'average_monthly_income_mother',
    'cvf_average_family_monthly_income', 'average_monthly_income_of_family_in_rs', 'income'
}

# Define columns that should be treated as TIMESTAMP (within valid range 1970-2038)
timestamp_columns = {
    'last_seen_on_chat', 'first_seen_on_chat', 'last_contacted_time',
    'last_activity_date', 'last_seen_on_web', 'created_at', 'updated_at',
    'last_assigned_at'
}

# Define columns that should be treated as DATE (can be outside TIMESTAMP range)
# IMPORTANT: These should be DATE columns in your MySQL table, not TIMESTAMP
date_columns = {
    'date_of_birth', 'ivf_joining_date', 'bail_out_date', 'release_date',
    'drop_out_date', 'deceased_date', 'released_date', 'next_birth_date'
}

# Helper: Clean and normalize column names
def normalize_col(col):
    if not col:
        return ""
    # Convert to lowercase and replace spaces, hyphens, and other special chars with underscores
    normalized = col.strip().lower()
    # Replace multiple special characters with single underscore
    normalized = re.sub(r'[^a-z0-9]+', '_', normalized)
    # Remove leading/trailing underscores
    normalized = normalized.strip('_')
    return normalized

# Helper: Convert income values to proper decimal format
def parse_income(val):
    if not val or val.strip() == "":
        return None
    
    val = val.strip().lower()
    
    # Handle common non-numeric values
    if val in ['not applicable', 'n/a', 'na', 'nil', 'none', '0']:
        return None
    
    # Handle range values like "6-10 K", "11-16K", etc.
    # Extract numbers and convert K to thousands
    income_match = re.search(r'(\d+)(?:\s*-\s*(\d+))?\s*k?', val)
    if income_match:
        min_val = int(income_match.group(1))
        max_val = int(income_match.group(2)) if income_match.group(2) else min_val
        
        # If it contains 'k' or the numbers are small (likely in thousands)
        if 'k' in val or max_val <= 100:
            # Take average of range and multiply by 1000
            avg_val = (min_val + max_val) / 2 * 1000
        else:
            # Take average of range as is
            avg_val = (min_val + max_val) / 2
        
        return int(avg_val)
    
    # Try to extract just numbers
    numbers = re.findall(r'\d+', val)
    if numbers:
        num = int(numbers[0])
        # If the number is very small, likely in thousands
        if num <= 100 and ('k' in val or len(val) <= 6):
            return num * 1000
        return num
    
    print(f"⚠️ Could not parse income value: {val}")
    return None

# Helper: Convert to SQL NULL
def to_sql_value(val):
    return None if val is None or val.strip() == "" else val.strip()

# Helper: Parse date with validation and proper type handling
def parse_date(val, column_name):
    if not val:
        return None
    
    val = val.strip()
    if not val:
        return None
    
    formats = [
        '%Y-%m-%d %H:%M:%S UTC',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%m/%d/%Y',
        '%d-%m-%Y',
        '%Y-%m-%d %H:%M:%S.%f'
    ]
    
    for fmt in formats:
        try:
            parsed_date = datetime.strptime(val, fmt)
            
            # For TIMESTAMP columns, check valid range (1970-2038) and return datetime
            if column_name in timestamp_columns:
                if parsed_date.year < 1970 or parsed_date.year > 2037:
                    print(f"⚠️ Date outside TIMESTAMP range for {column_name}: {val} -> setting to NULL")
                    return None
                return parsed_date  # Return datetime object for TIMESTAMP columns
            
            # For DATE columns, allow broader range and return date object
            elif column_name in date_columns:
                if parsed_date.year < 1900 or parsed_date.year > datetime.now().year + 10:
                    print(f"⚠️ Date outside reasonable range for {column_name}: {val} -> setting to NULL")
                    return None
                # Return date object for DATE columns (this is key!)
                return parsed_date.date()
            
            # For other columns that might contain dates
            else:
                if parsed_date.year < 1970 or parsed_date.year > datetime.now().year + 10:
                    print(f"⚠️ Date outside reasonable range for {column_name}: {val} -> setting to NULL")
                    return None
                return parsed_date
            
        except ValueError:
            continue
    
    print(f"⚠️ Could not parse date: {val} for column {column_name}")
    return None

# Helper: Clean mobile number
def clean_mobile(val):
    if not val or val.strip() == "":
        return None
    
    # Remove all non-digit characters
    mobile = re.sub(r'[^\d]', '', val.strip())
    
    # If mobile is too long, truncate to reasonable length (15 digits max for international)
    if len(mobile) > 15:
        print(f"⚠️ Mobile number too long, truncating: {val} -> {mobile[:15]}")
        mobile = mobile[:15]
    
    # If mobile is too short (less than 10 digits), might be invalid
    if len(mobile) < 10:
        print(f"⚠️ Mobile number too short: {val}")
        return None
    
    return mobile if mobile else None

try:
    # Connect to MySQL
    connection = mysql.connector.connect(
        host=host,
        user=username,
        password=password,
        database=database
    )
    if not connection.is_connected():
        raise Exception("Failed to connect to MySQL")
    cursor = connection.cursor()
    print("✅ Connected to MySQL database")

    # Read and parse CSV
    with open(csv_file_path, mode='r', encoding='utf-8') as file:
        # Sniff delimiter
        sample = file.read(8192)
        file.seek(0)
        sniffer = csv.Sniffer()
        delimiter = sniffer.sniff(sample).delimiter
        print(f"🔍 Detected delimiter: '{delimiter}'")

        reader = csv.reader(file, delimiter=delimiter)
        rows = list(reader)

    if len(rows) < 1:
        raise Exception("CSV is empty")

    # Find header row (look for 'id' or 'mobile')
    header_row_idx = None
    for i, row in enumerate(rows):
        normalized = [normalize_col(cell) for cell in row]
        if 'id' in normalized or 'mobile' in normalized or 'ivf_id' in normalized:
            header_row_idx = i
            break

    if header_row_idx is None:
        raise Exception("❌ Could not find header row in CSV")

    # Extract header and data rows
    header = rows[header_row_idx]
    data_rows = rows[header_row_idx + 1:]

    # Create mapping from CSV columns to database columns
    csv_to_db_mapping = {}
    unmatched_csv_cols = []
    unmatched_db_cols = []
    
    print("🔍 Processing CSV columns:")
    for i, csv_col in enumerate(header):
        if csv_col.strip():  # Skip empty columns
            print(f"   CSV[{i}]: '{csv_col}'")
            # First check manual mapping
            if csv_col in manual_mapping:
                csv_to_db_mapping[i] = manual_mapping[csv_col]
                print(f"      → Mapped to: '{manual_mapping[csv_col]}'")
                continue
            
            # Check for Govt. issued documents with flexible matching
            if 'govt' in csv_col.lower() and 'issued' in csv_col.lower() and 'documents' in csv_col.lower():
                csv_to_db_mapping[i] = 'govt_issued_documents_available_in_family'
                print(f"      → Flexible mapped to: 'govt_issued_documents_available_in_family'")
                continue
            
            # Check for Awareness on community resources with flexible matching
            if 'awareness' in csv_col.lower() and 'community' in csv_col.lower() and 'resources' in csv_col.lower():
                csv_to_db_mapping[i] = 'awareness_on_community_resources'
                print(f"      → Flexible mapped to: 'awareness_on_community_resources'")
                continue
            
            # Check for No.of children with flexible matching
            if 'no.of children' in csv_col.lower() and '18 years' in csv_col.lower() and 'offspring' in csv_col.lower():
                csv_to_db_mapping[i] = 'no_of_children_18_years_amongst_offspring_of_mother'
                print(f"      → Flexible mapped to: 'no_of_children_18_years_amongst_offspring_of_mother'")
                continue
            
            # Then try automatic normalization
            normalized_csv = normalize_col(csv_col)
            # Find matching database column
            matched = False
            for db_col in db_columns:
                if normalize_col(db_col) == normalized_csv:
                    csv_to_db_mapping[i] = db_col
                    matched = True
                    print(f"      → Auto-mapped to: '{db_col}'")
                    break
            if not matched:
                unmatched_csv_cols.append((i, csv_col, normalized_csv))
                print(f"      → No match found")
    
    # Check for unmatched database columns
    for db_col in db_columns:
        normalized_db = normalize_col(db_col)
        found = False
        for idx, mapped_db_col in csv_to_db_mapping.items():
            if mapped_db_col == db_col:
                found = True
                break
        if not found:
            unmatched_db_cols.append((db_col, normalized_db))

    print(f"✅ Found {len(csv_to_db_mapping)} matching columns out of {len(db_columns)}")
    
    if unmatched_csv_cols:
        print(f"⚠️ Unmatched CSV columns ({len(unmatched_csv_cols)}):")
        for idx, csv_col, normalized in unmatched_csv_cols[:10]:  # Show first 10
            print(f"   CSV[{idx}]: '{csv_col}' -> '{normalized}'")
        if len(unmatched_csv_cols) > 10:
            print(f"   ... and {len(unmatched_csv_cols) - 10} more")
    
    if unmatched_db_cols:
        print(f"⚠️ Unmatched DB columns ({len(unmatched_db_cols)}):")
        for db_col, normalized in unmatched_db_cols[:10]:  # Show first 10
            print(f"   DB: '{db_col}' -> '{normalized}'")
        if len(unmatched_db_cols) > 10:
            print(f"   ... and {len(unmatched_db_cols) - 10} more")

    # Prepare INSERT with ON DUPLICATE KEY UPDATE to handle potential duplicates
    placeholders = ', '.join(['%s'] * len(db_columns))
    columns_list = ', '.join([f'`{col}`' for col in db_columns])
    
    # Create UPDATE clause for duplicate key handling (excluding primary key)
    update_clauses = []
    for col in db_columns:
        if col != 'id':  # Assuming 'id' is the primary key
            update_clauses.append(f'`{col}` = VALUES(`{col}`)')
    
    update_clause = ', '.join(update_clauses)
    insert_query = f"""
    INSERT INTO beneficiaries ({columns_list}) 
    VALUES ({placeholders})
    ON DUPLICATE KEY UPDATE {update_clause}
    """

    inserted = 0
    updated = 0
    skipped = 0
    
    print(f"🚀 Starting to process {len(data_rows)} data rows...")
    
    for row_num, row in enumerate(data_rows, start=header_row_idx + 2):
        if len(row) == 0 or (len(row) == 1 and row[0].strip() == ""):
            skipped += 1
            continue  # Skip empty rows

        # Extend row if shorter than expected
        while len(row) < len(header):
            row.append("")

        # Build data tuple in database column order
        data = []
        for db_col in db_columns:
            # Find the CSV column index that maps to this database column
            csv_idx = None
            for idx, mapped_db_col in csv_to_db_mapping.items():
                if mapped_db_col == db_col:
                    csv_idx = idx
                    break
            
            raw_val = row[csv_idx] if csv_idx is not None and csv_idx < len(row) else ""
            val = to_sql_value(raw_val)

            # Special parsing for income columns
            if val and db_col in income_columns:
                val = parse_income(val)
            
            # Special parsing for mobile numbers
            elif val and db_col == 'mobile':
                val = clean_mobile(val)
            
            # Special parsing for dates (with column-specific handling)
            elif val and (db_col in timestamp_columns or db_col in date_columns):
                val = parse_date(val, db_col)

            data.append(val)

        try:
            cursor.execute(insert_query, data)
            
            # Check if it was an insert or update
            if cursor.rowcount == 1:
                inserted += 1
            elif cursor.rowcount == 2:  # MySQL returns 2 for ON DUPLICATE KEY UPDATE
                updated += 1
            
            if (inserted + updated) % 50 == 0:
                print(f"📌 Processed {inserted + updated} records (Inserted: {inserted}, Updated: {updated})...")
                
        except Exception as e:
            print(f"❌ Error on row {row_num}: {e}")
            print(f"   Data preview: {data[:5]}...")  # Show first few values
            
            # Show specific problematic data for debugging
            for i, (col, val) in enumerate(zip(db_columns, data)):
                if val is not None and (col in timestamp_columns or col in date_columns):
                    print(f"   Date column {col}: {val} (type: {type(val)})")
                if i >= 10:  # Limit debug output
                    break
            
            skipped += 1
            continue

    connection.commit()
    print(f"✨ Processing completed!")
    print(f"✅ Successfully inserted {inserted} new records")
    print(f"🔄 Successfully updated {updated} existing records")
    if skipped > 0:
        print(f"⚠️ Skipped {skipped} rows due to errors or empty data")

except Exception as e:
    print("❌ Error:", str(e))
    import traceback
    traceback.print_exc()
finally:
    if 'connection' in locals() and connection.is_connected():
        cursor.close()
        connection.close()
        print("🔌 MySQL connection closed.")