import oracledb
from dotenv import load_dotenv
import os
import sys
import glob
import getpass
from pathlib import Path
from collections import defaultdict
import re

# Environment variable loading
# Scripts live in skg/validation/ - load skg/.env explicitly
_project_root = Path(__file__).resolve().parent.parent  # skg/
load_dotenv(_project_root / ".env")

# Add VAULT_DIR to sys.path so vault_client can be imported
_vault_dir = os.getenv("VAULT_DIR")
if _vault_dir and _vault_dir not in sys.path:
    sys.path.insert(0, _vault_dir)

# vault_client is optional; when missing we fall back to env var / prompts below.
try:
    from vault_client import get_vault_credentials, get_wallet_password
    VAULT_AVAILABLE = True
except ImportError:
    VAULT_AVAILABLE = False

# Default to "" so a missing ORACLE_WALLET_PATH yields Path("") instead of
# raising TypeError from Path(None); matches the defaulting used for the
# service-name derivation below.
wallet_path = Path(os.getenv("ORACLE_WALLET_PATH", ""))
dsn         = os.getenv("ORACLE_DSN")
oracle_user = os.getenv("ORACLE_USER")

# Credential resolution: vault -> ORACLE_PASSWORD env var -> interactive prompt
# The vault service name is derived from the wallet folder name,
# e.g. "Wallet_MYSERVICE" -> "MYSERVICE".
_wallet_folder = wallet_path.name
_service       = _wallet_folder.replace("Wallet_", "").replace("wallet_", "").strip()

oracle_password = os.getenv("ORACLE_PASSWORD")
if not oracle_password and VAULT_AVAILABLE and _service:
    _vault_user, oracle_password = get_vault_credentials(_service)
    # The vault may also supply the username when ORACLE_USER was not set.
    if _vault_user and not oracle_user:
        oracle_user = _vault_user
if not oracle_password:
    oracle_password = getpass.getpass("Enter Oracle password: ")

# Wallet password resolution: vault -> interactive prompt
wallet_password = None
if VAULT_AVAILABLE and _service:
    wallet_password = get_wallet_password(str(wallet_path))
if not wallet_password:
    wallet_password = getpass.getpass("Enter wallet password: ")

os.environ["NLS_LANG"] = "AMERICAN_AMERICA.AL32UTF8"

# Oracle client initialization

# Debug mode configuration
DEBUG_MODE = input("Run in DEBUG mode for single table? (y/n): ").strip().lower() == 'y'
DEBUG_TABLE = None
DEBUG_OUTPUT_DIR = None
_debug_ttl_subjects = {}  # Store TTL subjects for debug mode

if DEBUG_MODE:
    DEBUG_TABLE = input("Enter table name to debug (e.g., GRF_DRUG_INTERACTION): ").strip().upper()
    DEBUG_OUTPUT_DIR = input("Enter directory for debug output files (default: current dir): ").strip() or "."
    print(f"\n[DEBUG MODE] Processing only: {DEBUG_TABLE}")
    print(f"[DEBUG MODE] Output directory: {DEBUG_OUTPUT_DIR}\n")

# TTL File Path
_default_ttl = str(_project_root / "output" / "output_consolidated.trig")
ttl_file_path = input(f"Enter the path to the TTL file [{_default_ttl}]: ").strip() or _default_ttl


def is_mapping_table(table_name):
    """Return True when the table name follows the mapping-table naming convention."""
    return table_name.find("_MAPPING_") >= 0


def get_db_pk_counts():
    """Fetch distinct PK counts from Oracle database tables for REGULAR tables only.

    Reads PK-column metadata from UTIL_GRF_R2RML_COLUMN (mapping tables are
    excluded), then executes the generated COUNT(DISTINCT ...) statement for
    each PK column. In debug mode, also dumps all distinct PK values per table
    to a text file in DEBUG_OUTPUT_DIR.

    Returns:
        List of dicts with keys TABLE_NAME, COLUMN_NAME, CLASS_NAME,
        DISTINCT_COUNT, IS_MAPPING (always False here). Empty list when the
        debug table is a mapping table or on any database error.
    """
    print("\n[DEBUG] Connecting to Oracle database...")
    try:
        # Context managers guarantee the connection and cursor are released
        # even when a query below raises (previously they leaked on error).
        with oracledb.connect(user=oracle_user, password=oracle_password, dsn=dsn,
                              config_dir=str(wallet_path), wallet_location=str(wallet_path),
                              wallet_password=wallet_password) as conn:
            conn.autocommit = True
            with conn.cursor() as cursor:
                print("[DEBUG] Database connection successful!")

                # Build metadata query - filter by table if in debug mode.
                # Exclude MAPPING tables - they're handled separately.
                bind_params = {}
                table_filter = ""
                if DEBUG_MODE and DEBUG_TABLE:
                    if is_mapping_table(DEBUG_TABLE):
                        # For debug mode with a mapping table, skip PK counts query
                        print(f"[DEBUG] {DEBUG_TABLE} is a MAPPING table - skipping PK counts")
                        return []
                    # DEBUG_TABLE comes from interactive input: pass it as a
                    # bind variable instead of interpolating it into the SQL.
                    table_filter = "AND C.TABLE_NAME = :table_name"
                    bind_params = {"table_name": DEBUG_TABLE}

                sql_query = f"""
                    SELECT 'SELECT ''' || C.TABLE_NAME || ''' AS TABLE_NAME, ''' || C.COLUMN_NAME || ''' AS COLUMN_NAME, ''' || C.R2RML_CLASS || ''' AS CLASS_NAME, COUNT(DISTINCT ' || C.COLUMN_NAME || ') AS DISTINCT_COUNT FROM GLOBAL_DISTRIBUTE.' || C.TABLE_NAME AS sql_statement,
                           C.TABLE_NAME, C.COLUMN_NAME
                    FROM GLOBAL_DISTRIBUTE.UTIL_GRF_R2RML_COLUMN C
                    WHERE C.IS_KEY = 'Y'
                      {table_filter}
                      AND C.TABLE_NAME NOT LIKE '%_MAPPING_%'
                    ORDER BY C.TABLE_NAME, C.COLUMN_ORDER
                """

                print("[DEBUG] Executing metadata query for REGULAR tables...")
                if DEBUG_MODE:
                    print(f"[DEBUG] Metadata query:\n{sql_query}\n")

                cursor.execute(sql_query, bind_params)
                rows = cursor.fetchall()
                sql_statements = [(row[0], row[1], row[2]) for row in rows]  # (sql, table_name, column_name)
                print(f"[DEBUG] Found {len(sql_statements)} PK columns to query (regular tables)")

                pk_counts = []
                for i, (sql, table_name, column_name) in enumerate(sql_statements, 1):
                    print(f"[DEBUG] Executing query {i}/{len(sql_statements)}...")

                    if DEBUG_MODE:
                        print(f"[DEBUG] Count query:\n{sql}\n")

                    cursor.execute(sql)
                    result = cursor.fetchone()

                    pk_counts.append({
                        "TABLE_NAME": result[0],
                        "COLUMN_NAME": result[1],
                        "CLASS_NAME": result[2],
                        "DISTINCT_COUNT": result[3],
                        "IS_MAPPING": False
                    })

                    # In debug mode, also fetch and write all PK values to file
                    if DEBUG_MODE and DEBUG_OUTPUT_DIR:
                        # table_name/column_name are SQL identifiers from the
                        # trusted metadata table; identifiers cannot be bound.
                        pk_values_sql = f"SELECT DISTINCT {column_name} FROM GLOBAL_DISTRIBUTE.{table_name} ORDER BY {column_name}"
                        print(f"[DEBUG] PK values query:\n{pk_values_sql}\n")

                        cursor.execute(pk_values_sql)
                        pk_values = [str(row[0]) for row in cursor.fetchall() if row[0] is not None]

                        # Write to file
                        output_file = os.path.join(DEBUG_OUTPUT_DIR, f"db_pk_values_{table_name}.txt")
                        with open(output_file, 'w', encoding='utf-8') as f:
                            f.write(f"# Table: {table_name}\n")
                            f.write(f"# Column: {column_name}\n")
                            f.write(f"# Distinct count: {len(pk_values)}\n")
                            f.write(f"# Query: {pk_values_sql}\n")
                            f.write("#" + "="*60 + "\n")
                            for pk in pk_values:
                                f.write(f"{pk}\n")

                        print(f"[DEBUG] Wrote {len(pk_values)} PK values to: {output_file}")

                print(f"[DEBUG] Database queries complete. Found {len(pk_counts)} PK counts (regular tables)\n")
                return pk_counts
    except Exception as e:
        print(f"[ERROR] Database error: {e}")
        import traceback
        traceback.print_exc()
        return []


def get_db_mapping_counts():
    """
    Fetch distinct NON-PK column counts from MAPPING tables.
    For mapping tables, we count the distinct non-null values of each non-PK column,
    not the PK itself.

    Also fetches grouping info from UTIL_GRF_R2RML_COLUMN_GROUP to handle
    predicates nested in blank nodes, and subject class from PK column.

    Returns:
        List of dicts with keys TABLE_NAME, COLUMN_NAME, PREDICATE,
        OBJECT_CLASS, GROUP_PREDICATE, SUBJECT_CLASS, DISTINCT_COUNT,
        IS_MAPPING (always True here). Empty list when there is nothing to
        count or on any database error.
    """
    print("\n[DEBUG] Fetching MAPPING table non-PK column counts...")
    try:
        # Context managers guarantee the connection and cursor are released
        # even when a query below raises (previously they leaked on error).
        with oracledb.connect(user=oracle_user, password=oracle_password, dsn=dsn,
                              config_dir=str(wallet_path), wallet_location=str(wallet_path),
                              wallet_password=wallet_password) as conn:
            conn.autocommit = True
            with conn.cursor() as cursor:
                # Get metadata for mapping tables - non-PK columns with predicates.
                # Join with COLUMN_GROUP to get grouping predicate info.
                # Also get subject class from PK column via subquery.
                if DEBUG_MODE and DEBUG_TABLE:
                    if not is_mapping_table(DEBUG_TABLE):
                        print(f"[DEBUG] {DEBUG_TABLE} is not a MAPPING table - skipping mapping counts")
                        return []
                    # DEBUG_TABLE comes from interactive input: pass it as a
                    # bind variable instead of interpolating it into the SQL.
                    table_filter = "C.TABLE_NAME = :table_name"
                    bind_params = {"table_name": DEBUG_TABLE}
                else:
                    table_filter = "C.TABLE_NAME LIKE '%_MAPPING_%'"
                    bind_params = {}

                sql_query = f"""
                    SELECT C.TABLE_NAME, C.COLUMN_NAME, C.PREDICATE_DESCRIPTION, C.R2RML_CLASS,
                           G.GROUP_PREDICATE_NAME,
                           (SELECT PK.R2RML_CLASS FROM GLOBAL_DISTRIBUTE.UTIL_GRF_R2RML_COLUMN PK 
                            WHERE PK.TABLE_NAME = C.TABLE_NAME AND PK.IS_KEY = 'Y') AS SUBJECT_CLASS
                    FROM GLOBAL_DISTRIBUTE.UTIL_GRF_R2RML_COLUMN C
                    LEFT JOIN GLOBAL_DISTRIBUTE.UTIL_GRF_R2RML_COLUMN_GROUP G
                      ON C.TABLE_NAME = G.TABLE_NAME AND C.COLUMN_NAME = G.COLUMN_NAME
                    WHERE {table_filter}
                      AND C.IS_KEY = 'N'
                      AND C.PREDICATE_DESCRIPTION IS NOT NULL
                    ORDER BY C.TABLE_NAME, C.COLUMN_ORDER
                """

                print("[DEBUG] Executing metadata query for MAPPING tables...")
                if DEBUG_MODE:
                    print(f"[DEBUG] Metadata query:\n{sql_query}\n")

                cursor.execute(sql_query, bind_params)
                rows = cursor.fetchall()

                if not rows:
                    print("[DEBUG] No non-PK columns found for MAPPING tables")
                    return []

                print(f"[DEBUG] Found {len(rows)} non-PK columns in MAPPING tables")

                mapping_counts = []
                for i, (table_name, column_name, predicate, object_class, group_predicate, subject_class) in enumerate(rows, 1):
                    # Count distinct non-null values in this column.
                    # table_name/column_name are SQL identifiers from the
                    # trusted metadata table; identifiers cannot be bound.
                    count_sql = f"SELECT COUNT(DISTINCT {column_name}) FROM GLOBAL_DISTRIBUTE.{table_name} WHERE {column_name} IS NOT NULL"

                    print(f"[DEBUG] Executing count {i}/{len(rows)}: {table_name}.{column_name}")
                    print(f"[DEBUG]   Subject class: {subject_class}")
                    if group_predicate:
                        print(f"[DEBUG]   Grouped under: {group_predicate}")
                    if DEBUG_MODE:
                        print(f"[DEBUG] Count query:\n{count_sql}\n")

                    cursor.execute(count_sql)
                    count_result = cursor.fetchone()[0]

                    mapping_counts.append({
                        "TABLE_NAME": table_name,
                        "COLUMN_NAME": column_name,
                        "PREDICATE": predicate,
                        "OBJECT_CLASS": object_class,
                        "GROUP_PREDICATE": group_predicate,  # e.g., "mt:RelatedDrugNameGroup" or None
                        "SUBJECT_CLASS": subject_class,  # e.g., "Drug" - the class of subjects we should count
                        "DISTINCT_COUNT": count_result,
                        "IS_MAPPING": True
                    })

                    # In debug mode, also fetch and write all values to file
                    if DEBUG_MODE and DEBUG_OUTPUT_DIR:
                        values_sql = f"SELECT DISTINCT {column_name} FROM GLOBAL_DISTRIBUTE.{table_name} WHERE {column_name} IS NOT NULL ORDER BY {column_name}"
                        print(f"[DEBUG] Values query:\n{values_sql}\n")

                        cursor.execute(values_sql)
                        values = [str(row[0]) for row in cursor.fetchall()]

                        # Write to file
                        output_file = os.path.join(DEBUG_OUTPUT_DIR, f"db_mapping_values_{table_name}_{column_name}.txt")
                        with open(output_file, 'w', encoding='utf-8') as f:
                            f.write(f"# Table: {table_name}\n")
                            f.write(f"# Column: {column_name}\n")
                            f.write(f"# Predicate: {predicate}\n")
                            f.write(f"# Group Predicate: {group_predicate or 'None (top-level)'}\n")
                            f.write(f"# Subject Class: {subject_class}\n")
                            f.write(f"# Object Class: {object_class}\n")
                            f.write(f"# Distinct non-null count: {len(values)}\n")
                            f.write(f"# Query: {values_sql}\n")
                            f.write("#" + "="*60 + "\n")
                            for v in values:
                                f.write(f"{v}\n")

                        print(f"[DEBUG] Wrote {len(values)} values to: {output_file}")

                print(f"[DEBUG] MAPPING table queries complete. Found {len(mapping_counts)} column counts\n")
                return mapping_counts

    except Exception as e:
        print(f"[ERROR] Database error: {e}")
        import traceback
        traceback.print_exc()
        return []


def get_db_pk_values_for_table(table_name, column_name):
    """Fetch actual PK values for a specific table (called only for mismatches).

    Args:
        table_name: table to read; SQL identifier taken from the metadata table.
        column_name: PK column to read; SQL identifier from the metadata table.

    Returns:
        Set of distinct non-null PK values as strings; empty set on error.
    """
    try:
        # 'with' blocks ensure the connection/cursor are closed even when the
        # query raises (previously the connection leaked on error).
        with oracledb.connect(user=oracle_user, password=oracle_password, dsn=dsn,
                              config_dir=str(wallet_path), wallet_location=str(wallet_path),
                              wallet_password=wallet_password) as conn:
            with conn.cursor() as cursor:
                # Identifiers cannot be bound; both come from trusted metadata.
                sql = f"SELECT DISTINCT {column_name} FROM GLOBAL_DISTRIBUTE.{table_name}"
                cursor.execute(sql)
                return set(str(row[0]) for row in cursor.fetchall() if row[0] is not None)
    except Exception as e:
        print(f"[ERROR] Error fetching PKs for {table_name}: {e}")
        return set()


def get_mapping_table_metadata(table_name):
    """
    Get metadata for a mapping table including subject key and all predicates.

    Args:
        table_name: mapping table to describe (passed as a SQL bind variable).

    Returns dict with:
        - subject_column: the key column name
        - subject_class: the R2RML class for the subject
        - predicates: list of {column, predicate, object_class} for non-key columns
    Returns None on database error.
    """
    try:
        # 'with' blocks ensure the connection/cursor are closed even when the
        # query raises (previously the connection leaked on error).
        with oracledb.connect(user=oracle_user, password=oracle_password, dsn=dsn,
                              config_dir=str(wallet_path), wallet_location=str(wallet_path),
                              wallet_password=wallet_password) as conn:
            with conn.cursor() as cursor:
                sql = """
                    SELECT COLUMN_NAME, IS_KEY, R2RML_CLASS, PREDICATE_DESCRIPTION
                    FROM GLOBAL_DISTRIBUTE.UTIL_GRF_R2RML_COLUMN
                    WHERE TABLE_NAME = :table_name
                    ORDER BY COLUMN_ORDER
                """
                cursor.execute(sql, {'table_name': table_name})
                rows = cursor.fetchall()

        result = {
            'subject_column': None,
            'subject_class': None,
            'predicates': []
        }

        for column_name, is_key, r2rml_class, predicate_desc in rows:
            if is_key == 'Y':
                result['subject_column'] = column_name
                result['subject_class'] = r2rml_class
            elif predicate_desc:  # Non-key column with predicate
                result['predicates'].append({
                    'column': column_name,
                    'predicate': predicate_desc,
                    'object_class': r2rml_class
                })

        return result
    except Exception as e:
        print(f"[ERROR] Error fetching metadata for {table_name}: {e}")
        return None


def get_mapping_relationships(table_name, key_column, value_column):
    """
    Get distinct (subject_key, object_value) pairs from a mapping table.

    Args:
        table_name: mapping table to read; SQL identifier from trusted metadata.
        key_column: subject key column; SQL identifier from trusted metadata.
        value_column: object value column; SQL identifier from trusted metadata.

    Returns:
        Set of tuples {(subject_key, object_value), ...} with both members
        stringified; empty set on error.
    """
    try:
        # 'with' blocks ensure the connection/cursor are closed even when the
        # query raises (previously the connection leaked on error).
        with oracledb.connect(user=oracle_user, password=oracle_password, dsn=dsn,
                              config_dir=str(wallet_path), wallet_location=str(wallet_path),
                              wallet_password=wallet_password) as conn:
            with conn.cursor() as cursor:
                # Identifiers cannot be bound; all three come from trusted metadata.
                sql = f"""
                    SELECT DISTINCT {key_column}, {value_column}
                    FROM GLOBAL_DISTRIBUTE.{table_name}
                    WHERE {key_column} IS NOT NULL AND {value_column} IS NOT NULL
                """
                cursor.execute(sql)
                return set((str(row[0]), str(row[1])) for row in cursor.fetchall())
    except Exception as e:
        print(f"[ERROR] Error fetching relationships from {table_name}: {e}")
        return set()


def get_ttl_predicate_object_counts(ttl_file_path, predicate_class_map, group_predicates=None):
    """
    Parse TTL file and count distinct objects for specific predicates,
    filtering by subject class.
    
    Handles both top-level predicates and predicates nested in blank node blocks:
    - Top-level: mt:hasSomething someprefix:123
    - Grouped:   mt:RelatedDrugNameGroup [
                     mt:hasRelatedDrugNameTypeId drugnametype:17 ;
                     mt:hasRelatedDrugSynonymId drugname:283965
                 ] ;
    
    Args:
        ttl_file_path: Path to TTL file
        predicate_class_map: Dict mapping predicate name -> SET of subject classes to filter by
                            e.g., {'hasLeafletId': {'Drug', 'DrugProduct'}}
                            A predicate can appear in multiple tables with different subject classes.
        group_predicates: Set of group predicate names that contain blank nodes (e.g., {'RelatedDrugNameGroup'})
    
    Returns:
        Tuple of:
        - Dict mapping (predicate, class) -> count of distinct object IDs
        - Dict mapping (predicate, class) -> set of object IDs (for debug)
        On any parse error, returns ({}, {}) after printing the traceback.
    """
    # Nothing to look for -> nothing to scan.
    if not predicate_class_map:
        return {}, {}
    
    if group_predicates is None:
        group_predicates = set()
    
    target_predicates = set(predicate_class_map.keys())
    
    print(f"[DEBUG] Parsing TTL for predicate object counts (filtered by subject class)")
    print(f"[DEBUG]   Target predicates with class filter:")
    for pred, classes in predicate_class_map.items():
        print(f"[DEBUG]     mt:{pred} -> on {classes} subjects")
    print(f"[DEBUG]   Group predicates (blank nodes): {group_predicates}")
    
    # Key by (predicate, class) tuple to handle same predicate on different subject classes
    predicate_objects = defaultdict(set)
    line_count = 0
    current_subject = None
    current_subject_class = None
    in_blank_node = False
    blank_node_depth = 0
    current_predicate = None  # Track current predicate for multi-value continuation lines
    
    # Pattern for subject line: drug:d01164 or <http://...>
    subject_pattern = re.compile(r'^(\w+):(\S+)')
    
    # Pattern for class declaration: a mt:Drug or a mt:Drug,
    class_pattern = re.compile(r'\s+a\s+mt:(\w+)')
    
    # Pattern for predicate with prefixed IRI object on same line (top-level or in blank node)
    # Matches: mt:hasRelatedDrugNameTypeId drugnametype:17
    predicate_iri_pattern = re.compile(r'mt:(\w+)\s+(\w+):([^\s;,\]]+)')
    
    # Pattern for predicate with string literal object
    # Matches: mt:OuterPackageMfgDrugProductCode "00002143380"
    # Also handles typed literals: "value"^^xsd:string
    predicate_literal_pattern = re.compile(r'mt:(\w+)\s+"([^"]*)"')
    
    # Pattern to detect start of blank node after a group predicate
    # Matches: mt:RelatedDrugNameGroup [
    group_start_pattern = re.compile(r'mt:(\w+)\s+\[')
    
    # Pattern for continuation values (no predicate, just prefix:value)
    # Matches indented lines like:     rxbuilder:d00045_17,
    continuation_iri_pattern = re.compile(r'^\s+(\w+):([^\s;,\]]+)')
    
    # Pattern for continuation string literal values
    # Matches indented lines like:     "some value",
    continuation_literal_pattern = re.compile(r'^\s+"([^"]*)"')
    
    try:
        with open(ttl_file_path, "r", encoding="utf-8") as f:
            for line in f:
                line_count += 1
                line = line.rstrip('\r\n')
                
                # Progress heartbeat for very large files (every 5M lines).
                if line_count % 5000000 == 0:
                    print(f"[DEBUG] Processed {line_count:,} lines for predicate objects...")
                
                # Skip empty lines and directives
                if not line or line.startswith('@') or line.startswith('#'):
                    continue
                
                # Check for new subject (line starts with prefix:id, not indented)
                if not line.startswith(' ') and not line.startswith('\t'):
                    subject_match = subject_pattern.match(line)
                    if subject_match:
                        current_subject = subject_match.group(2)
                        current_subject_class = None  # Reset class, will be set when we see 'a mt:X'
                        in_blank_node = False
                        blank_node_depth = 0
                        current_predicate = None  # Reset predicate for new subject
                
                # Check for class declaration on this line
                # (only outside blank nodes, so nested 'a mt:X' in a blank node
                # does not overwrite the outer subject's class)
                class_match = class_pattern.search(line)
                if class_match and current_subject and not in_blank_node:
                    current_subject_class = class_match.group(1)
                
                # Track blank node depth
                open_brackets = line.count('[')
                close_brackets = line.count(']')
                
                # Check if we're entering a blank node via a group predicate
                if group_predicates:
                    group_match = group_start_pattern.search(line)
                    if group_match:
                        group_pred = group_match.group(1)
                        if group_pred in group_predicates:
                            in_blank_node = True
                            blank_node_depth = 1
                
                # Update blank node tracking
                if in_blank_node:
                    # The guard below skips adding open_brackets on the line that
                    # just set blank_node_depth = 1 above, so the group's own
                    # opening '[' is not counted twice.
                    if not (group_predicates and group_start_pattern.search(line)):
                        blank_node_depth += open_brackets
                    blank_node_depth -= close_brackets
                    
                    if blank_node_depth <= 0:
                        in_blank_node = False
                        blank_node_depth = 0
                
                # Look for predicate matches - only if subject class matches filter
                # Try IRI pattern first, then literal pattern
                predicate_match = predicate_iri_pattern.search(line)
                literal_match = predicate_literal_pattern.search(line)
                
                matched_predicate = None
                matched_object = None
                match_end = 0
                
                if predicate_match:
                    matched_predicate = predicate_match.group(1)
                    matched_object = predicate_match.group(3)
                    match_end = predicate_match.end()
                elif literal_match:
                    matched_predicate = literal_match.group(1)
                    matched_object = literal_match.group(2)
                    match_end = literal_match.end()
                
                if matched_predicate:
                    # Track this predicate for continuation lines
                    if matched_predicate in target_predicates:
                        # Check if subject class is in the set of required classes for this predicate
                        required_classes = predicate_class_map.get(matched_predicate, set())
                        if current_subject_class in required_classes:
                            current_predicate = matched_predicate
                            # Key by (predicate, class) tuple
                            predicate_objects[(matched_predicate, current_subject_class)].add(matched_object)
                        # NOTE(review): when the class filter does not match,
                        # current_predicate is left unchanged (possibly stale from
                        # an earlier line) — confirm continuation lines cannot be
                        # misattributed in that case.
                    else:
                        # Non-target predicate - stop tracking continuation values
                        current_predicate = None
                    
                    # Check for additional values on same line after first match
                    # Line might have: mt:hasX prefix:a, prefix:b, prefix:c ;
                    # Or: mt:hasX "val1", "val2", "val3" ;
                    remaining = line[match_end:]
                    if current_predicate:
                        # Check for IRI continuations
                        for cont_match in continuation_iri_pattern.finditer(remaining):
                            object_id = cont_match.group(2)
                            predicate_objects[(current_predicate, current_subject_class)].add(object_id)
                        # Check for literal continuations
                        for cont_match in continuation_literal_pattern.finditer(remaining):
                            object_id = cont_match.group(1)
                            predicate_objects[(current_predicate, current_subject_class)].add(object_id)
                
                # Handle continuation lines (indented, no mt: predicate)
                elif current_predicate and (line.startswith(' ') or line.startswith('\t')):
                    # Check if this line has a new predicate (mt:something)
                    if 'mt:' in line:
                        # New predicate on this line - try both patterns
                        new_iri_match = predicate_iri_pattern.search(line)
                        new_literal_match = predicate_literal_pattern.search(line)
                        
                        new_predicate = None
                        new_object = None
                        
                        if new_iri_match:
                            new_predicate = new_iri_match.group(1)
                            new_object = new_iri_match.group(3)
                        elif new_literal_match:
                            new_predicate = new_literal_match.group(1)
                            new_object = new_literal_match.group(2)
                        
                        if new_predicate:
                            if new_predicate in target_predicates:
                                required_classes = predicate_class_map.get(new_predicate, set())
                                if current_subject_class in required_classes:
                                    current_predicate = new_predicate
                                    predicate_objects[(new_predicate, current_subject_class)].add(new_object)
                            else:
                                current_predicate = None
                    else:
                        # Continuation line - extract values for current predicate (both IRI and literal)
                        for cont_match in continuation_iri_pattern.finditer(line):
                            object_id = cont_match.group(2)
                            predicate_objects[(current_predicate, current_subject_class)].add(object_id)
                        for cont_match in continuation_literal_pattern.finditer(line):
                            object_id = cont_match.group(1)
                            predicate_objects[(current_predicate, current_subject_class)].add(object_id)
                
                # Check if predicate ends (semicolon = new predicate, period = end of subject)
                stripped = line.rstrip()
                if stripped.endswith(';'):
                    # Semicolon means next line has new predicate - clear current unless already handled above
                    if not matched_predicate:
                        current_predicate = None
                elif stripped.endswith('.'):
                    # Period means end of subject
                    current_predicate = None
        
        print(f"[DEBUG] Processed {line_count:,} lines")
        print(f"[DEBUG] Found distinct objects for predicates (by subject class):")
        for pred in sorted(target_predicates):
            for cls in sorted(predicate_class_map.get(pred, set())):
                count = len(predicate_objects.get((pred, cls), set()))
                print(f"  mt:{pred} (on {cls}): {count:,} distinct objects")
        
        # Return both counts and full sets (for debug mode)
        counts = {k: len(v) for k, v in predicate_objects.items()}
        return counts, predicate_objects
        
    except Exception as e:
        print(f"[ERROR] Error parsing TTL for predicate objects: {e}")
        import traceback
        traceback.print_exc()
        return {}, {}


def get_ttl_triples_for_predicates(ttl_file_path, target_predicates):
    """
    Scan a TTL file line by line and collect (subject, object) pairs for a
    chosen set of predicates.

    Args:
        ttl_file_path: Path to TTL file
        target_predicates: Set of predicate names to look for (e.g., {'hasBeersTextKey', 'hasDrugCode'})

    Returns:
        Dict mapping predicate -> set of (subject_id, object_id) tuples
    """
    if not target_predicates:
        return {}

    print(f"[DEBUG] Parsing TTL for predicates: {target_predicates}")

    # Object may be a prefixed IRI (prefix:id) or a full IRI (<...>).
    # Matches: mt:hasBeersTextKey beerstext:2_13_1_6
    # Or: mt:hasBeersTextKey <http://...>
    pred_re = re.compile(r'mt:(\w+)\s+(\w+:[\S]+|<[^>]+>)')
    # A line that is exactly "prefix:id" or "<uri>" starts a new subject.
    prefixed_subject_re = re.compile(r'^(\w+):(\S+)$')
    uri_subject_re = re.compile(r'^<([^>]+)>$')

    triples = defaultdict(set)
    subject = None
    lines_seen = 0

    try:
        with open(ttl_file_path, "r", encoding="utf-8") as handle:
            for raw in handle:
                lines_seen += 1
                text = raw.rstrip('\r\n')

                # Progress heartbeat for very large files.
                if lines_seen % 5000000 == 0:
                    print(f"[DEBUG] Processed {lines_seen:,} lines for triples...")

                # Update the current subject when this line declares one.
                sub_hit = prefixed_subject_re.match(text)
                if sub_hit:
                    subject = sub_hit.group(2)
                else:
                    sub_hit = uri_subject_re.match(text)
                    if sub_hit:
                        # Keep only the last path segment of the full URI.
                        subject = sub_hit.group(1).split('/')[-1]

                # Collect every occurrence of a target predicate on this line.
                for hit in pred_re.finditer(text):
                    pred_name = hit.group(1)
                    if pred_name not in target_predicates:
                        continue
                    obj_ref = hit.group(2)
                    # Extract the object ID from prefix:id or <URI>.
                    if obj_ref.startswith('<'):
                        obj_id = obj_ref[1:-1].split('/')[-1]
                    elif ':' in obj_ref:
                        obj_id = obj_ref.split(':')[1]
                    else:
                        obj_id = obj_ref
                    if subject:
                        triples[pred_name].add((subject, obj_id))

        print(f"[DEBUG] Found triples for predicates:")
        for pred_name, pairs in triples.items():
            print(f"  {pred_name}: {len(pairs):,} triples")

        return triples

    except Exception as e:
        print(f"[ERROR] Error parsing TTL for triples: {e}")
        import traceback
        traceback.print_exc()
        return {}


def get_ttl_class_counts(ttl_file_path):
    """
    Parse the TTL file and count distinct instances of each class (fast).
    Returns dict mapping class name to count.

    Recognizes four triple shapes: "prefix:id a mt:Class", a bare subject
    line followed by an indented "a mt:Class" continuation, and the two
    full-URI equivalents (explicit rdf:type URI, or full-URI subject with
    the "a" keyword).

    Side effects (DEBUG mode only):
        - Queries the R2RML metadata table so the scan can be restricted to
          the single class mapped to DEBUG_TABLE (fast path below).
        - Stores the full per-class subject-ID sets in the module-level
          _debug_ttl_subjects dict for later file output in main().

    Returns {} on any read error (the error is printed, not raised).
    """
    print(f"[DEBUG] Opening TTL file: {ttl_file_path}")
    class_counts = defaultdict(set)  # class name -> set of distinct subject IDs
    line_count = 0
    match_count = 0
    current_subject = None  # subject carried forward for multi-line "a mt:Class" declarations
    
    # In debug mode, get the target class from metadata
    target_class_filter = None
    if DEBUG_MODE and DEBUG_TABLE:
        # For mapping tables, we don't need class counts
        if is_mapping_table(DEBUG_TABLE):
            print(f"[DEBUG MODE] {DEBUG_TABLE} is a MAPPING table - skipping class counts")
            return {}
        
        # Query metadata to get the class name for this table
        try:
            conn = oracledb.connect(user=oracle_user, password=oracle_password, dsn=dsn,
                           config_dir=str(wallet_path), wallet_location=str(wallet_path),
                           wallet_password=wallet_password)
            cursor = conn.cursor()
            cursor.execute("""
                SELECT R2RML_CLASS FROM GLOBAL_DISTRIBUTE.UTIL_GRF_R2RML_COLUMN 
                WHERE TABLE_NAME = :table_name AND IS_KEY = 'Y'
            """, {'table_name': DEBUG_TABLE})
            row = cursor.fetchone()
            if row:
                target_class_filter = row[0]
                print(f"[DEBUG MODE] Only extracting class: {target_class_filter}")
            cursor.close()
            conn.close()
        except Exception as e:
            # Non-fatal: without a filter we simply scan for every class.
            print(f"[WARNING] Could not get target class: {e}")
    
    # Pre-compile regex patterns for speed
    subject_patterns = [
        (re.compile(r'^(\w+):(\S+)$'), 'prefixed subject'),
        (re.compile(r'^<([^>]+)>$'), 'full URI subject'),
    ]
    
    # Tuples of (compiled pattern, label, group index holding the class name).
    type_patterns = [
        (re.compile(r'^(\w+):(\S+)\s+a\s+mt:(\w+)'), 'instance prefix notation', 3),
        (re.compile(r'^\s+a\s+mt:(\w+)'), 'type only (multi-line)', 1),
        (re.compile(r'<([^>]+)>\s+<http://www\.w3\.org/1999/02/22-rdf-syntax-ns#type>\s+<http://multum\.health\.oraclecloud\.com/[^#]+#(\w+)>'), 'full URI rdf:type', 2),
        (re.compile(r'<([^>]+)>\s+a\s+mt:(\w+)'), 'full URI with a', 2),
    ]
    
    try:
        with open(ttl_file_path, "r", encoding="utf-8") as f:
            for line in f:
                line_count += 1
                line = line.rstrip('\r\n')
                
                # Progress heartbeat for very large files.
                if line_count % 5000000 == 0:
                    print(f"[DEBUG] Processed {line_count:,} lines, found {match_count:,} matches...")
                
                # Quick skip for obviously irrelevant lines
                if not line or line.startswith('@') or line.startswith('#'):
                    continue
                
                # Always check for subject lines (they set current_subject for next line)
                if not line.startswith(' ') and not line.startswith('\t'):
                    for pattern, pattern_name in subject_patterns:
                        match = pattern.match(line)
                        if match:
                            current_subject = match.group(2) if pattern_name == 'prefixed subject' else match.group(1).split('/')[-1]
                            break
                
                # DEBUG MODE FAST PATH: Only process type declaration lines containing target class
                if target_class_filter:
                    # Must have type indicator AND target class name
                    if ' a ' not in line and 'rdf-syntax-ns#type' not in line:
                        continue
                    if target_class_filter not in line:
                        continue
                
                for pattern, pattern_name, class_group in type_patterns:
                    match = pattern.search(line)
                    if match:
                        if pattern_name == 'type only (multi-line)':
                            class_name = match.group(1)
                            subject_id = current_subject
                        elif pattern_name == 'instance prefix notation':
                            class_name = match.group(3)
                            subject_id = match.group(2)
                        else:
                            # Full-URI subject: keep only the last path segment as the ID.
                            class_name = match.group(class_group)
                            subject_id = match.group(1).split('/')[-1] if '/' in match.group(1) else match.group(1)
                        
                        # In debug mode, only collect target class
                        if target_class_filter and class_name != target_class_filter:
                            current_subject = None
                            break
                        
                        if subject_id:
                            match_count += 1
                            class_counts[class_name].add(subject_id)
                        current_subject = None
                        break
            
            print(f"\n[DEBUG] Processed {line_count:,} lines, found {match_count:,} class declarations")
            
            # In debug mode, store the full subject sets for later file output
            if DEBUG_MODE:
                global _debug_ttl_subjects
                for k, v in class_counts.items():
                    _debug_ttl_subjects[k] = v
            
            return {k: len(v) for k, v in class_counts.items()}
            
    except Exception as e:
        print(f"[ERROR] Error reading TTL file: {e}")
        return {}


def get_ttl_class_subjects(ttl_file_path, target_classes):
    """
    Parse the TTL file and collect subject IDs for specific classes only.

    Called only for mismatched classes to get actual IDs for comparison
    against the DB primary keys.

    Args:
        ttl_file_path: Path to the Turtle file to scan.
        target_classes: Collection of class names (without the ``mt:``
            prefix) whose subject IDs should be collected.

    Returns:
        dict mapping class name -> set of subject ID strings.  Empty dict
        when ``target_classes`` is empty or the file cannot be read.
    """
    if not target_classes:
        return {}

    print(f"\n[DEBUG] Fetching TTL subject IDs for {len(target_classes)} classes: {target_classes}")
    class_subjects = defaultdict(set)
    current_subject = None

    # Pre-compile patterns once, consistent with get_ttl_class_counts:
    # re.match/re.search on string patterns inside the per-line loop pays a
    # cache lookup per call, which dominates on multi-million-line files.
    subject_patterns = [
        (re.compile(r'^(\w+):(\S+)$'), 'prefixed subject'),
        (re.compile(r'^<([^>]+)>$'), 'full URI subject'),
    ]

    type_patterns = [
        (re.compile(r'^(\w+):(\S+)\s+a\s+mt:(\w+)'), 'instance prefix notation', 3),
        (re.compile(r'^\s+a\s+mt:(\w+)'), 'type only (multi-line)', 1),
        (re.compile(r'<([^>]+)>\s+<http://www\.w3\.org/1999/02/22-rdf-syntax-ns#type>\s+<http://multum\.health\.oraclecloud\.com/[^#]+#(\w+)>'), 'full URI rdf:type', 2),
        (re.compile(r'<([^>]+)>\s+a\s+mt:(\w+)'), 'full URI with a', 2),
    ]

    try:
        with open(ttl_file_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.rstrip('\r\n')

                # A bare subject line sets the context consumed by a later
                # indented "a mt:Class" continuation line.
                for pattern, pattern_name in subject_patterns:
                    match = pattern.match(line)
                    if match:
                        current_subject = match.group(2) if pattern_name == 'prefixed subject' else match.group(1).split('/')[-1]
                        break

                for pattern, pattern_name, class_group in type_patterns:
                    match = pattern.search(line)
                    if match:
                        if pattern_name == 'type only (multi-line)':
                            class_name = match.group(1)
                            subject_id = current_subject
                        elif pattern_name == 'instance prefix notation':
                            class_name = match.group(3)
                            subject_id = match.group(2)
                        else:
                            # Full-URI subject: keep only the last path segment.
                            class_name = match.group(class_group)
                            subject_id = match.group(1).split('/')[-1] if '/' in match.group(1) else match.group(1)

                        # Only collect for target classes
                        if class_name in target_classes and subject_id:
                            class_subjects[class_name].add(subject_id)
                        current_subject = None
                        break

        return class_subjects

    except Exception as e:
        print(f"[ERROR] Error reading TTL file: {e}")
        return {}


def generate_comparison_table(pk_counts, ttl_class_counts, mapping_counts, ttl_predicate_counts):
    """Build the DB-vs-TTL comparison rows for regular and mapping tables.

    Args:
        pk_counts: Rows with CLASS_NAME/TABLE_NAME/COLUMN_NAME/DISTINCT_COUNT
            for regular tables (primary-key based).
        ttl_class_counts: dict class name -> distinct TTL instance count.
        mapping_counts: Rows for mapping tables (non-PK columns), including
            PREDICATE, SUBJECT_CLASS and OBJECT_CLASS.
        ttl_predicate_counts: dict keyed by (predicate, subject_class)
            tuples -> distinct TTL object count.

    Returns:
        List of row dicts; mapping rows carry IS_MAPPING=True and the
        predicate stripped of its "mt:" prefix.
    """
    rows = []

    # Regular tables: one row per primary-key class count.
    for entry in pk_counts:
        cls = entry["CLASS_NAME"]
        rows.append({
            "TABLE": entry["TABLE_NAME"],
            "COLUMN": entry["COLUMN_NAME"],
            "CLASS": cls,
            "DB_COUNT": entry["DISTINCT_COUNT"],
            "TTL_COUNT": ttl_class_counts.get(cls, 0),
            "IS_MAPPING": False,
            "PREDICATE": None,
        })

    # Mapping tables: TTL counts are keyed by (predicate, subject_class)
    # tuples, with the "mt:" prefix stripped from the predicate, because a
    # predicate can appear under several subject classes.
    for entry in mapping_counts:
        raw_pred = entry["PREDICATE"]
        bare_pred = raw_pred[3:] if raw_pred.startswith("mt:") else raw_pred
        subj_cls = entry["SUBJECT_CLASS"]
        rows.append({
            "TABLE": entry["TABLE_NAME"],
            "COLUMN": entry["COLUMN_NAME"],
            "CLASS": entry["OBJECT_CLASS"],
            "SUBJECT_CLASS": subj_cls,
            "DB_COUNT": entry["DISTINCT_COUNT"],
            "TTL_COUNT": ttl_predicate_counts.get((bare_pred, subj_cls), 0),
            "IS_MAPPING": True,
            "PREDICATE": bare_pred,  # stored without the "mt:" prefix
        })

    return rows


def print_comparison_table(comparison_table, ttl_predicate_objects=None):
    """Print the comparison table and return lists of issues.

    Status logic (DB is source of truth):
    - MISSING: TTL has 0 values (definitely a problem)
    - SHORTAGE: TTL has fewer values than DB (some DB values must be missing)
    - EXCESS: TTL has more values than DB (extra subjects in TTL not in DB)
    - OK: TTL count exactly matches DB count

    Returns:
        (regular_mismatches, mapping_mismatches) — the non-OK rows from each
        section, in table order.
    """
    # Split the rows into the two report sections.
    regular_rows = []
    mapping_rows = []
    for entry in comparison_table:
        (mapping_rows if entry["IS_MAPPING"] else regular_rows).append(entry)

    tallies = {"OK": 0, "SHORTAGE": 0, "MISSING": 0, "EXCESS": 0}
    regular_mismatches = []
    mapping_mismatches = []

    def _status_of(db_n, ttl_n):
        # DB is authoritative: an empty TTL is worst, then under/over-counts.
        if ttl_n == 0:
            return "MISSING"
        if ttl_n < db_n:
            return "SHORTAGE"
        if ttl_n > db_n:
            return "EXCESS"
        return "OK"

    # ==================== REGULAR TABLES ====================
    if regular_rows:
        print("\n" + "="*120)
        print("REGULAR TABLE RESULTS (DB is source of truth)")
        print("="*120)
        print("{:<40} {:<35} {:<20} {:<12} {:<12}".format(
            "TABLE", "COLUMN", "CLASS", "DB Count", "TTL Count"
        ))
        print("-" * 120)

        for entry in regular_rows:
            verdict = _status_of(entry["DB_COUNT"], entry["TTL_COUNT"])
            tallies[verdict] += 1
            if verdict != "OK":
                regular_mismatches.append(entry)
            print("{:<40} {:<35} {:<20} {:<12} {:<12}{}".format(
                entry["TABLE"], entry["COLUMN"], entry["CLASS"],
                entry["DB_COUNT"], entry["TTL_COUNT"], f" [{verdict}]"
            ))

        print("="*120)

    # ==================== MAPPING TABLES ====================
    if mapping_rows:
        print("\n" + "="*180)
        print("MAPPING TABLE RESULTS (DB is source of truth)")
        print("="*180)
        print("{:<40} {:<35} {:<35} {:<15} {:<12} {:<12} {:<20}".format(
            "TABLE", "COLUMN", "PREDICATE", "SUBJ CLASS", "DB Count", "TTL Count", "Example Missing"
        ))
        print("-" * 180)

        for entry in mapping_rows:
            subj_cls = entry.get("SUBJECT_CLASS", "-")
            pred = entry["PREDICATE"]
            verdict = _status_of(entry["DB_COUNT"], entry["TTL_COUNT"])
            tallies[verdict] += 1
            if verdict != "OK":
                mapping_mismatches.append(entry)
            # Only fetch an example DB value when the TTL is short or empty;
            # EXCESS rows have no missing DB values to illustrate.
            if verdict in ("MISSING", "SHORTAGE"):
                example_missing = get_one_missing_example(
                    entry["TABLE"], entry["COLUMN"], pred, subj_cls, ttl_predicate_objects)
            else:
                example_missing = "N/A"
            print("{:<40} {:<35} {:<35} {:<15} {:<12} {:<12} {:<20}{}".format(
                entry["TABLE"], entry["COLUMN"], f"mt:{pred}", subj_cls,
                entry["DB_COUNT"], entry["TTL_COUNT"], example_missing, f" [{verdict}]"
            ))

        print("="*180)

    # ==================== SUMMARY ====================
    print(f"\nSUMMARY: {tallies['OK']} OK | {tallies['SHORTAGE']} SHORTAGE | {tallies['MISSING']} MISSING | {tallies['EXCESS']} EXCESS")
    print("="*80)

    return regular_mismatches, mapping_mismatches


def get_one_missing_example(table_name, column_name, predicate, subject_class, ttl_predicate_objects):
    """Return one DB value for the column that is absent from the TTL.

    Looks up the TTL object set under the (predicate, subject_class) key and
    streams DISTINCT DB column values until one is found that the TTL lacks.

    Returns:
        The first missing value as a string, "N/A" when every DB value is
        present in the TTL, or "ERR" when the connection or query fails.
    """
    try:
        # TTL object sets are keyed by (predicate, subject_class) tuples.
        ttl_values = ttl_predicate_objects.get((predicate, subject_class), set()) if ttl_predicate_objects else set()

        conn = oracledb.connect(user=oracle_user, password=oracle_password, dsn=dsn,
                           config_dir=str(wallet_path), wallet_location=str(wallet_path),
                           wallet_password=wallet_password)
        # try/finally (instead of close() on the happy path only) so the
        # connection and cursor are released even when the query raises or
        # an early return fires.
        try:
            cursor = conn.cursor()
            try:
                # NOTE: identifiers cannot be bound; table/column names come
                # from the trusted R2RML metadata, not user input.
                sql = f"SELECT DISTINCT {column_name} FROM GLOBAL_DISTRIBUTE.{table_name} WHERE {column_name} IS NOT NULL ORDER BY {column_name}"
                cursor.execute(sql)

                # Stream rows; stop at the first value the TTL does not contain.
                for row in cursor:
                    db_val = str(row[0])
                    if db_val not in ttl_values:
                        return db_val
                return "N/A"
            finally:
                cursor.close()
        finally:
            conn.close()
    except Exception:
        # Callers embed this value in a report column; a short marker is
        # more useful there than a traceback.
        return "ERR"


def print_mismatch_details(mismatches, ttl_class_subjects, sample_count=3):
    """Print detailed differences for regular (non-mapping) mismatched classes.

    Only checks that all DB values exist in TTL (DB is source of truth).

    Args:
        mismatches: Non-OK rows from the regular-table comparison.
        ttl_class_subjects: dict class name -> set of TTL subject IDs.
        sample_count: Maximum number of sample IDs to print per class.
    """
    if not mismatches:
        return

    print("\n" + "="*120)
    print(f"REGULAR TABLE MISMATCH DETAILS (showing up to {sample_count} sample IDs)")
    print("(Checking: All DB values must exist in TTL)")
    print("="*120)

    for entry in mismatches:
        tbl = entry['TABLE']
        col = entry['COLUMN']
        cls = entry['CLASS']

        print(f"\n{tbl} / {cls}:")
        print(f"  DB Count: {entry['DB_COUNT']:,}  |  TTL Count: {entry['TTL_COUNT']:,}")

        # DB is authoritative: pull its PKs and diff against the TTL subjects.
        print(f"  [Fetching DB PKs for comparison...]")
        db_pks = get_db_pk_values_for_table(tbl, col)
        missing = db_pks - ttl_class_subjects.get(cls, set())

        if missing:
            print(f"  ✗ MISSING from TTL ({len(missing):,} DB values not in TTL):")
            for sample in sorted(missing)[:sample_count]:
                print(f"    - {sample}")
            overflow = len(missing) - sample_count
            if overflow > 0:
                print(f"    ... and {overflow:,} more")
        else:
            print(f"  ✓ OK: All {len(db_pks):,} DB values found in TTL")

    print("\n" + "="*120)


def print_mapping_mismatch_details(mismatches, ttl_predicate_objects, sample_count=3):
    """Print detailed differences for mapping table mismatches.

    Only checks that all DB values exist in TTL (DB is source of truth).

    Args:
        mismatches: Non-OK rows from the mapping-table comparison (each
            carries TABLE, COLUMN, PREDICATE and SUBJECT_CLASS).
        ttl_predicate_objects: dict keyed by (predicate, subject_class)
            tuples -> set of object IDs found in the TTL.
        sample_count: Maximum number of sample missing values to print.
    """
    if not mismatches:
        return

    print("\n" + "="*120)
    print(f"MAPPING TABLE MISMATCH DETAILS (showing up to {sample_count} sample values)")
    print("(Checking: All DB values must exist in TTL)")
    print("="*120)

    for entry in mismatches:
        table_name = entry['TABLE']
        column_name = entry['COLUMN']
        predicate = entry['PREDICATE']
        subject_class = entry.get('SUBJECT_CLASS')

        print(f"\n{table_name}.{column_name} (mt:{predicate}):")
        print(f"  DB Count: {entry['DB_COUNT']:,}  |  TTL Count: {entry['TTL_COUNT']:,}")

        # Fetch DB values for this column
        print(f"  [Fetching DB values for comparison...]")
        try:
            conn = oracledb.connect(user=oracle_user, password=oracle_password, dsn=dsn,
                           config_dir=str(wallet_path), wallet_location=str(wallet_path),
                           wallet_password=wallet_password)
            # try/finally so the connection/cursor are released even when the
            # query raises (the original leaked them on failure).
            try:
                cursor = conn.cursor()
                try:
                    sql = f"SELECT DISTINCT {column_name} FROM GLOBAL_DISTRIBUTE.{table_name} WHERE {column_name} IS NOT NULL"
                    cursor.execute(sql)
                    db_values = {str(r[0]) for r in cursor.fetchall()}
                finally:
                    cursor.close()
            finally:
                conn.close()
        except Exception as e:
            print(f"  [ERROR] Could not fetch DB values: {e}")
            continue

        # BUG FIX: ttl_predicate_objects is keyed by (predicate, subject_class)
        # tuples (see get_one_missing_example and the debug output in main);
        # the previous bare-predicate lookup always returned an empty set, so
        # every DB value was reported as missing.  Keep a bare-key fallback
        # for any legacy caller that passed a predicate-keyed dict.
        ttl_objects = ttl_predicate_objects.get((predicate, subject_class),
                                                ttl_predicate_objects.get(predicate, set()))

        # Only check DB values missing from TTL (DB is source of truth)
        in_db_not_ttl = db_values - ttl_objects

        if in_db_not_ttl:
            samples = sorted(in_db_not_ttl)[:sample_count]
            print(f"  ✗ MISSING from TTL ({len(in_db_not_ttl):,} DB values not in TTL):")
            for s in samples:
                print(f"    - {s}")
            if len(in_db_not_ttl) > sample_count:
                print(f"    ... and {len(in_db_not_ttl) - sample_count:,} more")
        else:
            print(f"  ✓ OK: All {len(db_values):,} DB values found in TTL")

    print("\n" + "="*120)


def main():
    """Run the end-to-end TTL validation.

    Phase 1 compares distinct-value counts from the database against the
    TTL file (regular tables by class, mapping tables by predicate).
    Phase 2 drills into mismatched regular classes to list example IDs
    missing from the TTL.

    Relies on module-level state: ttl_file_path (defined earlier in the
    file), DEBUG_MODE / DEBUG_TABLE / DEBUG_OUTPUT_DIR, the Oracle
    connection settings, and helper functions (get_db_pk_counts,
    get_db_mapping_counts, get_ttl_predicate_object_counts) defined
    elsewhere in this script.
    """
    print("="*140)
    if DEBUG_MODE:
        print(f"TTL VALIDATION SCRIPT - V8 DEBUG MODE (Table: {DEBUG_TABLE})")
    else:
        print("TTL VALIDATION SCRIPT - V8 (Handles both IRI and string literal objects)")
    print("="*140)
    
    # Phase 1: Get counts
    print("\n--- PHASE 1: Count Comparison ---")
    
    # Get regular table PK counts
    pk_counts = get_db_pk_counts()
    
    # Get mapping table non-PK column counts
    mapping_counts = get_db_mapping_counts()
    
    # Get TTL class counts (for regular tables)
    ttl_class_counts = get_ttl_class_counts(ttl_file_path)
    
    # Get TTL predicate object counts (for mapping tables)
    ttl_predicate_counts = {}
    ttl_predicate_objects = {}
    if mapping_counts:
        # Build predicate -> set of subject classes map for filtering
        # A predicate can appear in multiple tables with different subject classes
        # Strip "mt:" prefix from predicates if present (for regex matching)
        predicate_class_map = defaultdict(set)
        for m in mapping_counts:
            pred = m["PREDICATE"]
            if pred.startswith("mt:"):
                pred = pred[3:]
            predicate_class_map[pred].add(m["SUBJECT_CLASS"])
        
        # Convert to regular dict for passing to function
        predicate_class_map = dict(predicate_class_map)
        
        # Extract group predicates (strip "mt:" prefix if present)
        group_predicates = set()
        for m in mapping_counts:
            if m.get("GROUP_PREDICATE"):
                gp = m["GROUP_PREDICATE"]
                # Strip "mt:" prefix if present
                if gp.startswith("mt:"):
                    gp = gp[3:]
                group_predicates.add(gp)
        
        # Returns counts and the full object sets, both keyed by
        # (predicate, subject_class) tuples.
        ttl_predicate_counts, ttl_predicate_objects = get_ttl_predicate_object_counts(
            ttl_file_path, predicate_class_map, group_predicates
        )
    
    print(f"\n[DEBUG] pk_counts has {len(pk_counts)} entries (regular tables)")
    print(f"[DEBUG] mapping_counts has {len(mapping_counts)} entries (mapping table columns)")
    print(f"[DEBUG] ttl_class_counts has {len(ttl_class_counts)} entries")
    print(f"[DEBUG] ttl_predicate_counts has {len(ttl_predicate_counts)} entries")
    
    # No DB counts at all means metadata lookup failed; nothing to compare.
    if not pk_counts and not mapping_counts:
        print("\n[ERROR] No database counts found!")
        return
    
    if not ttl_class_counts and not ttl_predicate_counts:
        print("\n[WARNING] No TTL counts found - file may be empty or incorrectly formatted")
    
    comparison_table = generate_comparison_table(pk_counts, ttl_class_counts, mapping_counts, ttl_predicate_counts)
    
    # In debug mode, write TTL output files
    if DEBUG_MODE and DEBUG_OUTPUT_DIR:
        # Write class subjects for regular tables
        # (_debug_ttl_subjects was populated by get_ttl_class_counts)
        if pk_counts:
            target_class = pk_counts[0]['CLASS_NAME']
            subjects = _debug_ttl_subjects.get(target_class, set())
            output_file = os.path.join(DEBUG_OUTPUT_DIR, f"ttl_subjects_{target_class}.txt")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(f"# Class: {target_class}\n")
                f.write(f"# Distinct subjects: {len(subjects)}\n")
                f.write("#" + "="*60 + "\n")
                for subj in sorted(subjects):
                    f.write(f"{subj}\n")
            print(f"[DEBUG] Wrote {len(subjects)} TTL subjects to: {output_file}")
        
        # Write predicate objects for mapping tables
        for (predicate, subject_class), objects in ttl_predicate_objects.items():
            output_file = os.path.join(DEBUG_OUTPUT_DIR, f"ttl_predicate_objects_{predicate}_{subject_class}.txt")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(f"# Predicate: mt:{predicate}\n")
                f.write(f"# Subject Class: {subject_class}\n")
                f.write(f"# Distinct objects: {len(objects)}\n")
                f.write("#" + "="*60 + "\n")
                for obj in sorted(objects):
                    f.write(f"{obj}\n")
            print(f"[DEBUG] Wrote {len(objects)} TTL predicate objects to: {output_file}")
    
    regular_mismatches, mapping_mismatches = print_comparison_table(comparison_table, ttl_predicate_objects)
    
    # Phase 2: Get details for regular table mismatches (mapping tables already have Example column)
    if regular_mismatches:
        print("\n--- Regular Table Mismatch Details ---")
        # Second TTL pass, restricted to just the mismatched classes.
        target_classes = set(m['CLASS'] for m in regular_mismatches)
        ttl_class_subjects = get_ttl_class_subjects(ttl_file_path, target_classes)
        print_mismatch_details(regular_mismatches, ttl_class_subjects)
    
    # Debug mode summary
    if DEBUG_MODE and DEBUG_OUTPUT_DIR:
        print("\n" + "="*140)
        print("DEBUG OUTPUT FILES:")
        print("="*140)
        db_files = glob.glob(os.path.join(DEBUG_OUTPUT_DIR, "db_*.txt"))
        ttl_files = glob.glob(os.path.join(DEBUG_OUTPUT_DIR, "ttl_*.txt"))
        for f in sorted(db_files + ttl_files):
            print(f"  {f}")
        print("\nCompare files with: diff or your preferred diff tool")
        print("="*140)

# Script entry point.  Note that the interactive prompts (DEBUG mode,
# passwords) run at import time in the module header, before main() starts.
if __name__ == "__main__":
    main()