#!/usr/bin/env python3
import cv2
import numpy as np
from PIL import Image
import pytesseract
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from workflow.cells_to_csv import CellsToCSVExtractor

# Debug script: crop the value region next to the "profession" label in one
# cell image and print what Tesseract reads from it under several
# preprocessing variants, saving the intermediate images for inspection.

# OCR settings shared by every variant so the results stay comparable.
OCR_LANG = 'ben+eng'
OCR_CONFIG = r'--oem 3 --psm 7'


def _run_ocr(image):
    """Return the stripped Tesseract output for an OpenCV image array.

    Three-channel input is taken to be in OpenCV's BGR order and is converted
    to RGB first, because ``Image.fromarray`` interprets channels as RGB.
    Single-channel (grayscale/binary) input is passed through unchanged.
    """
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    text = pytesseract.image_to_string(
        Image.fromarray(image), lang=OCR_LANG, config=OCR_CONFIG
    )
    return text.strip()


# Load cell 12
cell_path = 'page3_output/cells/page003_cell12.png'
cell_color = cv2.imread(cell_path)
if cell_color is None:
    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, so fail here with the path instead of inside cvtColor.
    sys.exit(f"Could not read cell image: {cell_path}")
cell_gray = cv2.cvtColor(cell_color, cv2.COLOR_BGR2GRAY)
cell_h, cell_w = cell_color.shape[:2]

# Use extractor to find templates
extractor = CellsToCSVExtractor()
positions = {}
for template_name, template in extractor.templates.items():
    match = extractor.find_template(cell_gray, template)
    if match:
        positions[template_name] = match

# Extract profession region using actual logic
if 'profession' not in positions:
    print("Profession template not found in cell; nothing to test.")
else:
    prof_x, prof_y, prof_w, prof_h, score = positions['profession']

    if 'dob' in positions:
        # The value field ends where the 'dob' label begins.
        dob_x, dob_y, dob_w, dob_h, dob_score = positions['dob']
        value_w = dob_x - (prof_x + prof_w) - extractor.H_GAP
    else:
        # No 'dob' label found: run to the cell's right edge, minus a margin.
        value_w = cell_w - (prof_x + prof_w) - 5

    value_x = prof_x + prof_w + extractor.H_GAP
    value_y = prof_y - extractor.V_PAD_TOP
    value_h = prof_h + extractor.V_PAD_TOP + extractor.V_PAD_BOTTOM

    print(f"Profession region: x={value_x}, y={value_y}, w={value_w}, h={value_h}")

    # Clamp the rectangle to the image. A negative start would otherwise be
    # interpreted by NumPy as an index from the end and silently crop the
    # wrong pixels.
    x0 = max(value_x, 0)
    y0 = max(value_y, 0)
    x1 = min(value_x + value_w, cell_w)
    y1 = min(value_y + value_h, cell_h)
    if x1 <= x0 or y1 <= y0:
        sys.exit(
            "Profession region is empty after clamping to the cell "
            f"({cell_w}x{cell_h}); check the template positions."
        )

    # Extract region
    region = cell_color[y0:y1, x0:x1]

    # Save for inspection
    cv2.imwrite('prof_correct_region.png', region)

    print("\nTesting profession field OCR (CORRECT REGION):")
    print("="*60)

    # Build every preprocessing variant of the cropped region.
    gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    adaptive = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    scaled = cv2.resize(region, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
    _, binary2 = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    _, binary_manual = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # (heading, image, optional file to save the image to)
    variants = [
        ("1. ORIGINAL (color):", region, None),
        ("2. GRAYSCALE:", gray, None),
        ("3. BINARY THRESHOLD (CURRENT):", binary, 'prof_correct_binary.png'),
        ("4. ADAPTIVE THRESHOLD:", adaptive, 'prof_correct_adaptive.png'),
        ("5. SCALED 2x:", scaled, None),
        ("6. DENOISED + BINARY:", binary2, 'prof_correct_denoise_binary.png'),
        ("7. BINARY (manual threshold=127):", binary_manual, 'prof_correct_binary_manual.png'),
    ]

    for heading, image, out_name in variants:
        if out_name is not None:
            cv2.imwrite(out_name, image)
        print(f"\n{heading}")
        print(f"   '{_run_ocr(image)}'")