#!/usr/bin/env python3
import cv2
import numpy as np
from PIL import Image
import pytesseract

# Load cell 12
cell_path = 'page3_output/cells/page003_cell12.png'
cell = cv2.imread(cell_path)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than with a TypeError
# on the slice below.
if cell is None:
    raise FileNotFoundError(f"Could not read cell image: {cell_path}")

# Extract profession region coordinates (from debug output: 84x46)
# Position based on template matching
prof_x, prof_y = 120, 189  # Approximate from the annotated image
prof_w, prof_h = 84, 46

# Extract region (rows are indexed by y, columns by x)
region = cell[prof_y:prof_y+prof_h, prof_x:prof_x+prof_w]
# NumPy slicing clamps out-of-range bounds silently; an empty crop would only
# surface later as an obscure error inside cv2.imwrite or the OCR calls.
if region.size == 0:
    raise ValueError(
        f"Profession region x={prof_x}, y={prof_y}, w={prof_w}, h={prof_h} "
        f"lies outside the cell image of shape {cell.shape}"
    )

# Save original
cv2.imwrite('profession_original.png', region)

# Test different methods
#
# Each variant below preprocesses the cropped profession field differently,
# saves the intermediate image for visual inspection, and prints what
# Tesseract reads from it so the methods can be compared side by side.

# Shared Tesseract settings: Bengali + English models, default engine mode
# (--oem 3), and page segmentation mode 7 (treat the image as one text line).
OCR_LANG = 'ben+eng'
OCR_CONFIG = r'--oem 3 --psm 7'


def _report(label: str, image: np.ndarray) -> str:
    """Print a heading, OCR *image*, print the stripped result, return raw text.

    Args:
        label: Heading for this variant, printed as "\\n<label>:".
        image: Either a single-channel array or a 3-channel array in OpenCV's
            BGR channel order.

    Returns:
        The unstripped string returned by pytesseract.
    """
    print(f"\n{label}:")
    if image.ndim == 3:
        # OpenCV stores colour images as BGR, while Image.fromarray interprets
        # a 3-channel array as RGB; convert so Tesseract sees the true colours
        # instead of a red/blue-swapped image.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    raw = pytesseract.image_to_string(
        Image.fromarray(image), lang=OCR_LANG, config=OCR_CONFIG
    )
    print(f"   '{raw.strip()}'")
    return raw


print("Testing profession field OCR:")
print("="*60)

# 1. Original color
text = _report("1. ORIGINAL (color)", region)

# 2. Grayscale
gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
cv2.imwrite('profession_gray.png', gray)
text = _report("2. GRAYSCALE", gray)

# 3. Binary (current method) - Otsu picks the threshold, so the 0 is ignored
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2.imwrite('profession_binary.png', binary)
text = _report("3. BINARY THRESHOLD (CURRENT)", binary)

# 4. Binary inverted
_, binary_inv = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
cv2.imwrite('profession_binary_inv.png', binary_inv)
text = _report("4. BINARY INVERTED", binary_inv)

# 5. Adaptive threshold (Gaussian-weighted 11x11 neighbourhood, constant C=2)
adaptive = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imwrite('profession_adaptive.png', adaptive)
text = _report("5. ADAPTIVE THRESHOLD", adaptive)

# 6. Scale up first, then process
scaled = cv2.resize(region, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
cv2.imwrite('profession_scaled.png', scaled)
text = _report("6. SCALED 2x (no threshold)", scaled)

# 7. Scale + threshold
scaled_gray = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
_, scaled_binary = cv2.threshold(scaled_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2.imwrite('profession_scaled_binary.png', scaled_binary)
text = _report("7. SCALED 2x + BINARY", scaled_binary)