
    ui                     H    d Z ddlZddlZddlmZ ddlZ G d d          ZdS )zO
Module: PDF to Cells Extractor
Extracts individual voter cells from PDF pages
    N)convert_from_pathc                   &    e Zd Zd Zd Zd Zd ZdS )PDFToCellsExtractorc                     || _         d S )N)pdf_path)selfr   s     K/var/www/development/aibuddy-work/election-extract/workflow/pdf_to_cells.py__init__zPDFToCellsExtractor.__init__   s         c           	         t          j        |t           j                  }t          j        t           j        d          }t          j        t          j        |ddt           j        t           j        z             d         t           j	        |d          }t          j        t           j        d          }t          j        t          j        |ddt           j        t           j        z             d         t           j	        |d          }t          j
        ||          }t          j        |t           j        t           j                  \  }}	g }
|j        dd         \  }}||z  d	z  }|D ]x}t          j        |          }||k    r\t          j        |          \  }}}}|dk    rt#          |          |z  nd}d
|cxk     rdk     rn _|
                    ||||f           yt'          |
d           }
|
S )z1Detect voter cells from page using grid detection)P      r      r      )
iterations)r   r   Ng{Gz?g333333?g      @c                 "    | d         | d         fS )Nr   r    )cs    r	   <lambda>z7PDFToCellsExtractor.detect_grid_cells.<locals>.<lambda>9   s    QqT1Q4L r   )key)cv2cvtColorCOLOR_BGR2GRAYgetStructuringElement
MORPH_RECTmorphologyEx	thresholdTHRESH_BINARY_INVTHRESH_OTSU
MORPH_OPENaddfindContours	RETR_TREECHAIN_APPROX_SIMPLEshapecontourAreaboundingRectfloatappendsorted)r   
page_imagegrayhorizontal_kernelhorizontal_linesvertical_kernelvertical_linesgridcontours_cellspage_hpage_wmin_areacntareaxywhaspects                        r	   detect_grid_cellsz%PDFToCellsExtractor.detect_grid_cells   s   |J(:;;  5cngNN+M$3(=(OPPQRSN-!
 
 
 3CNGLL)M$3(=(OPPQRSNO
 
 
 w'88 &tS]C<STT! #)"1"-VOt+ 	/ 	/C?3''Dh -c22
1a)*QqAA%%%%#%%%%%LL!Q1... u"8"8999r   c                    t          j        |d           t          | j        d||          }|sdS |d         }t	          j        |          }t          j        |t          j                  }| 	                    |          }t          |d          D ]@\  }\  }	}
}}||
|
|z   |	|	|z   f         }| d|dd	|d
d}t          j        ||           At          |          S )z Extract cells from a single pageTexist_oki,  )dpi
first_page	last_pager   r   z/page03d_cell02dz.png)osmakedirsr   r   nparrayr   r   COLOR_RGB2BGRr?   	enumerateimwritelen)r   page_num
output_dirpagespage_pilpage_nppage_bgrr4   idxr:   r;   r<   r=   cell_imgoutput_paths                  r	   extract_pagez PDFToCellsExtractor.extract_page=   s   
J.... "M	
 
 
  	1 8(8$$<):;; &&x00 "+5!!4!4 	/ 	/C!Q1!A#q1u-H'NNhNNNNNNNKKX....5zzr   c                     t          j        |d           d}t          ||dz             D ]G}t          d| d| dd	           |                     ||          }||z  }t          | d
           H|S )z!Extract cells from multiple pagesTrA   r   r   z  Page /z... )endz cells extracted)rI   rJ   rangeprintrZ   )r   
start_pageend_pagerR   total_cellsrQ   	num_cellss          r	   extract_pagesz!PDFToCellsExtractor.extract_pages\   s    
J....j(Q,77 	2 	2H4H44x444#>>>>))(J??I9$KY0001111r   N)__name__
__module____qualname__r
   r?   rZ   re   r   r   r	   r   r      sQ        ! ! !) ) )V  >    r   r   )__doc__r   numpyrK   	pdf2imager   rI   r   r   r   r	   <module>rl      s}    
 


     ' ' ' ' ' ' 				\ \ \ \ \ \ \ \ \ \r   