
    wi/                         d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlZ	ddl
mZmZmZ ddlmZ  G d de          Z G d	 d
          ZdS )    N)Enum)ListTuple   )TextRecOutputWordInfoWordType)quads_to_rect_bboxc                       e Zd ZdZdZdS )	Directionhorizontal_directvertical_directN)__name__
__module____qualname__
HORIZONTALVERTICAL     s/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/rapidocr/cal_rec_boxes/main.pyr   r      s        $J HHHr   r   c                   p   e Zd ZdZ	 d&deej                 dej        dededef
dZ	e
d	ej        defd
            Z	 d&dedej        dededeee         eeee                           ee         f         f
dZdeee                  dededeeeeef         deeee                           f
dZe
dee         dededeeeeef         deeee                           f
d            Ze
dee         dedefd            Ze
dee         dedededef
d            Ze
deeee                           deeee                           fd            Zdej        deeee                           d edeeee                           fd!Ze
d"             Ze
d#eee                  deee                  fd$            Zd%S )'CalRecBoxesu   计算识别文字的汉字单字和英文单词的坐标框。
    代码借鉴自PaddlePaddle/PaddleOCR和fanqie03/char-detectionFimgsdt_boxesrec_resreturn_single_char_boxreturnc           
         g }t          t          ||                    D ]\  }\  }}|j        |j        dk    s|j        #|j        d d         \  }	}
t          j        ddg|
dg|
|	gd|	gg          }|                     |j        |         ||j        |         |          \  }}}| 	                    t          j        |                    }|                     |          }|                     t          j        |          ||          }|                    t          t          |||                               t!          |          |_        |S )Nr   r   )	enumerateziptxtssizeword_resultsshapenparraycal_ocr_word_boxadjust_box_overlapcopydeepcopyget_box_directionreverse_rotate_crop_imageappendlisttuple)selfr   r   r   r   r#   idximgboxhwimg_boxword_box_content_listword_box_list	conf_list	directions                   r   __call__zCalRecBoxes.__call__   sh    (T8)<)<== 	 	OC#s|#sx1}}8L8T9RaR=DAqhAAAA?@@G>B>S>SS!$S)&	? ?;!=) !33DM-4P4PQQM..s33I ::c""M9 M S.	=IIJJ     %\22r   r3   c           
         t          t          j                            | d         | d         z
                      t          t          j                            | d         | d         z
                      t          t          j                            | d         | d         z
                      t          t          j                            | d         | d         z
                      g}t	          |d         |d                   }t	          |d         |d                   }|dk     rt
          j        S t          ||z  d          }|dk    rt
          j        nt
          j        S )Nr      r      gư>g      ?)	floatr%   linalgnormmaxr   r   roundr   )r3   edge_lengthswidthheightaspect_ratios        r   r+   zCalRecBoxes.get_box_direction9   s    ")..Q#a&1122")..Q#a&1122")..Q#a&1122")..Q#a&1122	
 LO\!_55\!_l1o664<<%%Ve^Q//%1S%8%8y!!i>RRr   rec_txtbbox	word_infoc                 @   |r|j         dk    rg g g fS t          |d                   }|d         |d         z
  |j         z  }t          d |j        D                       }g g g }
}	}t	          |j        |j                  D ]\  }}|r@|s>|                    |           |
                    d                    |                     n*|	                    |           |
	                    |           t          |          dk    r|                     ||          }|	                    |           |                     |	|d         |d         t          |                    }|r|s|                     ||||          }n|                     ||||          }|
||j        fS )u  Calculate the detection frame for each word based on the results of recognition and detection of ocr
        汉字坐标是单字的
        英语坐标是单词级别的
        三种情况：
        1. 全是汉字
        2. 全是英文
        3. 中英混合
        r   )N.r   c              3   2   K   | ]}|t           j        u V  d S N)r	   EN_NUM).0vs     r   	<genexpr>z/CalRecBoxes.cal_ocr_word_box.<locals>.<genexpr>a   s)      OOQA0OOOOOOr    r=   )line_txt_lenr
   all
word_typesr    words	word_colsr-   joinextendlencalc_avg_char_widthcalc_all_char_avg_widthcalc_en_num_boxcalc_boxconfs)r0   rH   rI   rJ   r   bbox_pointsavg_col_widthis_all_en_num	line_colschar_widthsword_contentswordword_col	avg_widthavg_char_width
word_boxess                   r   r'   zCalRecBoxes.cal_ocr_word_boxL   s     	)0A55r2:(i99$Q+a.8I<RROO):NOOOOO02B;	!)/93FGG 	* 	*ND( +%; +  ***$$RWWT]]3333  ***$$T***8}}!!00=IIIy))))55QQW
 
  	!7 	-->=+ JJ >=+ J j)/99r   rc   ri   ra   r`   c                     g }|D ]a}|                      ||||          }t          t          j        |                    \  }}	}
}|                    ||	g|
|	g|
|g||gg           b|S rM   )r^   r
   r%   r&   r-   )r0   rc   ri   ra   r`   resultsone_colcur_word_cellx0y0x1y1s               r   r]   zCalRecBoxes.calc_en_num_box   s       	E 	EG MM M 00G0GHHNBBNNRHr2hR2r(CDDDDr   c                 6   |\  }}}}g }| D ]|}	|	dz   |z  }
t          t          |
|dz  z
            d          |z   }t          t          |
|dz  z             ||z
            |z   }||g||g||g||gg}|                    |           }t	          |d           S )Ng      ?r   r   c                     | d         d         S )Nr   r   )xs    r   <lambda>z&CalRecBoxes.calc_box.<locals>.<lambda>   s    QqT!W r   )key)rB   intminr-   sorted)rc   ri   ra   r`   ro   rp   rq   rr   rl   col_idxcenter_xchar_x0char_x1cells                 r   r^   zCalRecBoxes.calc_box   s     %BB  	! 	!G#6H #h!);;<<a@@2EG#h!);;<<b2gFFKG""""	D NN4    g#4#45555r   rg   each_col_widthc                 T    | d         | d         z
  |z  }|t          |           dz
  z  S )Nr   r=   )rZ   )rg   r   char_total_lengths      r   r[   zCalRecBoxes.calc_avg_char_width   s0    %b\HQK7>I CMMA$566r   
width_listbbox_x0bbox_x1txt_lenc                     |dk    rdS t          |           dk    rt          |           t          |           z  S ||z
  |z  S )Nr   g        )rZ   sum)r   r   r   r   s       r   r\   z#CalRecBoxes.calc_all_char_avg_width   sJ     a<<3z??Qz??S__44'!W,,r   r8   c                    t          t          |           dz
            D ]}| |         | |dz            }}|d         d         |d         d         k    rt          |d         d         |d         d         z
            }|d         dxx         |dz  z  cc<   |d         dxx         |dz  z  cc<   |d         dxx         ||dz  z
  z  cc<   |d         dxx         ||dz  z
  z  cc<   | S )Nr=   r   r   r>   )rangerZ   abs)r8   icurnxtdistances        r   r(   zCalRecBoxes.adjust_box_overlap   s   
 s=))A-.. 	5 	5A$Q'q1u)=C1vay3q6!9$$s1vay3q6!9455Aq			X\)			Aq			X\)			Aq			X144			Aq			X144			r   word_points_listr:   c           	         t          t          j        |dddf                             }t          t          j        |dddf                             }|dddf         |z
  |dddf<   |dddf         |z
  |dddf<   t          t          j                            |d         |d         z
                      }t          t          j                            |d         |d         z
                      }t          j        ddg|dg||gd|gg                              t          j                  }t          j	        ||          }	t          j
        |	          \  }
}g }|D ]}g }|D ]}|}|t          j        k    rE|                     t          j        d          |d         |d         dd          }|d         |z   |d<   t          j        |dgz             }t          j        ||          \  }}}||z  ||z  g}t          |d         |z             t          |d         |z             g}|                    |           |                     |          }|                    |           |S )u  
        get_rotate_crop_image的逆操作
        img为原图
        part_img为crop后的图
        bbox_points为part_img中对应在原图的bbox, 四个点，左上，右上，右下，左下
        part_points为在part_img中的点[(x, y), (x, y)]
        Nr   r=   r>   i)rx   r%   ry   r@   rA   r&   astypefloat32cv2getPerspectiveTransforminvertr   r   s_rotatemathradiansdotr-   order_points)r0   r`   r   r:   lefttopimg_crop_widthimg_crop_heightpts_stdM_IMnew_word_points_listword_pointsnew_word_pointspoint	new_pointpru   yzs                        r   r,   z%CalRecBoxes.reverse_rotate_crop_image   s    26+aaad+,,--"&QQQT*++,,'1-4AAAqD'1-3AAAqDRY^^KN[^,KLLMMbinn[^k!n-LMMNN(A#1O$	
 
 &

 	 'W==
12!+ 	9 	9K O$ 2 2!		 222 $S))9Q<1q!! !I $-Q<.#@IaLHY!_--&Q--1aUAEN	 1!455s9Q<#;M7N7NO	&&y1111"//@@O ''8888##r   c                 :   t          j        |          }t          j        |          }||z
  t          j        |           z  ||z
  t          j        |           z  z   |z   }||z
  t          j        |           z  ||z
  t          j        |           z  z
  |z   }||gS )uk   绕pointx,pointy顺时针旋转
        https://blog.csdn.net/qq_38826019/article/details/84233397
        )r%   r&   r   cossin)anglevaluexvalueypointxpointysRotatexsRotateys          r   r   zCalRecBoxes.s_rotate   s    
 &!!&!!f_/$(5//12 	 f_/$(5//12 	
 (##r   ori_boxc           
      T   d }t          j        |                               d          }t          j        |dddf                   t          j        |dddf                   }}t          j        |dddf         |k              r5t          j        |dddf         |k              r|t          j        |dddf         t          j        |dddf                   k                       }|t          j        |dddf         t          j        |dddf                   k                       }|t          j        |dddf         t          j        |dddf                   k                       }|t          j        |dddf         t          j        |dddf                   k                       }nt          j        |dddf         |k              rXt          j	        |dddf                   }	||	d                  }||	d                  }||	d                  }||	d                  }nt          j        |dddf         |k              rt          j        |dddf         |k              rb|t          j        |dddf         |k                        |t          j        |dddf         |k                       }}
|
t          j        |
dddf         t          j        |
dddf                   k                       |
t          j        |
dddf         t          j        |
dddf                   k                       }}|t          j        |dddf         t          j        |dddf                   k                       |t          j        |dddf         t          j        |dddf                   k                       }}n`|t          j        |dddf         |k                        |t          j        |dddf         |k                       }}|t          j        |dddf         t          j        |dddf                   k                       |t          j        |dddf         t          j        |dddf                   k                       }}|t          j        |dddf         t          j        |dddf                   k                       |t          j        |dddf         t          j        |dddf                   k                       }} ||          } ||          } ||          } ||          }t          j        ||||g                              d          
                                S )u   矩形框顺序排列c                 x    | j         dk    r|                     d          S | j         dk    r| S | d dd d f         S )N)r   )r=   r   r=   )r$   reshape)r   s    r   convert_to_1x2z0CalRecBoxes.order_points.<locals>.convert_to_1x2  sF    w$yy(((w&  RaRU8Or   )r   r   Nr   r=   r   r>   )r%   r&   r   meananywherery   rB   rT   argsorttolist)r   r   r3   r|   center_yp1p2p3p4y_sortp12p34p14p23s                 r   r   zCalRecBoxes.order_points  sZ   	 	 	 hw''00WSAY//QQQT1C1C(6#aaad)x'(( (	RV1I!.
 .
 (	 RXc!!!Q$i26#aaad)+<+<<==>BRXc!!!Q$i26#aaad)+<+<<==>BRXc!!!Q$i26#aaad)+<+<<==>BRXc!!!Q$i26#aaad)+<+<<==>BBVC1I)** !	ZAAAqD	**FVAYBVAYBVAYBVAYBBVC1I)** 	rv1I!0
 0
 	 BHSAY1223BHSAY1223 C
 BHSAY"&QQQT*;*;;<<=BHSAY"&QQQT*;*;;<<= B
 BHSAY"&QQQT*;*;;<<=BHSAY"&QQQT*;*;;<<= BB BHSAY1223BHSAY1223 C
 BHSAY"&QQQT*;*;;<<=BHSAY"&QQQT*;*;;<<= B
 BHSAY"&QQQT*;*;;<<=BHSAY"&QQQT*;*;;<<= B ^B^B^B^BxRR())11'::AACCCr   N)F)r   r   r   __doc__r   r%   ndarrayr   boolr;   staticmethodr   r+   strr   r   r?   r'   rx   r]   r^   r[   r\   r(   r,   r   r   r   r   r   r   r      s       G G (- 2: * 	
 !% 
   @ Srz Si S S S \S. (-2: 2:2: j2: 	2:
 !%2: 
tCy$tDK014;>	?2: 2: 2: 2:hS	?  	
 5%56 
d4;	      6966 6 5%56	6
 
d4;	 6 6 6 \62 7d3i 7 75 7 7 7 \7 	-K	-*/	-:?	-JM	-		- 	- 	- \	- De-.	d4;	    \3$Z3$ tDK013$ 	3$
 
d49o	3$ 3$ 3$ 3$j $ $ \$$ <Dd49o <D$tCy/ <D <D <D \<D <D <Dr   r   )r)   r   enumr   typingr   r   r   numpyr%   ch_ppocr_rec.typingsr   r   r	   utils.utilsr
   r   r   r   r   r   <module>r      s                    



     D D D D D D D D D D , , , , , ,! ! ! ! ! ! ! !
zD zD zD zD zD zD zD zD zD zDr   