
    wi              	       \   d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlZddlmZ ddlmc mZ dZdZg dZ G d	 de          Ze G d
 de                      Ze G d de                      Zd Zd Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z	 d5dej         de!de"dej         fdZ# G d d ej                  Z$ G d! d"ej                  Z% G d# d$ej                  Z& G d% d&ej                  Z' G d' d(ej                  Z( G d) d*ej                  Z) G d+ d,ej                  Z* G d- d.ej                  Z+ G d/ d0ej                  Z, G d1 d2ej                  Z- G d3 d4e-          Z.dS )6z
This code is refer from:
https://github.com/huggingface/transformers/blob/main/src/transformers/models/donut/modeling_donut_swin.py

    N)OrderedDict)	dataclass)OptionalTupleUnionDonutSwinConfigz0https://huggingface.co/naver-clova-ix/donut-base)   1   i   c                   V     e Zd ZdZdddZddddg d	g d
ddddddddddf fd	Z xZS )r   z
donut-swin	num_heads
num_layers)num_attention_headsnum_hidden_layers         `   )   r      r   )r   r                  @T        皙?geluF{Gz?h㈵>c           
      T   t                                                       || _        || _        || _        || _        || _        t          |          | _        || _	        || _
        || _        |	| _        |
| _        || _        || _        || _        || _        || _        || _        t)          |dt          |          dz
  z  z            | _        |                                D ]C\  }}	 t/          | ||           # t0          $ r}t3          d| d| d|             |d }~ww xY wd S )Nr   r	   z
Can't set z with value z for )super__init__
image_size
patch_sizenum_channels	embed_dimdepthslenr   r   window_size	mlp_ratioqkv_biashidden_dropout_probattention_probs_dropout_probdrop_path_rate
hidden_actuse_absolute_embeddingslayer_norm_epsinitializer_rangeinthidden_sizeitemssetattrAttributeErrorprint)selfr"   r#   r$   r%   r&   r   r(   r)   r*   r+   r,   r-   r.   r/   r1   r0   kwargskeyvalueerr	__class__s                        /var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/rapidocr/networks/backbones/rec_donut_swin.pyr!   zDonutSwinConfig.__init__.   sN   ( 	$$("f++"&" #6 ,H),$'>$,!2y1Vq+AABB ,,.. 	 	JCc5))))!   F3FFEFFFFGGG		 	s   *C<<
D%D  D%)__name__
__module____qualname__
model_typeattribute_mapr!   __classcell__r=   s   @r>   r   r   &   s        J  +) M || ..%( %#. . . . . . . . . .    c                   N     e Zd ZdZdZdZdZ fdZd Z fdZ	 fdZ
d Z xZS )DonutSwinEncoderOutputNc                 :     t                      j        |i | d S Nr    r!   r8   argsr9   r=   s      r>   r!   zDonutSwinEncoderOutput.__init__g   %    $)&)))))rF   c                     t          |t                    r)t          |                                           }||         S |                                 |         S rJ   
isinstancestrdictr4   to_tupler8   k
inner_dicts      r>   __getitem__z"DonutSwinEncoderOutput.__getitem__j   F    a 	&djjll++Ja= ==??1%%rF   c                     ||                                  v r$|"t                                          ||           t                                          ||           d S rJ   keysr    __setitem____setattr__r8   namer;   r=   s      r>   r^   z"DonutSwinEncoderOutput.__setattr__q   U    499;;5#4GGe,,,D%(((((rF   c                     t                                          ||           t                                          ||           d S rJ   r    r]   r^   r8   r:   r;   r=   s      r>   r]   z"DonutSwinEncoderOutput.__setitem__v   =    C'''C'''''rF   c                 ^     t           fd                                 D                       S )a
        Convert self to a tuple containing all the attributes/keys that are not `None`.
        c              3   (   K   | ]}|         V  d S rJ    .0rV   r8   s     r>   	<genexpr>z2DonutSwinEncoderOutput.to_tuple.<locals>.<genexpr>~   '      22T!W222222rF   tupler\   r8   s   `r>   rT   zDonutSwinEncoderOutput.to_tuplez   0     2222diikk222222rF   )r?   r@   rA   last_hidden_statehidden_states
attentionsreshaped_hidden_statesr!   rX   r^   r]   rT   rD   rE   s   @r>   rH   rH   _   s         MJ!* * * * *& & &) ) ) ) )
( ( ( ( (3 3 3 3 3 3 3rF   rH   c                   R     e Zd ZdZdZdZdZdZ fdZd Z	 fdZ
 fdZd Z xZS )DonutSwinModelOutputNc                 :     t                      j        |i | d S rJ   rK   rL   s      r>   r!   zDonutSwinModelOutput.__init__   rN   rF   c                     t          |t                    r)t          |                                           }||         S |                                 |         S rJ   rP   rU   s      r>   rX   z DonutSwinModelOutput.__getitem__   rY   rF   c                     ||                                  v r$|"t                                          ||           t                                          ||           d S rJ   r[   r_   s      r>   r^   z DonutSwinModelOutput.__setattr__   ra   rF   c                     t                                          ||           t                                          ||           d S rJ   rc   rd   s      r>   r]   z DonutSwinModelOutput.__setitem__   re   rF   c                 ^     t           fd                                 D                       S )rg   c              3   (   K   | ]}|         V  d S rJ   ri   rj   s     r>   rl   z0DonutSwinModelOutput.to_tuple.<locals>.<genexpr>   rm   rF   rn   rp   s   `r>   rT   zDonutSwinModelOutput.to_tuple   rq   rF   )r?   r@   rA   rr   pooler_outputrs   rt   ru   r!   rX   r^   r]   rT   rD   rE   s   @r>   rw   rw      s         MMJ!* * * * *& & &) ) ) ) )
( ( ( ( (3 3 3 3 3 3 3rF   rw   c                     | j         \  }}}}|                     |||z  |||z  ||g          } |                     g d                              d|||g          }|S )z2
    Partitions the given input into windows.
    r   r	   r   r   r      shapereshape	transpose)input_featurer(   
batch_sizeheightwidthr$   windowss          r>   window_partitionr      s     /<.A+J|!))k![ 	
	 	M %%&8&8&899AA	[+|4 G NrF   c                     | j         d         }|                     d||z  ||z  |||g          } |                     g d                              d|||g          } | S )z?
    Merges windows to produce higher resolution features.
    r   r   r   )r   r(   r   r   r$   s        r>   window_reverser      s     =$Look![ 	
	 	G  2 2 233;;	VUL) G NrF   c                   ,     e Zd ZdZd fd	ZddZ xZS )DonutSwinEmbeddingszW
    Construct the patch and position embeddings. Optionally, also the mask token.
    Fc           	         t                                                       t          |          | _        | j        j        }| j        j        | _        |rt          j        t          j	        
                    t          j        dd|j                                      t          j                                      | _        t          j	                            | j                   nd | _        |j        rt          j        t          j	        
                    t          j        d|dz   |j                                      t          j                                      | _        t          j	                            | j                   nd | _        t          j        |j                  | _        t          j        |j                  | _        d S )Nr	   )r    r!   DonutSwinPatchEmbeddingspatch_embeddingsnum_patches	grid_size
patch_gridnn	Parameterinitxavier_uniform_torchzerosr%   tofloat32
mask_tokenzeros_r/   position_embeddingsposition_embedding	LayerNormnormDropoutr+   dropout)r8   configuse_mask_tokenr   r=   s       r>   r!   zDonutSwinEmbeddings.__init__   sg    8 @ @+7/9 	# !l''K1f&677::5=II  DO
 GNN4?++++"DO) 	, (*|''K;?F4DEEHHWW ( (D$
 GNN423333'+D$L!122	z&"<==rF   Nc                 n   |                      |          \  }}|                     |          }|j        \  }}}|R| j                            ||d          }|                    d                              |          }	|d|	z
  z  ||	z  z   }| j        
|| j        z   }|                     |          }||fS )Nr         ?)	r   r   r   r   expand	unsqueezetype_asr   r   )
r8   pixel_valuesbool_masked_pos
embeddingsoutput_dimensionsr   seq_len_mask_tokensmasks
             r>   forwardzDonutSwinEmbeddings.forward   s    (,(=(=l(K(K%
%YYz**
!+!1
GQ&/00WbIIK",,R0088EED#sTz2[45GGJ#/#d&>>J\\*--
,,,rF   FrJ   )r?   r@   rA   __doc__r!   r   rD   rE   s   @r>   r   r      s\          >  >  >  >  >  >D- - - - - - - -rF   r   c                   2     e Zd Z	 	 	 	 	 	 d fd	Zd Z xZS )MyConv2dr	   SAMEFư>c
                    t                                          ||||||||           t          j        t          j                            t          j        |||d         |d                                       t          j	                                      | _
        t          j        t          j                            t          j        |                              t          j	                                      | _        t          j                            | j
                   t          j                            | j                   d S )N)stridepaddingdilationgroups	bias_attrr   r	   )r    r!   r   r   r   r   r   r   r   r   weightbiasones_r   )r8   
in_channelout_channelskernel_sizer   r   r   r   r   epsr=   s             r>   r!   zMyConv2d.__init__
  s    	 	 		
 		
 		
 oG## *k!nk!n "U]## 
 
 OG##EK$=$=$@$@$O$OPP
 
	 	dk"""
ty!!!!!rF   c           	      v    t          j        || j        | j        | j        | j        | j        | j                  }|S rJ   )Fconv2dr   r   _stride_padding	_dilation_groups)r8   xs     r>   r   zMyConv2d.forward1  s;    HKILMNL
 
 rF   )r	   r   r	   r	   Fr   )r?   r@   rA   r!   r   rD   rE   s   @r>   r   r   	  sa         %" %" %" %" %" %"N
 
 
 
 
 
 
rF   r   c                   Z     e Zd ZdZ fdZd Zdeej        ee	         f         fdZ
 xZS )r   z
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    c                 2   t                                                       |j        |j        }}|j        |j        }}t          |t          j        j	                  r|n||f}t          |t          j        j	                  r|n||f}|d         |d         z  |d         |d         z  z  }|| _        || _        || _        || _
        |j        | _        |d         |d         z  |d         |d         z  f| _        t          j        ||||          | _        d S )Nr	   r   )r   r   )r    r!   r"   r#   r$   r%   rQ   collectionsabcIterabler   	is_exportr   r   Conv2D
projection)r8   r   r"   r#   r$   r3   r   r=   s          r>   r!   z!DonutSwinPatchEmbeddings.__init__F  s.   !'!2F4EJ
$*$79Ik *ko&>??*JJj) 	 *ko&>??*JJj) 	
 "!}
15qMZ]*
 %$(&)qMZ]*qMZ]*
 )+:j
 
 
rF   c                    || j         d         z  dk    rgd| j         d         || j         d         z  z
  f}| j        r t          j        |t          j                  }t
          j                            ||          }|| j         d         z  dk    riddd| j         d         || j         d         z  z
  f}| j        r t          j        |t          j                  }t
          j                            ||          }|S )Nr	   r   dtype)r#   r   r   tensorint32r   
functionalpad)r8   r   r   r   
pad_valuess        r>   	maybe_padz"DonutSwinPatchEmbeddings.maybe_padd  s    4?1%%**T_Q/%$/!:L2LLMJ~ I"\*EKHHH
=,,\:FFLDOA&&!++Q4?1#5QRAS8S#STJ~ I"\*EKHHH
=,,\:FFLrF   returnc                 "   |j         \  }}}}|| j        k    rt          d          |                     |||          }|                     |          }|j         \  }}}}||f}|                    d                              g d          }||fS )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   r   r   r	   )r   r$   
ValueErrorr   r   flattenr   )r8   r   r   r$   r   r   r   r   s           r>   r   z DonutSwinPatchEmbeddings.forwardq  s    )5);&<4,,,w   ~~lFEBB__\22
(.1fe#UO''**44YYY??
,,,rF   )r?   r@   rA   r   r!   r   r   r   Tensorr2   r   rD   rE   s   @r>   r   r   ?  s|         
 
 
 
 
<  -uU\5:-E'F - - - - - - - -rF   r   c                        e Zd ZdZej        dfdee         dedej        f fdZ	d Z
dej        d	eeef         d
ej        fdZ xZS )DonutSwinPatchMerginga'  
    Patch Merging Layer.

    Args:
        input_resolution (`Tuple[int]`):
            Resolution of input feature.
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    Finput_resolutiondim
norm_layerc                     t                                                       || _        || _        t	          j        d|z  d|z  d          | _         |d|z            | _        || _        d S )Nr   r   Fr   )	r    r!   r   r   r   Linear	reductionr   r   )r8   r   r   r   r   r=   s        r>   r!   zDonutSwinPatchMerging.__init__  sj     	 01s7AGuEEEJq3w''	"rF   c                     |dz  dk    p|dz  dk    }|rUddd|dz  d|dz  f}| j         r t          j        |t          j                  }t          j                            ||          }|S )Nr   r	   r   r   )r   r   r   r   r   r   r   )r8   r   r   r   
should_padr   s         r>   r   zDonutSwinPatchMerging.maybe_pad  s}    qjAo:519>
 	IQ519a!<J~ I"\*EKHHH
M--mZHHMrF   r   input_dimensionsr   c                    |\  }}|j         \  }}}|                    ||||g          }|                     |||          }|d d dd ddd dd d f         }|d d dd ddd dd d f         }	|d d dd ddd dd d f         }
|d d dd ddd dd d f         }t          j        ||	|
|gd          }|                    |dd|z  g          }|                     |          }|                     |          }|S )Nr   r   r	   r   r   )r   r   r   r   catr   r   )r8   r   r   r   r   r   r   r$   input_feature_0input_feature_1input_feature_2input_feature_3s               r>   r   zDonutSwinPatchMerging.forward  sY    )(5(;%
C%--z65,.WXX}feDD'14a4Aqqq(89'14a4Aqqq(89'14a4Aqqq(89'14a4Aqqq(89	oPRT
 
 &--Q-.
 
 		-00}55rF   )r?   r@   rA   r   r   r   r   r2   Moduler!   r   r   r   r   rD   rE   s   @r>   r   r     s        
 
  !## #*# # I	# # # # # #  "\=B38_	       rF   r   r   Finput	drop_probtrainingr   c                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j                  z   }|                                 | |z  |z  }|S )Nr   r	   r   )r	   r   )r   ndimr   randr   floor_)r   r   r   	keep_probr   random_tensoroutputs          r>   	drop_pathr    s     CxII[^
Q! E 
k! ! ! M Y.FMrF   c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
DonutSwinDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 V    t                                                       || _        d S rJ   )r    r!   r   )r8   r   r=   s     r>   r!   zDonutSwinDropPath.__init__  s$    "rF   rs   c                 8    t          || j        | j                  S rJ   )r  r   r   r8   rs   s     r>   r   zDonutSwinDropPath.forward  s    FFFrF   c                 6    d                     | j                  S )Nzp={})formatr   rp   s    r>   
extra_reprzDonutSwinDropPath.extra_repr  s    }}T^,,,rF   rJ   )r?   r@   rA   r   r   floatr!   r   r   r   rR   r  rD   rE   s   @r>   r  r    s        bb# #(5/ #T # # # # # #GU\ Gel G G G G-C - - - - - - - -rF   r  c                   \     e Zd Z fdZd Z	 	 	 ddej        deej                 fdZ xZ	S )	DonutSwinSelfAttentionc           
      x   t                                                       ||z  dk    rt          d| d| d          || _        t	          ||z            | _        | j        | j        z  | _        t          |t          j	        j
                  r|n||f| _        t          j        t          j                            t          j        d| j        d         z  dz
  d| j        d         z  dz
  z  |                              t          j                                      | _        t          j                            | j                   t          j        | j        d                   }t          j        | j        d                   }t          j        t          j        ||d                    }t          j        |d          }|d d d d d f         |d d d d d f         z
  }	|	                    g d	          }	|	d d d d dfxx         | j        d         dz
  z  cc<   |	d d d d dfxx         | j        d         dz
  z  cc<   |	d d d d dfxx         d| j        d         z  dz
  z  cc<   |	                    d
          }
|                     d|
           t          j        | j        | j        |j                  | _        t          j        | j        | j        |j                  | _         t          j        | j        | j        |j                  | _!        t          j"        |j#                  | _$        d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()r   r	   ij)indexing)r	   r   r   r   relative_position_indexr   )%r    r!   r   r   r2   attention_head_sizeall_head_sizerQ   r   r   r   r(   r   r   r   r   xavier_normal_r   r   r   relative_position_bias_tabler   arangestackmeshgridr   r   sumregister_bufferr   r*   queryr:   r;   r   r,   r   )r8   r   r   r   r(   coords_hcoords_wcoordscoords_flattenrelative_coordsr  r=   s              r>   r!   zDonutSwinSelfAttention.__init__  sS   ?akCkk_hkkk   $- #&sY#7#7 !58PP +{'?@@,KK{+ 	 -2OG"")!,,q0Q9I!9L5Lq5PQ  "U]##	 -
 -
) 	t8999 < 0 344< 0 344U^HhNNNOOvq11(AAAt4~aaaqqqj7QQ)33III>>111a   D$4Q$7!$;;   111a   D$4Q$7!$;;   111a   A(8(;$;a$??   "1"5"5b"9"968OPPPY 2fo
 
 

 9 2fo
 
 
 Y 2fo
 
 

 z&"EFFrF   c                     |j         d d         | j        | j        gz   }|                    |          }|                    g d          S )Nr   r   r   r	   r   )r   r   r  r   r   )r8   r   new_x_shapes      r>   transpose_for_scoresz+DonutSwinSelfAttention.transpose_for_scores  sP    gcrcl$$&
 
 IIk""{{<<<(((rF   NFrs   r   c                 &   |j         \  }}}|                     |          }|                     |                     |                    }	|                     |                     |                    }
|                     |          }t          j        ||	                    g d                    }|t          j	        | j
                  z  }| j        | j                            dg                   }|                    | j        d         | j        d         z  | j        d         | j        d         z  dg          }|                    g d          }||                    d          z   }|x|j         d         }|                    ||z  || j        ||g          }||                    d                              d          z   }|                    d| j        ||g          }t"          j                            |d          }|                     |          }|||z  }t          j        ||
          }|                    g d          }t+          |j         d d                   | j        fz   }|                    |          }|r||fn|f}|S )	N)r   r	   r   r   r   r   r	   )r   r   r	   )axisr'  )r   r   r)  r:   r;   r   matmulr   mathsqrtr  r  r  r   r(   r   r   r   r   softmaxr   ro   r  )r8   rs   attention_mask	head_maskoutput_attentionsr   r   r$   mixed_query_layer	key_layervalue_layerquery_layerattention_scoresrelative_position_bias
mask_shapeattention_probscontext_layernew_context_layer_shapeoutputss                      r>   r   zDonutSwinSelfAttention.forward#  s    )6(;%
C JJ}55--dhh}.E.EFF	//

=0I0IJJ//0ABB !<Y5H5H5V5VWW+di8P.Q.QQ!%!B(00"66"
 "8!?!? #d&6q&99 #d&6q&99"
 "
 "8!A!A)))!L!L+.D.N.Nq.Q.QQ%'-a0J/77*,,     0.2J2J12M2M2W2W3 3    077T-sC8   
 -//0@r/JJ ,,77  -	9O_kBB%//=="'(;CRC(@"A"AE
 #
 &--.EFF0AW]O,,GW 	 rF   NNF)
r?   r@   rA   r!   r)  r   r   r   r   rD   rE   s   @r>   r  r    s        5G 5G 5G 5G 5Gn) ) ) G G|G 
u|	G G G G G G G GrF   r  c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )DonutSwinSelfOutputc                     t                                                       t          j        ||          | _        t          j        |j                  | _        d S rJ   )r    r!   r   r   denser   r,   r   r8   r   r   r=   s      r>   r!   zDonutSwinSelfOutput.__init__o  sD    YsC((
z&"EFFrF   rs   input_tensorr   c                 Z    |                      |          }|                     |          }|S rJ   rC  r   )r8   rs   rE  s      r>   r   zDonutSwinSelfOutput.forwardt  s,     

=11]33rF   r?   r@   rA   r!   r   r   r   rD   rE   s   @r>   rA  rA  n  so        G G G G G
"\9>	       rF   rA  c                   V     e Zd Z fdZ	 	 	 ddej        deej                 fdZ xZS )DonutSwinAttentionc                     t                                                       t          ||||          | _        t	          ||          | _        t                      | _        d S rJ   )r    r!   r  r8   rA  r  setpruned_heads)r8   r   r   r   r(   r=   s        r>   r!   zDonutSwinAttention.__init__  sQ    *63	;OO	)&#66EErF   NFrs   r   c                     |                      ||||          }|                     |d         |          }|f|dd          z   }|S )Nr   r	   )r8   r  )r8   rs   r1  r2  r3  self_outputsattention_outputr>  s           r>   r   zDonutSwinAttention.forward  s]     yy>96G
 
  ;;|AFF#%BB)
 
 rF   r?  )	r?   r@   rA   r!   r   r   r   r   rD   rE   s   @r>   rJ  rJ  ~  sw        " " " " "  | 
u|	       rF   rJ  c                   B     e Zd Z fdZdej        dej        fdZ xZS )DonutSwinIntermediatec                     t                                                       t          j        |t	          |j        |z                      | _        t          j        | _	        d S rJ   )
r    r!   r   r   r2   r)   rC  r   r   intermediate_act_fnrD  s      r>   r!   zDonutSwinIntermediate.__init__  sL    YsC(83(>$?$?@@
#$6   rF   rs   r   c                 Z    |                      |          }|                     |          }|S rJ   )rC  rT  r  s     r>   r   zDonutSwinIntermediate.forward  s,    

=1100??rF   rH  rE   s   @r>   rR  rR    s^        * * * * *
U\ el        rF   rR  c                   B     e Zd Z fdZdej        dej        fdZ xZS )DonutSwinOutputc                     t                                                       t          j        t	          |j        |z            |          | _        t          j        |j                  | _	        d S rJ   )
r    r!   r   r   r2   r)   rC  r   r+   r   rD  s      r>   r!   zDonutSwinOutput.__init__  sT    Ys6#3c#9::C@@
z&"<==rF   rs   r   c                 Z    |                      |          }|                     |          }|S rJ   rG  r  s     r>   r   zDonutSwinOutput.forward  s*    

=11]33rF   rH  rE   s   @r>   rW  rW    s^        > > > > >
U\ el        rF   rW  c            	            e Zd Zd fd	Zd Zd Zd Zd Z	 	 	 dd	ej	        d
e
eef         de
ej	        ej	        f         fdZ xZS )DonutSwinLayerr   c                 (   t                                                       |j        | _        || _        |j        | _        || _        t          j        ||j                  | _	        t          |||| j                  | _        |j        dk    rt          |j                  nt          j                    | _        t          j        ||j                  | _        t#          ||          | _        t'          ||          | _        |j        | _        d S )N)r   )r(   r   )r    r!   chunk_size_feed_forward
shift_sizer(   r   r   r   r0   layernorm_beforerJ  	attentionr-   r  Identityr  layernorm_afterrR  intermediaterW  r  r   )r8   r   r   r   r   r^  r=   s         r>   r!   zDonutSwinLayer.__init__  s    '-'E$$!- 0 "Sf6K L L L+C0@
 
 

 $s** f3444 	
  "|CV5JKKK1&#>>%fc22)rF   c                 p    t          |          | j        k    rd| _        t          |          | _        d S d S Nr   )minr(   r^  )r8   r   s     r>   set_shift_and_window_sizez(DonutSwinLayer.set_shift_and_window_size  s?      D$444DO"#344D 54rF   c           	         d }t          d| j                   t          | j         | j                   t          | j         d           f}t          d| j                   t          | j         | j                   t          | j         d           f}t          j        d||df|          }d}|D ]$}	|D ]}
| j        dk    r||d d |	|
d d f<   |dz  } %t          j        | j        dk                                  t          j                  rt          || j                  }|	                    d| j        | j        z  g          }|
                    d          |
                    d          z
  }|                    |dk    t          d                                        |dk    t          d                    }|S Nr   r	   r   r   r   g      Yr   )slicer(   r^  r   r   r   r   boolr   r   r   masked_fillr  )r8   r   r   r   	attn_maskheight_sliceswidth_slicesimg_maskcountheight_slicewidth_slicemask_windowss               r>   get_attn_mask_exportz#DonutSwinLayer.get_attn_mask_export  s   	!d&&''4##do%5664?"D))
 !d&&''4##do%5664?"D))

 ;65!4EBBB) 	 	L+  ?Q&&@EHQQQk111<=QJE <!+,,//
;; 		6+Hd6FGGL'//T%(889 L %..q11L4J4J14M4MMI!--Qf k)q.%**55  rF   c           	         | j         dk    rwt          j        d||df|          }t          d| j                   t          | j         | j                    t          | j          d           f}t          d| j                   t          | j         | j                    t          | j          d           f}d}|D ]}|D ]}	||d d ||	d d f<   |dz  }t          || j                  }
|
                    d| j        | j        z  g          }
|
                    d          |
                    d          z
  }|                    |dk    t          d                                        |dk    t          d                    }nd }|S ri  )
r^  r   r   rj  r(   r   r   r   rl  r  )r8   r   r   r   rp  rn  ro  rq  rr  rs  rt  rm  s               r>   get_attn_maskzDonutSwinLayer.get_attn_mask  s   ?Q{Avua#8FFFHa$**++t''$/)9::t&--M a$**++t''$/)9::t&--L E -  #/  K@EHQQQk111<=QJEE ,Hd6FGGL'//T%(889 L %..q11L4J4J14M4MMI!--Qf k)q.%**55 I IrF   c                     | j         || j         z  z
  | j         z  }| j         || j         z  z
  | j         z  }ddd|d|ddf}t          j                            ||          }||fS re  )r(   r   r   r   )r8   rs   r   r   	pad_right
pad_bottomr   s          r>   r   zDonutSwinLayer.maybe_pad  st    %0@(@@DDTT	&$2B)BBdFVV
Az1iA>
))-DDj((rF   NFrs   r   r   c                 \   |s|                      |           n	 |\  }}|j        \  }}	}
|}|                     |          }|                    ||||
g          }|                     |||          \  }}|j        \  }	}}}	| j        dk    rO| j         | j         f}| j        r t          j        |t          j	                  }t          j
        ||d          }n|}t          || j                  }|                    d| j        | j        z  |
g          }|                     |||j                  }|                     ||||          }|d         }|                    d| j        | j        |
g          }t!          || j        ||          }| j        dk    rM| j        | j        f}| j        r t          j        |t          j	                  }t          j
        ||d          }n|}|d         dk    p|d         dk    }|r&|d d d |d |d d f                                         }|                    |||z  |
g          }||                     |          z   }|                     |          }|                     |          }||                     |          z   }|r
||d	         fn|f}|S )
Nr   r   )r	   r   )shiftsdimsr   )r3  r   r   r	   )rg  r   r_  r   r   r^  r   r   r   r   rollr   r(   rw  r   r`  r   
contiguousr  rb  rc  r  )r8   rs   r   r2  r3  always_partitionr   r   r   r   channelsshortcutr   
height_pad	width_padshift_valueshifted_hidden_stateshidden_states_windowsrm  attention_outputsrP  attention_windowsshifted_windows
was_paddedlayer_outputlayer_outputss                             r>   r   zDonutSwinLayer.forward  sK      	**+;<<<<("/"5
Ax --m<<%--z65(.STT %)NN=&%$P$P!z&3&9#:y! ?Q O+do-=>K~ K#l;ekJJJ$)Jk% % %!! %2! !1!4#3!
 !
 !6 = =!D$44h?!
 !
 &&z9MDW&XX	 NN!/	 + 
 
 -Q/,44!4#3X>
 
 )t/Y
 
 ?Q?DO<K~ K#l;ekJJJ %
&! ! ! !0]Q&;*Q-!*;
 	V 1!!!WfWfufaaa2G H S S U U-55%2
 
 !4>>2C#D#DD++M::((66$t{{<'@'@@ !!\,Q/00 	
 rF   )r   NFF)r?   r@   rA   r!   rg  ru  rw  r   r   r   r   r2   r   rD   rE   s   @r>   r[  r[    s        * * * * * *(5 5 5  @  B) ) ) V V|V  S/V 
u|U\)	*V V V V V V V VrF   r[  c                   j     e Zd Z fdZ	 	 	 ddej        deeef         deej                 fdZ xZ	S )	DonutSwinStagec                 V   t                                                       | _        | _        t	          j        fdt          |          D                       | _        j        | _        |$ |t          j	        | j                  | _
        nd | _
        d| _        d S )Nc           
      ^    g | ])}t          |d z  dk    rdn	j        d z            *S )r   r   )r   r   r   r   r^  )r[  r(   )rk   ir   r   r   r   s     r>   
<listcomp>z+DonutSwinStage.__init__.<locals>.<listcomp>v  sa     	 	 	  !%5'%&UaZZqqf6HA6M  	 	 	rF   )r   r   r   F)r    r!   r   r   r   
ModuleListrangeblocksr   r   
downsamplepointing)	r8   r   r   r   depthr   r  r  r=   s	    ``` `  r>   r!   zDonutSwinStage.__init__o  s     	m	 	 	 	 	 	 	 u	 	 	
 
  ) !(j <.	  DOO #DOrF   NFrs   r   r   c                 *   |\  }}t          | j                  D ](\  }}	|||         nd }
 |	|||
||          }|d         })|}| j        -|dz   dz  |dz   dz  }}||||f}|                     ||          }n||||f}|||f}|r||dd          z  }|S )Nr   r	   r   )	enumerater  r  )r8   rs   r   r2  r3  r  r   r   r  layer_modulelayer_head_maskr  !hidden_states_before_downsamplingheight_downsampledwidth_downsampledr   stage_outputss                    r>   r   zDonutSwinStage.forward  s	    )(55 	- 	-OA|.7.CillO(L !  M *!,MM,9)?&5;aZA4EPQ	VWGW 1!'0BDU V OO13C MM "( > -
  	/]122..MrF   r  
r?   r@   rA   r!   r   r   r   r2   r   rD   rE   s   @r>   r  r  n  s            J ) )|)  S/) 
u|	) ) ) ) ) ) ) )rF   r  c                   V     e Zd Z fdZ	 	 	 	 	 	 ddej        deeef         fdZ xZ	S )	DonutSwinEncoderc                     t                                                       t          j                   _         _        d t          j        dj        t          j                            D             t          j         fdt           j                  D                        _        d _        d S )Nc                 6    g | ]}|                                 S ri   )item)rk   r   s     r>   r  z-DonutSwinEncoder.__init__.<locals>.<listcomp>  s0     
 
 
 FFHH
 
 
rF   r   c                 t   g | ]}t          t          j        d |z  z            d         d |z  z  d         d |z  z  fj        |         j        |         t          j        d|                   t          j        d|dz                               |j        dz
  k     rt          nd          S )r   r   r	   N)r   r   r   r  r   r  r  )r  r2   r%   r&   r   r  r   r   )rk   i_layerr   dprr   r8   s     r>   r  z-DonutSwinEncoder.__init__.<locals>.<listcomp>  s       & % !F,q'z9::!!G4!!G4& !-0$.w7!FM(7(344s6=7UV;;W7X7XX
 $do&999 .-!    rF   F)r    r!   r'   r&   r   r   r   linspacer-   r  r   r  r  layersgradient_checkpointing)r8   r   r   r  r=   s   ```@r>   r!   zDonutSwinEncoder.__init__  s    fm,,
 
^Av'<c&->P>PQQ
 
 
 m      &  %T_55'  
 
0 ',###rF   NFTrs   r   c	           	         |rdnd }	|rdnd }
|rdnd }|r?|j         \  }}} |j        |g||R  }|                    dddd          }|	|fz  }	|
|fz  }
t          | j                  D ]-\  }}|||         nd }| j        r'| j        r |                     |j        |||||          }n ||||||          }|d         }|d         }|d         }|d         |d         f}|rY|rW|j         \  }}}|	                    |g|d         |d         f|          }|
                    g d          }|	|fz  }	|
|fz  }
nL|rJ|sH|j         \  }}}|	                    |g||          }|
                    g d          }|	|fz  }	|
|fz  }
|r||dd          z  }/|st          d	 ||	|fD                       S t          ||	||

          S )Nri   r   r   r	   r   r,  r   )r   r   r	   r   c              3      K   | ]}||V  	d S rJ   ri   )rk   vs     r>   rl   z+DonutSwinEncoder.forward.<locals>.<genexpr>-  s0        =  === rF   )rr   rs   rt   ru   )r   viewpermuter  r  r  r   _gradient_checkpointing_func__call__r   r   ro   rH   )r8   rs   r   r2  r3  output_hidden_states(output_hidden_states_before_downsamplingr  return_dictall_hidden_statesall_reshaped_hidden_statesall_self_attentionsr   r   r3   reshaped_hidden_stater  r  r  r  r  r   s                         r>   r   zDonutSwinEncoder.forward  s    #7@BBD+?%IRRT"$5?bb4 	C)6)<&J;$6M$6%-%/:% % %! %:$A$A!Q1$M$M!-!11&+@*BB&(55 2	9 2	9OA|.7.CillO* t}  $ A A )!$#%$! ! !-!$#%$! ! *!,M0=a0@- -a 0 1" 57H7LM# G(P G-N-T*
A{(I(Q(Q"+A.0A!0DE $) )% )>(G(G(U(U%!&G%II!*/D.FF**% G.V G-:-@*
A{(5(=(=@"2@K@) )% )>(G(G(U(U%!m%55!*/D.FF*  9#}QRR'88# 	  '):<OP      &++*#=	
 
 
 	
rF   )NFFFFTr  rE   s   @r>   r  r    s         ,  ,  ,  ,  ,L "16X
 X
|X
  S/X
 X
 X
 X
 X
 X
 X
 X
rF   r  c                   <    e Zd ZdZeZdZdZdZd Z	d Z
d Zdd	Zd
S )DonutSwinPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    swinr   Tc                    t          |t          j        t          j        f          r`t          j                            |j        d| j        j                   |j	        &t          j        
                    |j	                   dS dS t          |t          j                  rJt          j        
                    |j	                   t          j                            |j                   dS dS )zInitialize the weightsr   )meanstdN)rQ   r   r   r   r   normal_r   r   r1   r   r   r   r   r8   modules     r>   _init_weightsz&DonutSwinPreTrainedModel._init_weightsF  s    fry")455 	)GOOFM9VOWWW{&v{+++++ '&-- 	)GNN6;'''GMM&-(((((	) 	)rF   c                 V    t          |dd          rdS |                     |           dS )zM
        Initialize the weights if they are not already initialized.
        _is_hf_initializedFN)getattrr  r  s     r>   _initialize_weightsz,DonutSwinPreTrainedModel._initialize_weightsQ  s8     6/77 	F6"""""rF   c                 :    |                      | j                   d S rJ   )applyr  rp   s    r>   	post_initz"DonutSwinPreTrainedModel.post_initY  s    

4+,,,,,rF   Fc                 v    |0|                      ||          }|du r|                    d          }nd g|z  }|S )NTr   )_convert_head_mask_to_5dr   )r8   r2  r   is_attention_chunkeds       r>   get_head_maskz&DonutSwinPreTrainedModel.get_head_mask\  sP     55iARSSI#t++%//33	!22IrF   Nr   )r?   r@   rA   r   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointingr  r  r  r  ri   rF   r>   r  r  ;  sw         
 #L$O&*#	) 	) 	)# # #- - -     rF   r  c                   b     e Zd Zdddg ddddf fd	Zd Z	 	 	 	 	 	 dd
eeef         fdZ xZ	S )DonutSwinModelr   i   r   )r             TFc           
         t                                                       i dddddddddddi d	dd
ddddddd ddddddddddddi ddddddddddddd dd!dd"dd#dd$dd%d d&dd'dd(dd)d d*d i d+dd,d d-d d.d d/d d0d d1d2d3d4d5ddd6d7d d8d d9d d:d d;d d<d d=d d>d d?d i d@dAdBd dCd dDd dE|dF|dGddHddIdJdJgdKdLdM|dNdOdPg dQdR|dSdTdUdVdWddddXdYddZd[|d\}t          d_i |}	|	| _        t	          |	j                  | _        t          |	j        d]| j        dz
  z  z            | _	        t          |	|^          | _        t          |	| j        j                  | _        |rt          j        d          nd | _        || _        |                                  d S )`Nr  Tr  Fr3  use_bfloat16tf_legacy_lossrM  tie_word_embeddingsr]  r   is_encoder_decoder
is_decodercross_attention_hidden_sizeadd_cross_attentiontie_encoder_decoder
max_length   
min_length	do_sampleearly_stopping	num_beamsr	   num_beam_groupsdiversity_penaltyr   temperaturer   top_k2   top_p	typical_prepetition_penaltylength_penaltyno_repeat_ngram_sizeencoder_no_repeat_ngram_sizebad_words_idsnum_return_sequencesoutput_scoresreturn_dict_in_generateforced_bos_token_idforced_eos_token_idremove_invalid_values exponential_decay_length_penaltysuppress_tokensbegin_suppress_tokensarchitecturesfinetuning_taskid2labelLABEL_0LABEL_1)r   r	   label2id)r  r  tokenizer_classprefixbos_token_idpad_token_ideos_token_idsep_token_iddecoder_start_token_idtask_specific_paramsproblem_type_name_or_path _commit_hash_attn_implementation_internaltransformers_versionr3   r   	path_normuse_2d_embeddingsr"   i  r#   r   r$   r%      r&   )r   r      r   r   r(   r   r)   r   r*   r   r   r   r   )r+   r,   r-   r.   r/   r0   r1   r   r   )r   ri   )r    r!   r   r   r'   r&   r   r2   r%   num_featuresr   r   r  r   encoderr   AdaptiveAvgPool1Dpoolerr   r  )r8   in_channelsr3   r   r   add_pooling_layerr   r   donut_swin_configr   r=   s             r>   r!   zDonutSwinModel.__init__h  s    	M
4M
"EM
  M
 E	M

 eM
 BM
 "4M
 &qM
 !%M
 %M
 *4M
 "5M
 "5M
 "M
 !M
  !M
" e#M
 M
$ %M
& q'M
(  )M
* 3+M
, R-M
. S/M
0 1M
2 !#3M
4 c5M
6 #A7M
8 +A9M
: T;M
< #A=M
> U?M
@ &uAM
B "4CM
D "4EM
 M
 M
F $UGM
H /IM
J tKM
L $TMM
N TOM
P tQM
R I)44SM
T A!44UM
V tWM
X dYM
Z D[M
\ D]M
^ D_M
` DaM
b %dcM
d #DeM
f DgM
 M
 M
h RiM
j DkM
l ,TmM
n #DoM
p ;qM
r *sM
t uM
v  wM
x 3*yM
z !{M
| K}M
~ M
@ mmmAM
B CM
D 1EM
F GM
H IM
 M
J $',/! ',#!%"YM
 M
 M
^ !55#455fm,, 0119L3M MNN-f^TTT'0JKK1BLb*1---'rF   c                     | j         j        S rJ   )r   r   rp   s    r>   get_input_embeddingsz#DonutSwinModel.get_input_embeddings  s    //rF   Nr   c                     | j         r|\  }}}	n t          |t                    r	|d         }n|}||n| j        j        }||n| j        j        }||n| j        j        }|t          d          |j        d         }
|
dk    rt          j
        |dd          }|                     |t          | j        j                            }|                     ||          \  }}|                     ||||||          }|d         }d}| j        ?|                     |                    g d	                    }t          j        |d          }|s||f|dd         z   }|S t'          |||j        |j        |j        
          }| j         r|||	fS |S )z
        bool_masked_pos (`paddle.BoolTensor` of shape `(batch_size, num_patches)`):
            Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
        r   Nz You have to specify pixel_valuesr	   r   )repeatsr   )r   )r2  r3  r  r  r   )rr   r~   rs   rt   ru   )r   rQ   listr   r3  r  r  r   r   r   repeat_interleaver  r'   r&   r   r  r  r   r   rw   rs   rt   ru   )r8   
input_datar   r2  r3  r  r  r   labelr1  r$   embedding_outputr   encoder_outputssequence_outputpooled_outputr  donut_swin_outputs                     r>   r   zDonutSwinModel.forward  s    = 	*2</L%*d++ *)!}) !, . 	 $/ ! 1 	 '2KK8O 	 ?@@@#)!,1 2<PQRRRL&&y#dk6H2I2IJJ	-1__/ .= .
 .
** ,,/!5# ' 
 
 *!,;" KK(A(A)))(L(LMMM!M-;;M 	%}58KKFM0-')7&1#2#I
 
 
 = 	%$e^;;$$rF   )NNNNNN)
r?   r@   rA   r!   r  r   r   rw   r   rD   rE   s   @r>   r  r  g  s          ..d d d d d dL0 0 0
 !K% K% 
u**	+K% K% K% K% K% K% K% K%rF   r  )r   F)/r   collections.abcr   r.  r   dataclassesr   typingr   r   r   r   torch.nnr   torch.nn.functionalr   r   _CONFIG_FOR_DOC_CHECKPOINT_FOR_DOC_EXPECTED_OUTPUT_SHAPEobjectr   rH   rw   r   r   r   r   Conv2dr   r   r   r   r  rk  r  r  r  rA  rJ  rR  rW  r[  r  r  r  r  ri   rF   r>   <module>r5     s         # # # # # # ! ! ! ! ! ! ) ) ) ) ) ) ) ) ) )                 $ I % 6 6 6 6 6f 6 6 6r 3 3 3 3 3[ 3 3 3@ 3 3 3 3 3; 3 3 3D  ,  ,5- 5- 5- 5- 5-") 5- 5- 5-p2 2 2 2 2ry 2 2 2l?- ?- ?- ?- ?-ry ?- ?- ?-F< < < < <BI < < <B CH <$);?
\   &- - - - -	 - - -G G G G GRY G G GV    ")           2	 	 	 	 	BI 	 	 		 	 	 	 	bi 	 	 	y y y y yRY y y yzK K K K KRY K K K^{
 {
 {
 {
 {
ry {
 {
 {
|) ) ) ) )ry ) ) )Xu% u% u% u% u%- u% u% u% u% u%rF   