
    PiX                        d dl mZ d dlmZmZ d dlmZmZmZ d dl	m
Z
 d Z G d d          Z ed	           G d
 de                      Z ed	           G d de                      Z ed	           G d de                      Z ed	           G d de                      Z ed	           G d de                      Z edd           G d de                      Z G d d          Ze
d             Z ed	           G d de                      Z edd           G d de                      Z edd           G d de                      Zd#d Zd! Zd"S )$    )	dataclass)ListOptional)_unwrap_if_constexpr_unwrap_shapeconstexpr_type)constexpr_functionc                 B   | j         pdg|z  }| j        pdg|z  }| j        p(t          t	          t          |                              }t                              | d|           t                              | d|           t                              | d|           d S )N   ctas_per_cgacta_split_num	cta_order)r   r   r   listreversedrangeobject__setattr__)layoutrankr   r   r   s        /var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/experimental/gluon/language/_layouts.py_realize_cta_layoutr      s    &41#*L(6QC$JM ?D%++)>)>$?$?I
v~|<<<
v>>>
v{I66666    c                   (    e Zd ZdZed             ZdS )DistributedLayoutz@
    Base class for distributed memory layouts in Gluon IR.
    c                      t          |           S Nr   selfs    r   typezDistributedLayout.type       d###r   N__name__
__module____qualname____doc__propertyr     r   r   r   r      9          $ $ X$ $ $r   r   T)frozenc                       e Zd Zd Zd ZdS )
AutoLayoutc                 *    |                                 S r   )get_auto_layoutr   builders     r   _to_irzAutoLayout._to_ir   s    &&(((r   c                     dS )NALr(   r   s    r   manglezAutoLayout.mangle    s    tr   N)r#   r$   r%   r1   r4   r(   r   r   r,   r,      s2        ) ) )    r   r,   c                       e Zd ZU dZee         ed<   ee         ed<   ee         ed<   ee         ed<   dZeee                  ed<   dZ	eee                  ed<   dZ
eee                  ed	<    fd
Zd ZdefdZd Z xZS )BlockedLayouta`  
    Represents a blocked layout, partitioning a tensor across threads, warps, and CTAs.

    Args:
        size_per_thread (List[int]): Number of elements per thread per dimension.
        threads_per_warp (List[int]): Number of threads per warp per dimension.
        warps_per_cta (List[int]): Number of warps per CTA per dimension.
        order (List[int]): The ordering of dimensions for partitioning.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): Ordering for CTAs.
    size_per_threadthreads_per_warpwarps_per_ctaorderNr   r   r   c                 `   t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j	                             t          | j                  }t          | |           t          | j                  |k    sJ t          | j                  |k    sJ t          | j                  |k    sJ t          | j                  |k    sJ t          | j                  |k    sJ t          | j	                  |k    sJ d S )Nr7   r8   r9   r:   r   r   r   )superr   r   r7   r8   r9   r:   r   r   r   lenr   r   r   	__class__s     r   __post_init__zBlockedLayout.__post_init__:   s   -/CDDX/Y/YZZZ.0DTEZ0[0[\\\O-A$BT-U-UVVVG%9$*%E%EFFFN,@AR,S,STTTO-A$BT-U-UVVVK)=dn)M)MNNN4'((D$'''4())T11114%&&$....4:$&&&&4$%%----4%&&$....4>""d******r   c           	      ~    |                     | j        | j        | j        | j        | j        | j        | j                  S r   )get_blocked_layoutr7   r8   r9   r:   r   r   r   r/   s     r   r1   zBlockedLayout._to_irL   sB    )) !JN
 
 	
r   returnc                    d } || j                   } || j                  } || j                  } || j                  } || j                  } || j                  } || j                  }d| d| d| d| d| d| d| dS )Nc                 Z    | dS d                     t          t          |                     S N _joinmapstrxs    r   	stringifyz'BlockedLayout.mangle.<locals>.stringifyY   &    yr88CQKK(((r   B)r7   r8   r9   r:   r   r   r   )	r   rO   r7   r8   r9   r:   r   r   r   s	            r   r4   zBlockedLayout.mangleW   s    	) 	) 	)
 $)D$899$9T%:;;!	$"455	$*%% y!233!	$"455Idn--	{?{{%5{{{{{{P\{{_l{{ox{{{{r   c                 `   t          t          | j                  t          | j                  t          | j                  t          | j                  | j        rt          | j                  nd | j        rt          | j                  nd | j        rt          | j                  nd f          S r   )	hashtupler7   r8   r9   r:   r   r   r   r   s    r   __hash__zBlockedLayout.__hash__g   s    $&''$'(($$%%$*(,(9CE$#$$$t)-);EE$$%%%%)^=E$.!!!
   	r   r#   r$   r%   r&   r   int__annotations__r   r   r   r   r@   r1   rL   r4   rU   __classcell__r?   s   @r   r6   r6   $   s          #Y3i99(,L(49%,,,)-M8DI&---%)IxS	")))+ + + + +$	
 	
 	
| | | | | 	 	 	 	 	 	 	r   r6   c                   P     e Zd ZU dZeed<   eed<    fdZd Zde	fdZ
d Z xZS )	SliceLayoutz
    Represents a layout corresponding to slicing a distributed tensor along one dimension.

    Args:
        dim (int): The dimension index to slice.
        parent (DistributedLayout): The parent layout before slicing.
    dimparentc                     t                                          dt          | j                             t                                          dt          | j                             d S )Nr]   r^   )r<   r   r   r]   r^   r   r?   s    r   r@   zSliceLayout.__post_init__   sS    E#7#A#ABBBH&:4;&G&GHHHHHr   c                 h    |                     | j        | j                            |                    S r   )get_slice_layoutr]   r^   r1   r/   s     r   r1   zSliceLayout._to_ir   s2    ''HKw''
 
 	
r   rC   c                 L    d| j          d| j                                         dS )NSLrH   )r]   r^   r4   r   s    r   r4   zSliceLayout.mangle   s+    7DH77t{11337777r   c                 8    t          | j        | j        f          S r   )rS   r]   r^   r   s    r   rU   zSliceLayout.__hash__   s    TXt{+,,,r   r#   r$   r%   r&   rW   rX   r   r@   r1   rL   r4   rU   rY   rZ   s   @r   r\   r\   s   s           
HHHI I I I I
 
 
8 8 8 8 8- - - - - - -r   r\   c                        e Zd ZU dZeee                  ed<   eee                  ed<   eee                  ed<   eee                  ed<   ee         ed<    fdZd Zd	 Z	d
 Z
 xZS )DistributedLinearLayouta  
    Represents a linear distributed layout with explicit bases at register, lane, warp, and block levels.
    See: https://arxiv.org/abs/2505.23819 for reference.

    Args:
        reg_bases (List[List[int]]): Bases for register-level distribution.
        lane_bases (List[List[int]]): Bases for lane-level distribution.
        warp_bases (List[List[int]]): Bases for warp-level distribution.
        block_bases (List[List[int]]): Bases for block-level distribution.
        shape (List[int]): The tensor global shape.
    	reg_bases
lane_bases
warp_basesblock_basesshapec                 0   t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t          | j                  }| j        D ]}t          |          |k    sJ | j        D ]}t          |          |k    sJ | j        D ]}t          |          |k    sJ | j        D ]}t          |          |k    sJ d S )Nri   rj   rk   rl   rm   )	r<   r   r   ri   rj   rk   rl   rm   r=   )r   r   basisr?   s      r   r@   z%DistributedLinearLayout.__post_init__   s|   Kt~)F)FGGGL-*H*HIIIL-*H*HIIIM=9I+J+JKKKG]4:%>%>???4:^ 	& 	&Eu::%%%%%_ 	& 	&Eu::%%%%%_ 	& 	&Eu::%%%%%% 	& 	&Eu::%%%%%	& 	&r   c                 f    |                     | j        | j        | j        | j        | j                  S r   )get_distributed_linear_layoutri   rj   rk   rl   rm   r/   s     r   r1   zDistributedLinearLayout._to_ir   s6    44T^T_VZVegkgw59ZA A 	Ar   c                 X    d| j          d| j         d| j         d| j         d| j         dS )NDLLrH   )ri   rj   rk   rl   rm   r   s    r   r4   zDistributedLinearLayout.mangle   s@    lT^lldollll$JZll]a]gllllr   c                 v   t          t          t          t          | j                            t          t          t          | j                            t          t          t          | j                            t          t          t          | j                            t          | j                  f          S r   )rS   rT   rK   ri   rj   rk   rl   rm   r   s    r   rU   z DistributedLinearLayout.__hash__   s    #eT^,,--#eT_--..#eT_--..#eT-..//$*
   	r   )r#   r$   r%   r&   r   rW   rX   r@   r1   r4   rU   rY   rZ   s   @r   rh   rh      s         
 
 DIT#YT#Yd3i   9& & & & &$A A Am m m      r   rh   c                   Z     e Zd ZU dZeed<   eed<   eed<    fdZd Zde	fdZ
d	 Z xZS )
DotOperandLayouta
  
    Represents a layout for a dot operand.

    Args:
        operand_index (int): 0 for LHS and 1 for RHS of the dot operation.
        parent (DistributedLayout): The parent layout, representing the MMA.
        k_width (int): Number of elements per 32-bits.
    operand_indexr^   k_widthc                 @   t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             d S )Nrw   r^   rx   )r<   r   r   rw   r^   rx   r`   s    r   r@   zDotOperandLayout.__post_init__   sy    O-A$BT-U-UVVVH&:4;&G&GHHHI';DL'I'IJJJJJr   c                 t    |                     | j        | j                            |          | j                  S r   )get_dot_operand_layoutrw   r^   r1   rx   r/   s     r   r1   zDotOperandLayout._to_ir   s2    --d.@$+BTBTU\B]B]_c_klllr   rC   c                 \    d| j          d| j                                         d| j         dS )NDOrH   )rw   r^   r4   rx   r   s    r   r4   zDotOperandLayout.mangle   s6    PD&PP););)=)=PPPPPPr   c                 D    t          | j        | j        | j        f          S r   )rS   rw   r^   rx   r   s    r   rU   zDotOperandLayout.__hash__   s    T'dlCDDDr   rf   rZ   s   @r   rv   rv      s           LLLK K K K K
m m mQ Q Q Q QE E E E E E Er   rv   )r*   eqc                        e Zd ZU dZee         ed<   ee         ed<   ee         ed<   dZeee                  ed<   dZ	eee                  ed<   dZ
eee                  ed<    fd	Zd
 ZdefdZd Z xZS )NVMMADistributedLayouta  
    Represents a layout for NVIDIA MMA (tensor core) operations.

    Args:
        version (List[int]): Version identifier for the MMA instruction.
        warps_per_cta (List[int]): Number of warps per CTA.
        instr_shape (List[int]): Instruction shape for MMA.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    versionr9   instr_shapeNr   r   r   c                 \   t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t          | j                  }t          | |           t          | j                  |k    sJ t          | j                  |k    sJ t          | j                  |k    sJ d S )Nr   r9   r   r   r   r   )r<   r   r   r   r9   r   r   r   r   r=   r   r>   s     r   r@   z$NVMMADistributedLayout.__post_init__   s^   I';DL'I'IJJJO-A$BT-U-UVVVM+?@P+Q+QRRRN,@AR,S,STTTO-A$BT-U-UVVVK)=dn)M)MNNN4%&&D$'''4$%%----4%&&$....4>""d******r   c                 r    |                     | j        | j        | j        | j        | j        | j                  S r   )get_mma_layoutr   r9   r   r   r   r   r/   s     r   r1   zNVMMADistributedLayout._to_ir  s;    %%dlD4FHY[_[m&*nd6FH H 	Hr   rC   c                 h    d| j          d| j         d| j         d| j         d| j         d| j         dS )NMMA_rH   _MMA)r   r9   r   r   r   r   r   s    r   r4   zNVMMADistributedLayout.mangle	  s     Kdl  K  KT%7  K  K$:J  K  KTM^  K  Kaeas  K  Kvz  wE  K  K  K  	Kr   c           
      :   t          t          | j                  t          | j                  t          | j                  | j        rt          | j                  nd | j        rt          | j                  nd | j        rt          | j                  nd f          S r   )rS   rT   r   r9   r   r   r   r   r   s    r   rU   zNVMMADistributedLayout.__hash__  s    U4<((%0B*C*C4+,,$J[.eeD4E.F.F.Fae262DNU4-...$.2nFU4>***$H I I 	Ir   rV   rZ   s   @r   r   r      s	        
 
 #Y9c(,L(49%,,,)-M8DI&---%)IxS	")))+ + + + +H H HK K K K KI I I I I I Ir   r   c                   (    e Zd ZdZed             ZdS )SharedLayoutz;
    Base class for shared memory layouts in Gluon IR.
    c                      t          |           S r   r   r   s    r   r    zSharedLayout.type  r!   r   Nr"   r(   r   r   r   r     r)   r   r   c                     | }|Wt          |          t          |           k    sJ t          t          |                    D ]}||xx         ||         z  cc<   |S r   )r=   r   )rm   r   shape_per_ctar]   s       r   _get_shape_per_ctar     su    M =!!SZZ////]++,, 	5 	5C#-"44r   c                       e Zd ZU dZeed<   eed<   eed<   dZeed<   dZeed<   dZ	e
ee                  ed	<   dZe
ee                  ed
<   dZe
ee                  ed<    fdZd Zee	 	 dd                        ZdefdZd Z xZS )NVMMASharedLayouta4  
    Represents a layout for shared memory suitable for NVIDIA MMA operations.

    Args:
        swizzle_byte_width (int): Width in bytes for swizzling.
        element_bitwidth (int): Bitwidth of element type.
        rank (int): Rank of the tensor.
        transposed (bool): Whether the layout is transposed.
        fp4_padded (bool): Whether FP4 padding is used.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    swizzle_byte_widthelement_bitwidthr   F
transposed
fp4_paddedNr   r   r   c                 >   t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j	                             t                                          dt          | j
                             | j        d	v sJ | j        d
v sJ | j        }t          | |           t          | j                  |k    sJ t          | j	                  |k    sJ t          | j
                  |k    sJ d S )Nr   r   r   r   r   r   r   r   )          @   )r   r   r      )r<   r   r   r   r   r   r   r   r   r   r   r   r=   r>   s     r   r@   zNVMMASharedLayout.__post_init__?  s   02FtG^2_2_```.0DTEZ0[0[\\\F$8$C$CDDDL*>t*O*OPPPL*>t*O*OPPPN,@AR,S,STTTO-A$BT-U-UVVVK)=dn)M)MNNN$7777&*:::::yD$'''4$%%----4%&&$....4>""d******r   c           	      ~    |                     | j        | j        | j        | j        | j        | j        | j                  S r   )get_nvmma_shared_layoutr   r   r   r   r   r   r   r/   s     r   r1   zNVMMASharedLayout._to_irQ  sA    ..#!OON
 
 	
r   c           
         |rdnd}t          | |          }t          |           }	|r|dd         |dd         z   }|d         |z  }
|
|j        z  dz  }|dk    r|dz  dk    rd}n&|dk    r|dz  dk    rd}n|d	k    r|d	z  dk    rd	}nd}d}|dd         D ]}||z  }t          |           dk     s|dk     rd}t          ||j        |	|||||
          S )zReturns an NVMMASharedLayout with default swizzling for a given shape.

        This picks the largest swizzle pattern compatible with the shape, which
        allows emitting the fewest TMA or MMA messages.
           r   Nr   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r=   primitive_bitwidthr   )block_shapedtyper   r   r   r   r   packing_factorr   r   contig_dim_sizecontig_dim_bytesr   flatten_outer_dimsizes                  r   get_default_forz!NVMMASharedLayout.get_default_for\  si    )/a*;FF; 	B)!""-bqb0AAM'+n<*U-EEJs""'7#'='B'B!$##(82(=(B(B!###(82(=(B(B!#!"!#2#& 	& 	&D%{a#4q#8#8!" 1"5!!%'	
 	
 	
 		
r   rC   c           	      H    d| j          d| j         d| j         d| j         d	S )NNVMMA_rH   _NVMMA)r   r   r   r   r   s    r   r4   zNVMMASharedLayout.mangle  s7    s/ss$2Gss$/ss\`\kssssr   c                    t          | j        | j        | j        | j        | j        | j        rt          | j                  nd | j        rt          | j                  nd | j	        rt          | j	                  nd f          S r   )
rS   r   r   r   r   r   r   rT   r   r   r   s    r   rU   zNVMMASharedLayout.__hash__  s    T,d.CTYPTP_aeap151BLU4,---262DNU4-...$.2nFU4>***$H I I 	Ir   )FFNNN)r#   r$   r%   r&   rW   rX   r   boolr   r   r   r   r   r   r@   r1   staticmethodr	   r   rL   r4   rU   rY   rZ   s   @r   r   r   '  sI          
IIIJJ(,L(49%,,,)-M8DI&---%)IxS	")))+ + + + +$	
 	
 	
 qu"&&
 &
 &
  \&
Pt t t t tI I I I I I Ir   r   c                        e Zd ZU dZeed<   eed<   eed<   ee         ed<   dZeee                  ed<   dZ	eee                  ed<   dZ
eee                  ed	<    fd
Zd ZdefdZd Z xZS )SwizzledSharedLayouta  
    Represents a generic swizzled shared memory layout.

    Args:
        vec (int): Vector width for swizzling.
        per_phase (int): Elements per swizzle phase.
        max_phase (int): Maximum number of swizzle phases.
        order (List[int]): Dimension ordering for swizzling.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    vec	per_phase	max_phaser:   Nr   r   r   c                    t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j	                             t          | j                  }t          | |           t          | j                  |k    sJ t          | j                  |k    sJ t          | j	                  |k    sJ d S )Nr   r   r   r:   r   r   r   )r<   r   r   r   r   r   r:   r   r   r   r=   r   r>   s     r   r@   z"SwizzledSharedLayout.__post_init__  s~   E#7#A#ABBBK)=dn)M)MNNNK)=dn)M)MNNNG%9$*%E%EFFFN,@AR,S,STTTO-A$BT-U-UVVVK)=dn)M)MNNN4:D$'''4$%%----4%&&$....4>""d******r   c           	      ~    |                     | j        | j        | j        | j        | j        | j        | j                  S r   )get_swizzled_shared_layoutr   r   r   r:   r   r   r   r/   s     r   r1   zSwizzledSharedLayout._to_ir  s?    11HNNJN
 
 	
r   rC   c                     d }d| j          d| j         d| j         d || j                   d || j                   d || j                   d || j                   dS )Nc                 Z    | dS d                     t          t          |                     S rF   rI   rM   s    r   rO   z.SwizzledSharedLayout.mangle.<locals>.stringify  rP   r   SSS_rH   _SSS)r   r   r   r:   r   r   r   r   rO   s     r   r4   zSwizzledSharedLayout.mangle  s    	) 	) 	)
 zdh  z  z  z  z$.  z  z99TZCXCX  z  z[d[deiev[w[w  z  z  {D  {D  EI  EW  {X  {X  z  z  [d  [d  ei  es  [t  [t  z  z  z  	zr   c                    t          | j        | j        | j        t	          | j                  | j        rt	          | j                  nd | j        rt	          | j                  nd | j        rt	          | j                  nd f          S r   )	rS   r   r   r   rT   r:   r   r   r   r   s    r   rU   zSwizzledSharedLayout.__hash__  s    TXt~t~4:&&DDU(_d.?(@(@(@[_262DNU4-...$.2nFU4>***$H I I 	Ir   )r#   r$   r%   r&   rW   rX   r   r   r   r   r   r@   r1   rL   r4   rU   rY   rZ   s   @r   r   r     s          
HHHNNNNNN9(,L(49%,,,)-M8DI&---%)IxS	")))+ + + + +	
 	
 	
z z z z zI I I I I I Ir   r   c                        e Zd ZU dZeee                  ed<   ee         ed<   dZeee                  ed<   dZ	eee                  ed<   dZ
eee                  ed<    fdZd	 Zd
efdZd Zd Z xZS )PaddedSharedLayouta  
    Represents a layout for the access to shared memory. Compared to SwizzledSharedLayout,
    it uses padding to avoid shared memory bank conflicts. After every interval tensor elements,
    the corresponding number of padding elements are inserted.
    If a position corresponds to multiple intervals, the padding amounts are summed.

    In the following example of a tensor,
    `eM` represents original elements in the and `pN` represents padded element.

    Before padding, the shared memory looks like:
    [e0, e1,
     e2, e3,
     e4, e5,
     e6, e7,
     ...]

    After padding with interval-padding list [[2, 1], [4, 2]],
    the shared memory will be
    [e0, e1, p0,
     e2, e3, p1, p2, p3,
     e4, e5, p4,
     e6, e7, p5, p6, p7,
     ...]

    Args:
        interval_padding_pairs (List[int]): List of [interval, padding] pair and both interval and padding must be powers of 2.
        order (List[int]): Order of logical tensor dimensions; fastest-varying first.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    interval_padding_pairsr:   Nr   r   r   c                 8   t                                          dt          | j                             t                                          dt	          | j                             t                                          dt	          | j                             t                                          dt	          | j                             t                                          dt	          | j                             | 	                                 d S )Nr   r:   r   r   r   )
r<   r   r   r   r   r:   r   r   r   verifyr`   s    r   r@   z PaddedSharedLayout.__post_init__  s    4mDD_6`6`aaaG%9$*%E%EFFFN,@AR,S,STTTO-A$BT-U-UVVVK)=dn)M)MNNNr   c                     t          | j         \  }}|                    ||| j        | j        | j        | j                  S r   )zipr   get_padded_shared_layoutr:   r   r   r   )r   r0   	intervalspaddingss       r   r1   zPaddedSharedLayout._to_ir   sH    !4#>?	8//	8TZQUQbdhdv04@ @ 	@r   rC   c                     d }d || j                    d || j                   d || j                   d || j                   d || j                   dS )Nc                 Z    | dS d                     t          t          |                     S rF   rI   rM   s    r   rO   z,PaddedSharedLayout.mangle.<locals>.stringify  rP   r   PaddedShared_rH   _PaddedShared)r   r:   r   r   r   r   s     r   r4   zPaddedSharedLayout.mangle  s    	) 	) 	)
 Hyy)DEE  H  H		RVR\H]H]  H  H`i`ijnj{`|`|  H  H  @I  @I  JN  J\  @]  @]  H  H  `i  `i  jn  jx  `y  `y  H  H  H  	Hr   c                    | j         }t          |          dk    s
J d            t          d |D                       sJ t          | \  }}t	          t          |                    }t          |          t          |          k    sJ d t          fd|D                       s
J d            t          fd|D                       s
J d            t          | j                  }|dk    s
J d	            t          | |           t          | j                  |k    sJ t          | j	                  |k    sJ t          | j
                  |k    sJ d S )
Nr   zVPaddedSharedLayout interval_padding_pairs must have at least one interval-padding pairc              3   <   K   | ]}t          |          d k    V  dS )r   N)r=   ).0pairs     r   	<genexpr>z,PaddedSharedLayout.verify.<locals>.<genexpr>  s,      44d3t99>444444r   c                 &    | dk    o| | dz
  z  dk    S )Nr   r   r(   )ns    r   <lambda>z+PaddedSharedLayout.verify.<locals>.<lambda>  s    !a%"<AQK1,< r   c              3   .   K   | ]} |          V  d S r   r(   r   r   is_power_of_2s     r   r   z,PaddedSharedLayout.verify.<locals>.<genexpr>  s-      77==##777777r   z;PaddedSharedLayout interval values must all be power of twoc              3   .   K   | ]} |          V  d S r   r(   r   s     r   r   z,PaddedSharedLayout.verify.<locals>.<genexpr>  s-      66==##666666r   z:PaddedSharedLayout padding values must all be power of twoz*PaddedSharedLayout order must not be empty)r   r=   allr   r   setr:   r   r   r   r   )r   pairsr   r   unique_intervalsr   r   s         @r   r   zPaddedSharedLayout.verify  s   +5zzA~~~w~~~44e44444444!5k	8I//#$$I6666<<7777Y77777vv9vvvv6666X66666tt8tttt4:axxxExxxD$'''4$%%----4%&&$....4>""d******r   c           	      :   t          t          t          t          | j                            t          | j                  | j        rt          | j                  nd | j        rt          | j                  nd | j        rt          | j                  nd f          S r   )rS   rT   rK   r   r:   r   r   r   r   s    r   rU   zPaddedSharedLayout.__hash__#  s    U3ud&ABBCC4:&&DDU(_d.?(@(@(@[_262DNU4-...$.2nFU4>***$H I I 	Ir   )r#   r$   r%   r&   r   rW   rX   r   r   r   r   r@   r1   rL   r4   r   rU   rY   rZ   s   @r   r   r     s         > !cO+++9(,L(49%,,,)-M8DI&---%)IxS	")))    @ @ @
H H H H H+ + +*I I I I I I Ir   r   c                     dg|z  }| s|S d }| D ]T}t          d t          |          D             d           }||}||xx         dz  cc<   >|s|J ||xx         dz  cc<   U|S )Nr   c              3   ,   K   | ]\  }}|d k    |V  dS )r   Nr(   )r   ivs      r   r   z bases_per_dim.<locals>.<genexpr>5  s*      ==$!Qa1ffAffff==r   r   )next	enumerate)basesr   skip_broadcastresultnon_zero_idxro   idxs          r   bases_per_dimr   +  s    S4ZF L 	& 	&==)E"2"2===tDD?L3KKK1KKKK 	&+++<   A%   Mr   c                     t          | t                    r"t          | j        t	          |                    S t          | t
          t          f          rt          | j        |          S | j        S r   )	
isinstancerh   r   rk   r=   r\   rv   r9   r^   )r   rm   s     r   r9   r9   A  sa    &122 $V.E

;;;	F[*:;	<	< $V]E222##r   N)T)dataclassesr   typingr   r   triton.language.corer   r   r   triton.runtime.jitr	   r   r   r,   r6   r\   rh   rv   r   r   r   r   r   r   r   r9   r(   r   r   <module>r      s   ! ! ! ! ! ! ! ! ! ! ! ! ! ! T T T T T T T T T T 1 1 1 1 1 17 7 7$ $ $ $ $ $ $ $ $    "    $K K K K K% K K K\ $- - - - -# - - -8 $2 2 2 2 2/ 2 2 2j $E E E E E( E E E8 $4   ,I ,I ,I ,I ,I. ,I ,I ! ,I^$ $ $ $ $ $ $ $    $eI eI eI eI eI eI eI eIP $4   <I <I <I <I <I< <I <I ! <I~ $4   VI VI VI VI VI VI VI ! VIt   ,$ $ $ $ $r   