
    Pi                        d dl mZ d dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZ d dlmZ dgZ ed	           G d
 de                      ZdS )    )annotations)	dataclass)ListOptional)_unwrap_if_constexpr)_realize_cta_layoutDistributedLayout)languageAMDMFMALayoutT)frozenc                       e Zd ZU dZded<   ded<   ded<   ded<   ej        Zd	ed
<   dZded<   dZ	ded<   dZ
ded<   dZded<    fdZd ZddZd Zd Z xZS )r   a  
    Represents a layout for AMD MFMA (matrix core) operations.

    Args:
        version (int): Major and minor identifier for the MFMA instruction.
        instr_shape: (M, N) dimension for the instrinsic shape.
        transposed (bool): indicates the result tensor is transposed so that each thread holds consecutive elements in the same row instead of column, which is good for chained dot and global write.
        warps_per_cta (List[int]): Number of warps per CTA.
        elem_type Optional(ttgl.dtype): Supported types are int32, fp32 and fp64. Default is fp32.
        tiles_per_warp Optional(List[int]): Number of tiles per WARP. For mfma layout, if missing, use the default where we have unit tile size on all dimensions.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    intversionz	List[int]instr_shapebool
transposedwarps_per_ctaz
ttgl.dtype	elem_typeNzOptional[List[int]]tiles_per_warpctas_per_cgacta_split_num	cta_orderc                J   t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j                             t                                          dt          | j	                             t                                          dt          | j
                             t                                          d	t          | j                             | j        2t                              | dd
gt          | j                  z             |                                  d S )Nr   r   r   r   r   r   r   r   r      )super__setattr__r   r   r   r   r   r   r   r   r   r   objectlenverify)self	__class__s    /var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/experimental/gluon/language/amd/_layouts.py__post_init__zAMDMFMALayout.__post_init__)   s   I';DL'I'IJJJM+?@P+Q+QRRRL*>t*O*OPPPO-A$BT-U-UVVV,.B4CV.W.WXXXK)=dn)M)MNNNN,@AR,S,STTTO-A$BT-U-UVVVK)=dn)M)MNNN&t%5sSAS=T=T7TUUU    c                    | j                             |          }|                    | j        | j        | j        | j        || j        | j        | j	        | j
        	  	        S N)r   to_irget_amd_mfma_layoutr   r   r   r   r   r   r   r   )r    buildertypes      r"   _to_irzAMDMFMALayout._to_ir9   s_    ~##G,,**4<9I4?\`\npt+/+>@QSWSegkguw w 	wr$   returnstrc                
   d }d| j          d || j                   d| j         d || j                   d || j                   d| j         d || j                   d || j                   d || j                   dS )Nc                Z    | dS d                     t          t          |                     S )N _)joinmapr-   )xs    r"   	stringifyz'AMDMFMALayout.mangle.<locals>.stringify@   s&    yr88CQKK(((r$   MFMA_r1   _MFMA)	r   r   r   r   r   r   r   r   r   )r    r5   s     r"   manglezAMDMFMALayout.mangle>   sf   	) 	) 	)
 Ht|  H  Hii0@&A&A  H  HDO  H  HV_V_`d`rVsVs  H  Hvv  AE  AT  wU  wU  H  H  X\  Xf  H  H  ir  ir  sw  sD  iE  iE  H  H  HQ  HQ  RV  Rd  He  He  H  H  hq  hq  rv  r@  hA  hA  H  H  H  	Hr$   c                6   | j         dk    r| j         dk    s
J d            ddgddgddgddgg}| j        |v sJ dt          |          z               | j                                        s<| j                                        s#| j                                        s
J d            t          | j                  }t          | |           t          | j
                  |k    sJ t          | j                  |k    sJ t          | j                  |k    sJ d S )	Nr      z#version must be in the [1, 4] range       @   z-invalid intrinsic shape; accepted shapes are z/element type must be float32, float64, or int32)r   r   r-   r   is_fp32is_fp64is_int32r   r   r   r   r   r   )r    valid_shapesranks      r"   r   zAMDMFMALayout.verifyG   sN   |q  T\Q%6%6%68]%6%6%6R2r(RGaW=<///1`cfgsctct1t///~%%'' 	[4>+A+A+C+C 	[^$$&&	[ 	[)Z	[ 	[ 	[ 4%&&D$'''4$%%----4%&&$....4>""d******r$   c                p   t          | j        t          | j                  | j        t          | j                  | j        | j        rt          | j                  nd | j        rt          | j                  nd | j	        rt          | j	                  nd | j
        rt          | j
                  nd f	          S r&   )hashr   tupler   r   r   r   r   r   r   r   )r    s    r"   __hash__zAMDMFMALayout.__hash__U   s    L$"##O$$%%N*.*=GE$%&&&4(,(9CE$#$$$t)-);EE$$%%%%)^=E$.!!!

 
 
 
	r$   )r,   r-   )__name__
__module____qualname____doc____annotations__ttglfloat32r   r   r   r   r   r#   r+   r8   r   rF   __classcell__)r!   s   @r"   r   r      s          LLL LI((((*.N....(,L,,,,)-M----%)I))))     w w w
H H H H+ + +      r$   N)
__future__r   dataclassesr   typingr   r   triton.language.corer   +triton.experimental.gluon.language._layoutsr   r	   triton.experimental.gluonr
   rL   __all__r    r$   r"   <module>rW      s    " " " " " " ! ! ! ! ! ! ! ! ! ! ! ! ! ! 5 5 5 5 5 5 ^ ^ ^ ^ ^ ^ ^ ^ 6 6 6 6 6 6 
 $P P P P P% P P P P Pr$   