
    PiW                       d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZ ddlmZ ddlmZ  ed	          Z ed
          Z G d de          Z G d de	e                   ZdS )    )annotationsN)ListOptionalSequenceTupleTypeVarGenericType)driver   )ir   )coreTTensorTyc                       e Zd Z fdZ xZS )IncompatibleTypeErrorImplc                    || _         || _        d| j                                         z   dz   | j                                        z   | _        t	          t
          |                               | j                   d S )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      p/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/language/semantic.pyr   z"IncompatibleTypeErrorImpl.__init__   sl    2T[5I5I5K5KKgUX\XcXlXlXnXnn'..77EEEEE    )__name__
__module____qualname__r   __classcell__)r   s   @r   r   r      sA        F F F F F F F F Fr   r   c                  B   e Zd ZU ej        Zded<   eZded<   d Zdd
ZddZ	ddZ
ddZdddZddZ	 	 ddd"Zdd%Zdd)Zdd*Zdd+Zdd,Zdd-Zdd/Zdd0Zdd5Zdd6Zdd9Zdd:Zdd;Zdd<Zdd=Zdd>Zdd?Zdd@Z ddAZ!ddBZ"ddCZ#ddDZ$ddEZ%ddFZ&ddIZ'ddJZ(ddKZ)ddLZ*ddMZ+ddNZ,ddOZ-dPdQddUZ.ddWZ/ddXZ0dd[Z1d d]Z2dd^Z3ddaZ4ddbZ5ddcZ6ddfZ7ddgZ8ddjZ9ddkZ:d	dlZ;d
doZ<ddqZ=dddsZ>dt Z?du Z@dv ZAdw ZBdx ZCdy ZDdz ZEd{ ZFd| ZGddZHddZIddZJddZKddZLd ZMd ZNddZOddZPddZQddZRddZSddZTddZUd ZVd ZWddZXddZYddZZddZ[ddZ\ddZ]ddZ^ddZ_ddZ`ddZaddZbd ZcddZdddZeddZfddZgddZhd ZiddZjddĄZkddǄZldȄ Zmd d΄Znd!dЄZod"d҄Zpd"dӄZqd"dԄZrd#dՄZsd$dڄZtd%d݄Zud#dބZvd߄ ZwddZxd&dZyd&dZz	 d'd(dZ{dPS ()  TritonSemanticzType[TensorTy]tensorz
ir.builderbuilderc                    || _         d S N)r'   )r   r'   s     r   r   zTritonSemantic.__init__   s    r   axisintreturnr   c                    |dvrt          d|           |                     | j                            |          t          j                  S )Nr   r   r   z+program_id axis must be 0, 1, or 2 but got )
ValueErrorr&   r'   create_get_program_idtlint32r   r*   s     r   
program_idzTritonSemantic.program_id&   sJ    y  Q4QQRRR{{4<==dCCRXNNNr   c                    |dvrt          d|           |                     | j                            |          t          j                  S )Nr.   z-num_programs axis must be 0, 1, or 2 but got )r/   r&   r'   create_get_num_programsr1   r2   r3   s     r   num_programszTritonSemantic.num_programs+   sJ    y  STSSTTT{{4<??EErxPPPr   a_tytl.dtypeb_tyc                   |j         }|j         }|j        }|j        }||k    r
||k    r|n|S |t          j        j        j        k    r
||k    r|n|S |t          j        j        j        k    r
||k    r|n|S t          d| d|           )Nzunexpected signedness r   )int_bitwidthint_signednessr1   dtype
SIGNEDNESSUNSIGNED	TypeError)r   r8   r:   a_rankb_ranka_snb_sns          r   integer_promote_implz#TritonSemantic.integer_promote_impl4   s    """" 4<<!F??444RX(111!V++445RX(111!V++445BBBDBBCCCr   a_is_scalarboolb_is_scalar
div_or_modc                   ||k    rk|r||fn||f\  }}|                                 j        |                                 j        k    r*|r&|t          j        t          j        fv rt          j        S |S |                                s|                                rt          j        S |                                s|                                rt          j        S |	                                s|	                                r|rt          j        S t          j        S |
                                r.|
                                r|rt          j        S t          j        S |
                                s|
                                rt          j        S |                                r(|                                r||k    r|nt          j        S |                                r|                                st          d| d|           |rO|j        |j        k    r?t          d|                                z   dz   |                                z   dz             |                     ||          S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer1   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intrA   r=   r   rF   )r   r8   rG   r:   rI   rJ   	scalar_ty	tensor_tys           r   computation_type_implz$TritonSemantic.computation_type_implC   sO   
 +%%3>#PD$<<T4L Iy~~%)9)9)??? &9R[0I#I#I:%   <<>> 	T\\^^ 	: <<>> 	T\\^^ 	: <<>> 	"T\\^^ 	" "z!z!<<>> 	#dllnn 	# #z!{"<<>> 	T\\^^ 	:;;== 	8T[[]] 	84<<44RZ7{{}} 	BDKKMM 	B@t@@$@@AAA  	p$-1DDD9DMMOOKgUX\XeXeXgXggoo p p p ((t444r   T
check_typec                   t          |t                    r8|                     | j                            |          t
          j                  S t          |t                    rd|cxk    rdk     rn nt
          j        }njd|cxk    rdk     rn nt
          j	        }nMd|cxk    rdk     rn nt
          j
        }n0d|cxk    rdk     rn nt
          j        }nt          d| d          |                     ||	          S t          |t                    r~d
}dddz  z  }t          d         |          }|t          d          k    s|dk    s||k    s||cxk    r|k    rn nt
          j        }nt
          j        }|                     ||	          S t          |t
          j                  r|                     |j                  S t          || j                  r|S |r#t+          d| dt-          |           d          |S )N           l                             l            zNonrepresentable integer .r>   g      8g   ?r      absinfg        zcannot convert z	 of type z
 to tensor)
isinstancerH   r&   r'   get_int1r1   int1r+   r2   uint32int64uint64r/   scalar_constantfloat__builtins__rR   rT   	constexpr	to_tensorrO   rA   type)r   xr]   r>   min_float32max_float32abs_xs          r   rr   zTritonSemantic.to_tensoru   sG   a 	;;t|44Q77AAA3 	""""U"""""!####e#####	1$$$$u$$$$$!####e#####	 !AQ!A!A!ABBB'''7775!! 	!K%C/K '**Ee$$||Avve2222{22222

'''7772<(( 	>>!'***4;'' 	H 	OMaMM$q''MMMNNNr   r   r   allow_ptr_aNonec                    |                                 r`|st          ||          |                                 r||k    rt          ||          |                                rt          ||          d S d S r)   )is_ptrr   is_floating)r   r   r   rx   s       r   check_ptr_type_implz"TritonSemantic.check_ptr_type_impl   s    ==?? 	@ @/???}} @Ff$4$4/???!!## @/???	@ 	@@ @r   FlhsTensorTy | numbers.NumberrhsTuple[TensorTy, TensorTy]c                   t          |t          j                  }t          |t          j                  }|r|}	|                     |          }|r|}
|                     |          }|j        j        }|j        j        }|                     |||           |                     |||           |r|                                s{|                                sf|                     |||||          }|r|	dk     r|	                                s|r)|
dk     r#|	                                rt          d          |                                r|rH|                                |	cxk    r|                                k    sn t          d|	 d|           |rH|                                |
cxk    r|                                k    sn t          d|
 d|           |r|                     |	|          n|                     ||          }|r|                     |
|          n|                     ||          }|                     ||          \  }}||fS )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type rd   )rh   numbersNumberrr   rs   scalarr}   r{   r\   is_int_unsignedr/   rY   get_int_min_valueget_int_max_valuern   castbroadcast_impl_value)r   r~   r   allow_lhs_ptrallow_rhs_ptrarithmetic_checkrJ   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tys                 r   binary_op_type_checking_implz+TritonSemantic.binary_op_type_checking_impl   s    #377"377 	&J..%%C 	&J..%%C X_
X_
  ZGGG  ZGGG 	vJ$5$5$7$7 	v
@Q@Q@S@S 	v33Jz[hjtuuJ L*q..Z5O5O5Q5Q.$ #1)3aJ<V<V<X<X  "K L L L  "" c  c**F*F*H*HJ +I +I +I +I*4*F*F*H*H+I +I +I +I$%az%a%aU_%a%abbb  c**F*F*H*HJ +I +I +I +I*4*F*F*H*H+I +I +I +I$%az%a%aU_%a%abbbHUu$&&z&DDD[_[d[dehjt[u[uCHUu$&&z&DDD[_[d[dehjt[u[uC ,,S#66SCxr   	binary_opcallablec                   |j         j        j        dk    s| j        j        j        sd S |j         j        }|j         j        }||k    sJ |                                sJ |                     |t          j	                  }|                     |t          j	                  } |||d          }|
                                }|                     |t          j	                  }|                                }|                     |t          j	                  }|                     |                     ||          |                     ||                    }	d|j         d|j         }
|                     |	|
d            d S )N@   Fr+   z! overflow detected for operation )rs   r   r<   r'   optionssanitize_overflowrY   r   r1   rl   r   rn   r   and_
less_equalgreater_equalr    device_assert)r   r~   r   r   r   r   ret	max_value	min_valuecondmsgs              r    binary_op_sanitize_overflow_implz/TritonSemantic.binary_op_sanitize_overflow_impl   sY   8?'2--T\5I5[-FX_
X_
Z''''  """""iiRX&&iiRX&&iS%((0022	((BH==	0022	((BH==	yyi88$:L:LSR[:\:\]]bJ+bbiN`bb4d+++++r   inputotherr   c                   |                      ||dd          \  }}|j        j        }|j        j        }|                                r#|                                rt	          d          |                                r0|                                s||}}|j        j        }|j        j        }|                                r|j        }|j                                        rm|j        j        dk     r]|j        	                    t          j                                      | j                  }| j                            |j        |d          }|                     | j                            |j        |          |j                  S |                                r>|                     | j                            |j        |j                  |j                  S |                                r\|r|                     ||| j                   |                     | j                            |j        |j                  |j                  S t	          d|           )NTzcannot add pointers togetherr   FrL   )r   rs   r   r{   rA   handler>   r   r<   with_element_tyr1   rl   to_irr'   create_int_castr&   create_addptrr|   create_faddrY   r   add
create_add)r   r   r   r   input_scalar_tyother_scalar_tyother_handlei64_tys           r   r   zTritonSemantic.add   s    88tTRRu*+*+!!## 	<(>(>(@(@ 	<:;;; !!## 	0O,B,B,D,D 	0 %5E#j/O#j/O!!## 	` <L{**,, Y1IB1N1N33BH==CCDLQQ#|;;EL&RWXX;;t|99%,UUW\Wabbb((** 	`;;t|77elSSUZU_```##%% 	`  N55eUDHMMM;;t|66u|U\RRTYT^___<?<<===r   c                v   |                      ||dd          \  }}|j        j        }|                                r+|                     ||                     |          d          S |                                r>|                     | j        	                    |j
        |j
                  |j                  S |                                r\|r|                     ||| j                   |                     | j                            |j
        |j
                  |j                  S t          d|           )NTF)r   rL   )r   rs   r   r{   r   minusr|   r&   r'   create_fsubr   rY   r   sub
create_subrA   r   r   r   r   rZ   s        r   r   zTritonSemantic.sub   s"   88tUSSuJ%	 	O88E4::e#4#48NNN  "" 	`;;t|77elSSUZU_``` 	`  N55eUDHMMM;;t|66u|U\RRTYT^___6966777r   c                   |                      ||          \  }}|j        j        }|                                r>|                     | j                            |j        |j                  |j                  S |                                r\|r| 	                    ||| j
                   |                     | j                            |j        |j                  |j                  S t          d|           NrL   )r   rs   r   r|   r&   r'   create_fmulr   rY   r   mul
create_mulrA   r   s        r   r   zTritonSemantic.mul  s    88FFuJ%	  "" 	`;;t|77elSSUZU_``` 	`  N55eUDHMMM;;t|66u|U\RRTYT^___6966777r   c                   |                      ||dddd          \  }}|j        j        }|j        j        }|                                r,|                                r|                     ||          }n |                                r+|                                r|                     ||          }n|                                rU|                                rA|                     |t          j                  }|                     |t          j                  }nx|                                rR|                                r>|j        |j        k    r|                     ||          }n)|                     ||          }nt          d|           | 
                    | j                            |j        |j                  |j                  S NFTrL   )r   rs   r   r|   rY   r   r1   rR   fp_mantissa_widthrA   r&   r'   create_fdivr   )r   r   r   r   r   s        r   truedivzTritonSemantic.truediv  s   88ueUY[_``u*+*+&&(( 	B_-C-C-E-E 	BIIe_55EE##%% 	B/*E*E*G*G 	BIIe_55EE##%% 	B/*@*@*B*B 	BIIeRZ00EIIeRZ00EE((** 	B/J/J/L/L 	B0?3TTT		%99		%99 @@@AAA{{4<33EL%,OOQVQ[\\\r   c                   |                      ||dddd          \  }}|j        j        }|j        j        }|                                r|                                r|                     ||          }|                     ||          }|                     ||          }|                                r>|                     | j        	                    |j
        |j
                  |j                  S |                     | j                            |j
        |j
                  |j                  S t          d|           r   )r   rs   r   rY   rF   r   is_int_signedr&   r'   create_sdivr   create_udivrA   )r   r   r   r   r   ret_tys         r   floordivzTritonSemantic.floordiv7  s%   88ueUY[_``u*+*+!!## 	e(>(>(@(@ 	e..PPFIIeV,,EIIeV,,E##%% e{{4<#;#;EL%,#W#WY^Ycddd{{4<#;#;EL%,#W#WY^Ycddd<?<<===r   ieee_roundingc                Z   |j         j        }|j         j        }|                                r|                                st          d          |                     ||dddd          \  }}| j                            |j        |j                  }|                     ||j                   S )Nz4both operands of fdiv must have floating scalar typeFT)	rs   r   r|   rA   r   r'   r   r   r&   )r   r   r   r   r   r   r   s          r   fdivzTritonSemantic.fdivE  s    *+*+**,, 	TO4O4O4Q4Q 	TRSSS88ueUZ\`aaul&&u|U\BB{{3
+++r   c                   |                      ||dddd          \  }}|j        j        }|j        j        }|                                r>|                     | j                            |j        |j                  |j                  S |                                r|j	        |j	        k    r?t          d|                                z   dz   |                                z   dz             |                                r>|                     | j                            |j        |j                  |j                  S |                     | j                            |j        |j                  |j                  S t          d|           )NFTzCannot mod z by rM   rL   )r   rs   r   r|   r&   r'   create_fremr   rY   r=   rA   r   r   create_sremcreate_urem)r   r   r   rZ   r   s        r   modzTritonSemantic.modN  sx   88ueUY[_``uJ%	*+  "" 	e;;t|77elSSUZU_``` 	e'?+III	0B0B0D0D Dv MP_PhPhPjPj j ns !s t t t &&(( e{{4<#;#;EL%,#W#WY^Ycddd{{4<#;#;EL%,#W#WY^Ycddd6966777r   rt   ypropagate_nantl.PropagateNanc                F   |                      ||          \  }}|j        }|                                r|t          j        j        k    r>|                     | j                            |j	        |j	                  |j
                  S |t          j        j        k    r>|                     | j                            |j	        |j	                  |j
                  S t          d|           |                                r>|                     | j                            |j	        |j	                  |j
                  S |                                r>|                     | j                            |j	        |j	                  |j
                  S t%          d|           NzUnexpected propagate_nan Unexpected dtype )r   r>   r|   r1   PropagateNanALLr&   r'   create_minimumfr   rs   NONEcreate_minnumfr/   r   create_minsir   create_minuirA   r   rt   r   r   r>   s        r   minimumzTritonSemantic.minimume  `   00A661 	9 333{{4<#?#?!(#S#SUVU[\\\"/"666{{4<#>#>qx#R#RTUTZ[[[ !L]!L!LMMM  "" 	9;;t|8818LLafUUU""$$ 	9;;t|8818LLafUUU777888r   c                F   |                      ||          \  }}|j        }|                                r|t          j        j        k    r>|                     | j                            |j	        |j	                  |j
                  S |t          j        j        k    r>|                     | j                            |j	        |j	                  |j
                  S t          d|           |                                r>|                     | j                            |j	        |j	                  |j
                  S |                                r>|                     | j                            |j	        |j	                  |j
                  S t%          d|           r   )r   r>   r|   r1   r   r   r&   r'   create_maximumfr   rs   r   create_maxnumfr/   r   create_maxsir   create_maxuirA   r   s        r   maximumzTritonSemantic.maximumv  r   r   minmaxc                ~   |                      ||          \  }}|                      ||          \  }}|                      ||          \  }}|j        }|                                rE|                     | j                            |j        |j        |j        |          |j                  S t          d| d          )Nr   z(. Only floating point clamp is supported)	r   r>   r|   r&   r'   create_clampfr   rs   rA   )r   rt   r   r   r   r>   s         r   clampzTritonSemantic.clamp  s    44S#>>S221c::3221c::3 	a;;t|99!(CJPSPZ\ijjlmlrsss____```r   c                x   |                      ||          \  }}|j        j        }|j        j        }|                                r|                                st	          ||          |                     ||          }||k    r|                     ||          }||k    r|                     ||          }||fS r)   )r   rs   r   rY   r   rF   r   )r   r   r   input_sca_tyother_sca_tyr   s         r   bitwise_op_type_checking_implz,TritonSemantic.bitwise_op_type_checking_impl  s    88FFuz(z(""$$ 	HL,?,?,A,A 	H+L,GGG..|\JJ
%%IIeZ00E%%IIeZ00Ee|r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   
create_andr   rs   r   r   r   s      r   r   zTritonSemantic.and_  I    99%GGu{{4<225<NNPUPZ[[[r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   	create_orr   rs   r   s      r   or_zTritonSemantic.or_  sG    99%GGu{{4<11%,MMuzZZZr   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   
create_xorr   rs   r   s      r   xor_zTritonSemantic.xor_  r   r   c                   |j                                         s |                     |t          j                  }|j                                         s |                     |t          j                  }|                     ||          S r)   )rs   is_int1bitcastr1   rj   r   r   s      r   logical_andzTritonSemantic.logical_and  sl    z!!## 	1LL00Ez!!## 	1LL00Eyy&&&r   c                   |j                                         s |                     |t          j                  }|j                                         s |                     |t          j                  }|                     ||          S r)   )rs   r   r   r1   rj   r   r   s      r   
logical_orzTritonSemantic.logical_or  sl    z!!## 	1LL00Ez!!## 	1LL00Exxu%%%r   c                    |j                                         s |                     |t          j                  }|                     |          S r)   )rs   r   r   r1   rj   invertr   r   s     r   not_zTritonSemantic.not_  s?    z!!## 	1LL00E{{5!!!r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   create_lshrr   rs   r   s      r   lshrzTritonSemantic.lshr  I    99%GGu{{4<33EL%,OOQVQ[\\\r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   create_ashrr   rs   r   s      r   ashrzTritonSemantic.ashr  r	  r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   
create_shlr   rs   r   s      r   shlzTritonSemantic.shl  r   r   c                    |S r)    r  s     r   pluszTritonSemantic.plus  s    r   c                J   |j         j        }|                                r't          d|                                z   dz             |                     | j                            |                    | j                            |          }| 	                    ||d          S )Nz$wrong type argument to unary minus ()T)
rs   r   r{   r/   r   r&   r'   get_null_valuer   r   )r   r   r   _0s       r   r   zTritonSemantic.minus  s    z(   	eClF[F[F]F]]`ccddd[[44\5G5G5U5UVVXdeexxE4(((r   c                p   |j         j        }|                                s|                                r't	          d|                                z   dz             |                     | j                            |	                    | j                            |          }| 
                    ||          S )Nz%wrong type argument to unary invert (r  )rs   r   r{   r|   r/   r   r&   r'   get_all_ones_valuer   r   )r   r   r   _1s       r   r  zTritonSemantic.invert  s    z(   	fL$<$<$>$> 	fD|G\G\G^G^^addeee[[889K9KDL9Y9YZZ\hiiyy###r   vtl.block_typec                J    |j                             t          j                  S r)   )rs   r   r1   rj   )r   r  s     r   
_bool_likezTritonSemantic._bool_like  s    v%%bg...r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                r|
                                rL|                     | j                            |j        |j                  |                     |                    S |                     | j                            |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOGTr   r  rY   r   create_icmpSGTcreate_icmpUGTrA   r   r   r   rZ   s       r   greater_thanzTritonSemantic.greater_than  )   88FFuJ%	  "" 	t;;t|::5<VVX\XgXghmXnXnooo 	t&&(( t{{4<#>#>u|U\#Z#Z\`\k\klq\r\rsss{{4<#>#>u|U\#Z#Z\`\k\klq\r\rsss6966777r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                r|
                                rL|                     | j                            |j        |j                  |                     |                    S |                     | j                            |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOGEr   r  rY   r   create_icmpSGEcreate_icmpUGErA   r"  s       r   r   zTritonSemantic.greater_equal  r$  r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                r|
                                rL|                     | j                            |j        |j                  |                     |                    S |                     | j                            |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOLTr   r  rY   r   create_icmpSLTcreate_icmpULTrA   r"  s       r   	less_thanzTritonSemantic.less_than  r$  r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                r|
                                rL|                     | j                            |j        |j                  |                     |                    S |                     | j                            |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOLEr   r  rY   r   create_icmpSLEcreate_icmpULErA   r"  s       r   r   zTritonSemantic.less_equal  r$  r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                rL|                     | j        
                    |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOEQr   r  rY   create_icmpEQrA   r"  s       r   equalzTritonSemantic.equal"      88FFuJ%	  "" 	o;;t|::5<VVX\XgXghmXnXnooo 	o;;t|99%,UUW[WfWfglWmWmnnn6966777r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                rL|                     | j        
                    |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpUNEr   r  rY   create_icmpNErA   r"  s       r   	not_equalzTritonSemantic.not_equal-  r6  r   N)r   startendr   c               (   t          |t                    rt          |t                    st          d          t          |dz	            }t          |dz	            }|s|rt          d          ||k    rt          d          ||z
  }||dz
  z  dk    rt          d          |g}|t	          j        t          j        |          }|                    | j                  }| 	                    | j        
                    |||          |          S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)rh   r+   r/   rH   r1   
block_typer2   r   r'   r&   create_make_range)	r   r;  r<  r   is_start_int64is_end_int64rangeshape	ret_ty_irs	            r   arangezTritonSemantic.arange<  s   %%% 	PZS-A-A 	PNOOOerk**C2I 	9\ 	97888%<<\]]]eUQYA%%BCCC>]28U33FLL..	{{4<99)UCPPRXYYYr   r>   c                   |t          d          |dk    r3| j                            |                    | j                            }n(t	          | j        d|j                   } ||          }|                     ||          S )Nz2dtype must be specified when value is not a tensorr   get_)r/   r'   r  r   getattrnamer&   )r   rO   r>   get_value_fns       r   rn   zTritonSemantic.scalar_constantN  s    =QRRRA::L//DL0I0IJJEE"4<1D
1D1DEEL L''E{{5%(((r   c                    t          |t          j                  r0|j        j        dk    s
J d            |                     ||          S |                     ||          S )Nr   zonly accepts size-1 tensor)rh   r1   r&   numelrO   r   rn   )r   rO   r>   s      r   make_scalarzTritonSemantic.make_scalarY  s_    eRY'' 	+;$)))+G)))99UE***##E5111r   rD  	List[int]c                V    |                      |                     ||          |          S r)   )splatrN  )r   rD  rO   r>   s       r   fullzTritonSemantic.full`  s&    zz$**5%88%@@@r   rO   c                >   |j                                         r
J d            t          |          dk    r|S t          j        |j        |          }|                     | j                            |	                    | j                  |j
                  |          S )NzCannot splat a block tensorr   )rs   is_blocklenr1   r?  r>   r&   r'   create_splatr   r   )r   rO   rD  r   s       r   rQ  zTritonSemantic.splatg  s    :&&((GG*GGGGu::??Lu{E22{{4<44V\\$,5O5OQVQ]^^`fgggr   c                r    |                      | j                            |j                  |j                  S r)   )r&   r'   create_unsplatr   r>   )r   rO   s     r   unsplatzTritonSemantic.unsplatn  s*    {{4<66u|DDekRRRr   	dst_shapecan_reorderc                    d}|D ]}||z  }|j         j        |k    rt          d          t          j        |j         j        |          }|                     | j                            |j	        ||          |          S )Nr   z:reshape() cannot change total number of elements in tensor)
rs   rM  r/   r1   r?  r   r&   r'   create_reshaper   )r   r   rZ  r[  rM  sr   s          r   reshapezTritonSemantic.reshapeq  s     	 	AQJEE:u$$YZZZuz0)<<{{4<66u|YP[\\^deeer   c                V   d |j         D             }|                    |d           |j                                        s|                     ||          S t          j        |j        j        |          }|                     | j	        
                    |j        |          |          S )Nc                6    g | ]}t          j        |          S r  r1   _unwrap_if_constexpr).0rt   s     r   
<listcomp>z.TritonSemantic.expand_dims.<locals>.<listcomp>{  s#    EEEAR,Q//EEEr   r   rD  )rD  insertrs   rT  rQ  r1   r?  r   r&   r'   create_expand_dimsr   )r   r   r*   rZ  r   s        r   expand_dimszTritonSemantic.expand_dimsz  s    EEEEE	q!!!z""$$ 	6::e9:555uz0)<<{{4<::5<NNPVWWWr   c                2   |s
J d            t          |j                  dk    sJ t          j        |j        j        |j        d         |j        d         z   g          }|                     | j                            |j	        |j	                  |          S )Nz;current implementation of `cat` always may reorder elementsr   r   )
rU  rD  r1   r?  rs   r   r&   r'   
create_catr   )r   r~   r   r[  ret_types        r   catzTritonSemantic.cat  s    YYYYYY39~~""""=39Q<#)A,3N2OPP{{4<223:szJJHUUUr   abc                   |                      ||          \  }}|j        g k    }|r,|                     |d          }|                     |d          }t          |j        d         t          j                  rt	          j        d          }nd}|j        |gz   }t	          j        |j        j        |          }| 	                    | j
                            |j        |j                  |          }|r|                     |dgd          }|S )Nr   r   Fr[  )r   rD  ri  rh   r1   rq   r?  rs   r   r&   r'   create_joinr   r_  )r   rn  ro  
was_rank_1two	new_shaperl  r   s           r   joinzTritonSemantic.join  s    ((A..1 W]
 	'  A&&A  A&&Aagbk2<00 	,q//CCCGseO	=	::kk$,2218QXFFQQ 	<,,sQCU,;;C
r   c                x   t          |j                  dk    sJ t          j        |j        d                   dk    sJ |j        d d         }t          j        |j        j        |          }| j                            |j	                  \  }}| 
                    ||          | 
                    ||          fS )Nr   rq  r   )rU  rD  r1   rc  r?  rs   r   r'   create_splitr   r&   )r   rn  rv  rl  outLHSoutRHSs         r   splitzTritonSemantic.split  s    AGq    '449999GCRCL	=	::2218<<KK))KK))
 	
r   dims
Tuple[int]c                   t          j                  t          |          k    rt          d          t          d |D                       t	          t          t          |                              k    rt          d|           t          j        j        j	        fd|D                       }| 
                    | j                            j        |          |          S )Nz5permute dims must have the same length as input shapec              3  >   K   | ]}t          j        |          V  d S r)   rb  )rd  ds     r   	<genexpr>z)TritonSemantic.permute.<locals>.<genexpr>  s-      ;;")!,,;;;;;;r   z?permute dims must be a permutation of 0, 1, ..., n-1, but were c                *    g | ]}j         |         S r  rf  )rd  r  r   s     r   re  z*TritonSemantic.permute.<locals>.<listcomp>  s    4R4R4RU[^4R4R4Rr   )rU  rD  r/   sortedlistrC  r1   r?  rs   r   r&   r'   create_transr   )r   r   r}  rl  s    `  r   permutezTritonSemantic.permute  s    u{s4yy((TUUU;;d;;;;;tE#d))DTDT?U?UUUe_ceefff=!24R4R4R4RT4R4R4RSS{{4<44U\4HH(SSSr   c                H   |j                                         s|                     ||          S |j                                         }t	          |          t	          |          k    rt          d| d|           ||k    r|S t          |          D ];\  }}||         |k    r*|dk    r$t          d||          d| d| d| d| 
          <t          j        |j         j	        |          }| 
                    | j                            |j        |          |          S )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rs   rT  rQ  get_block_shapesrU  r/   	enumerater1   r?  r   r&   r'   create_broadcastr   )r   r   rD  	src_shapeiitemr   s          r   broadcast_impl_shapez#TritonSemantic.broadcast_impl_shape  s[   z""$$ 	,::eU+++J//11	y>>SZZ''UUUeUUVVVIL ++ 	@ 	@GAtQx4DAII  "?W\]^W_ "? "?CG"? "?%&"? "?*3"? "?7<"? "? @ @ @ uz0%88{{4<88uMMvVVVr   c           	        |j         }|j         }|                                r||                                sh|                    |j                  }|                     | j                            |                    | j                  |j                  |          }n|                                s||                                rh|                    |j                  }|                     | j                            |                    | j                  |j                  |          }n8|                                r#|                                r|	                                }|	                                }t          |          t          |          k     rt          t          |          t          |                    D ]r}|                     | j                            |j        d          t          j        |j        dg|j        z                       }|j         }|	                                }snt          |          t          |          k     rt          t          |          t          |                    D ]r}|                     | j                            |j        d          t          j        |j        dg|j        z                       }|j         }|	                                }st          |          t          |          k    sJ g }t!          |          D ]\  }	}
||	         }|
dk    r|                    |           )|dk    s||
k    r|                    |
           Kt%          dt'          |	          z   dz   t'          |
          z   dz   t'          |          z             ||k    rNt          j        |j        |          }|                     | j                            |j        |          |          }||k    rNt          j        |j        |          }|                     | j                            |j        |          |          }||fS )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index r  r   )rs   rT  r   r   r&   r'   rV  r   r   r  rU  rC  rh  r1   r?  valuesr  appendr/   strr  )r   r~   r   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaper  leftrightr   s                r   r   z#TritonSemantic.broadcast_impl_value  s    ?? +	`V__%6%6 +	`++FM::F++dl77T\8R8RTWT^__aghhCC"" '	`v'8'8 '	`++FM::F++dl77T\8R8RTWT^__aghhCC__ #	`6??#4#4 #	`//11I//11I9~~I..s9~~s9~~>> : :A++dl&E&EcjRS&T&T&(mFMA3IYCY&Z&Z\ \C XF & 7 7 9 9II	:
 Y#i..00s9~~s9~~>> : :A++dl&E&EcjRS&T&T&(mFMA3IYCY&Z&Z\ \C XF & 7 7 9 9IIy>>S^^3333I$Y// e e4!!199$$U++++qjjetmm$$T****$ &136q66&:<@&ACFt99&MOV&WY\]bYcYc&d e e eI%%v}i@@kk$,"?"?
I"V"VX^__I%%v}i@@kk$,"?"?
I"V"VX^__Cxr   rounding_modeOptional[str]c                    |d S |dk    rt           j        j        S |dk    rt           j        j        S t	          d| d          )NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r   ROUNDING_MODERTNERTZr/   )r   r  s     r   _str_to_rounding_modez$TritonSemantic._str_to_rounding_mode  sU     4F""#((E!!#''r=rrrsssr   dst_tyc                8   |j         }|                                r|                    |j                  }||k    r|S |j        }|j        }|                                s|                                r|                     ||          S |j        }|j        }||k    r2t          dt          |          z   dz   t          |          z             | 	                    | j
                            |j        |                    | j
                            |          S )Nz!Cannot bitcast data-type of size z to data-type of size )rs   rT  r   r   r{   r   primitive_bitwidthr/   r  r&   r'   create_bitcastr   r   )r   r   r  src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r   zTritonSemantic.bitcast  s   ?? 	;++FM::FVL]
]
 	,*"3"3"5"5 	,99UF+++00x@3x==P T2 247MMB C C C{{4<66u|V\\RVR^E_E_``bhiiir   fp_downcast_roundingc                   |j         }|j        }|j        }||k    r|S |                                r|                    |          }|                     |          }d}|                                rP|                                r<|j        |j        k     r,|t          j        j	        }nL|t          j        j	        k    rd}n4|2t          dt          |          z   dz   t          |          z             |                                s|                                rI| j        j                            d          	 
J d             | j        j        d         ||||           S |                                r|                                s*|                                r|                                s|rM|                     | j                            |j        |                    | j                  |          |          S |                                r|                                r(|                                rH|                                s4|                     |                     |t2          j                  |          S |                                o#|                                o|j        |j        k    }|rL|                     | j                            |j        |                    | j                            |          S |                                o#|                                o|j        |j        k     }	|	rL|                     | j                            |j        |                    | j                            |          S |                                r'|                                r|j        |j        k    s|j        |j        k    r|                                 o|!                                 }
|!                                rh|j"                            | j                  }|                     | j        #                    |          |j"                  }| $                    ||          S |                     | j        %                    |j        |                    | j                  |
          |          S |&                                r=|                                r(|!                                rh|j"                            | j                  }|                     | j        #                    |          |j"                  }| $                    ||          S |                                 rL|                     | j        '                    |j        |                    | j                            |          S |                     | j        (                    |j        |                    | j                            |          S |                                r|&                                r|!                                s|                                 sL|                     | j        )                    |j        |                    | j                            |          S |                     | j        *                    |j        |                    | j                            |          S |+                                r|                                r|j        }|dk    rL|                     | j        ,                    |j        |                    | j                            |          S |d	k    rj| $                    |                     |t2          j-                  |                     | j        .                    d
          t2          j-                            S |                                r`|+                                rL|                     | j        /                    |j        |                    | j                            |          S |+                                r`|+                                rL|                     | j        0                    |j        |                    | j                            |          S J d| d|             )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type.)	_semanticr   r   r   zcannot cast z to )1rs   r   rT  r   r  r|   r  r   r  r  r/   r  is_fp8e4b15r'   codegen_fnsgetrX   r&   create_fp_to_fpr   r   rV   rU   rW   r   r1   rR   create_fp_trunccreate_fp_extrY   r<   r=   r   is_boolr>   r  r:  r   is_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fpr{   create_ptr_to_intrl   	get_int64create_int_to_ptrr  )r   r   r  r  r  r  r  use_custom_roundingtruncate_fpext_fpsign_extendtyr  bitwidths                 r   r   zTritonSemantic.cast  sd   ]
]
##L?? 	8++J77F  $99:NOO#!!## 	2
(>(> )
 )
 	2+j.KKK#+BDTDY-A-A%)9)>>>VZ@S#/  ":<?
OO"LNi"j!$Z"1 2 2 2 ""$$ 	y
(>(>(@(@ 	y<+//&( (/34 45g4 4 4C4<+,BCE6Sgswxxxx  	vJ$:$:$<$< 	v""$$	v)3):):)<)<	v	v ;;,,U\6<<;U;UWkllntv v v    	G););)=)= 	G  	G)3););)=)=	G99TYYubj99:FFF
 !,,.. J""$$J)J,II 	  	o;;t|;;EL&,,W[WcJdJdeegmnnn '')) J""$$J)J,II 	  	m;;t|99%,UYUaHbHbcceklll  		+:#4#4#6#6 		+#z'>>>*B[_i_xBxBx$2244QZ=O=O=Q=Q9QK!!## +[&&t|44[[!<!<R!@!@%+NN~~eR000{{4<#?#?fll[_[gNhNhju#v#v#)+ + + **,, 	s1B1B1D1D 	s!!## s[&&t|44[[!<!<R!@!@%+NN~~eR000))++ s{{4<#?#?fll[_[gNhNh#i#ikqrrr{{4<#?#?fll[_[gNhNh#i#ikqrrr  	s:#B#B#D#D 	s!!## s:+C+C+E+E s{{4<#?#?fll[_[gNhNh#i#ikqrrr{{4<#?#?fll[_[gNhNh#i#ikqrrr  	t:#4#4#6#6 	t!.H2~~{{4<#A#A%,PVP\P\]a]iPjPj#k#kmsttt1}}~~diirx&@&@$++dlNdNdefNgNgikiqBrBrsss  	q:#4#4#6#6 	q;;t|==elFLLY]YeLfLfggioppp  	n:#4#4#6#6 	n;;t|::5<VZVbIcIcddflmmm88U8888888r   c                    t           j        j        }|r[|dk    rt           j        j        }nC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )Nz.ca.cgz.cvCache modifier  not supported)r   CACHE_MODIFIERr   CACGCVr/   r   cache_modifiercaches      r   _str_to_load_cache_modifierz*TritonSemantic._str_to_load_cache_modifier  s}    !& 	S&&),5((),5((), !Q>!Q!Q!QRRRr   c                   t           j        j        }|rs|dk    rt           j        j        }n[|dk    rt           j        j        }nC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )Nz.wbr  z.csz.wtr  r  )r   r  r   WBr  CSWTr/   r  s      r   _str_to_store_cache_modifierz+TritonSemantic._str_to_store_cache_modifier  s    !& 
	S&&),5((),5((),5((), !Q>!Q!Q!QRRRr   c                    t           j        j        }|rC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )N
evict_lastevict_firstzEviction policy r  )r   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr/   )r   eviction_policyevictions      r   _str_to_eviction_policyz&TritonSemantic._str_to_eviction_policy  se    %, 	U,..-8 M11-9 !SO!S!S!STTTr   c                    d }|rC|dk    rt           j        j        }n+|dk    rt           j        j        }nt	          d| d          |S )NzeronanzPadding option r  )r   PADDING_OPTIONPAD_ZEROPAD_NANr/   )r   padding_optionpaddings      r   _str_to_padding_optionz%TritonSemantic._str_to_padding_option  s_     	S''+45((+3 !Q>!Q!Q!QRRRr   c                   t           j        j        }|rs|dk    rt           j        j        }n[|dk    rt           j        j        }nC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )Nacquirereleaseacq_relrelaxedMemory semantic r  )r   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr/   )r   
sem_optionsems      r   _str_to_semzTritonSemantic._str_to_sem  s    o- 
	PY&&o-y((o-y((o5y((o- !NJ!N!N!NOOO
r   c                    t           j        j        }|r[|dk    rt           j        j        }nC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )Ngpuctasysr  r  )r   MEM_SYNC_SCOPEGPUCTASYSTEMr/   )r   scope_optionscopes      r   _str_to_scopezTritonSemantic._str_to_scope  s}    !% 	Ru$$)-&&)-&&)0 !PL!P!P!PQQQr   c                n   |rt          |d          s|g}d |D             }|D ]5}t          |t                    rd|cxk    rt          |          k     sn J 6t          |          dk    sJ t          |          t          t	          |                    k    s
J d            t          |          S dS )N__iter__c                T    g | ]%}t          |t          j                  r|j        n|&S r  rh   r1   rq   rO   rd  elems     r   re  z?TritonSemantic._canonicalize_boundary_check.<locals>.<listcomp>  s1    pppY]JtR\,J,JTdjjPTpppr   r   z'Duplicate dimension in `boundary_check`r  )hasattrrh   r+   rU  setr  )r   boundary_checkblock_shapedims       r   _canonicalize_boundary_checkz+TritonSemantic._canonicalize_boundary_check  s     	*>:66 2"0!1ppaopppN% L L!#s++KS0K0K0K0K3{;K;K0K0K0K0K0K0K0K~&&****~&&#c..A.A*B*BBBBDmBBB.)))rr   c	           
        ||t          d          |j        j        j        }	|	t          j        k    s
J d            |	                                r$|t          j        j        k    rt          d          |j        j        }
| 	                    ||

                                          }|                     | j                            |j        |||||          |
          S )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r/   rs   
element_tyr1   rj   rY   r   r  r  r  r  r&   r'   create_tensor_pointer_loadr   )r   ptrmaskr   r  r  r  r  is_volatileelt_tyr  s              r   _load_block_pointerz"TritonSemantic._load_block_pointer  s     u0jkkk$/   "X   ==?? 	aw"*;*CCC_``` $ ::>6KbKbKdKdee {{L33CJPWY^`hjuvv  	r   c	           
        |j         j                                        s*t          d|j                                          d          ||t          d          |s|rt          d          |j                                         sT|r(|j                                         rt          d          |r(|j                                         rt          d          |j                                         r6||                     ||          \  }}||                     ||          \  }}|j         j        }	|	j        }
|
t          j	        k    }|r<t          j
        }
t          j        |
|	j                  }	|                     ||	          }||                     ||
          }|j                                         r|j                             |
          }n|
}|7|                     | j                            |j        |||          |          }nF|                     | j                            |j        |j        |r|j        nd |||          |          }|r |                     |t          j	                  }|S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rs   r   r{   r/   r   rT  r   r  r1   rj   int8pointer_typeaddress_spacer   r   r&   r'   create_loadr   create_masked_load)r   r  r  r   r  r  r  r  r  ptr_tyr  r  r  r   s                 r   _load_legacyzTritonSemantic._load_legacy  s   x%%'' 	YWSX5F5F5H5HWWWXXX <E-HIII 	Yn 	Y X Y Y Y
 x  "" 	k j	**,, j !hiii k,,.. k !ijjj 8 	C 55c4@@	T !66sEBB
U " BG# 	)WF_VV-ABBF))C((C IIeV,,E 8 	X--f55FF F <++dl66sz5(T_``bhiiCC++//
DKY^Ihdhjo08+G GHNP PC  	*))C))C
r   r  r  Optional[TensorTy]r  r   r  r  r  r  r  c	           
     ^   |                      |          }	|                     |          }
|                     |          }|j                                        r:|j        j                                        r|                     ||||||	|
|          S |                     ||||||	|
|          S r)   )	r  r  r  rs   r{   r  rT  r  r$  )r   r  r  r   r  r  r  r  r  r  r  r  s               r   loadzTritonSemantic.load5  s     00@@//@@--n==8?? 	n!4!=!=!?!? 	n++CungW\^fhsttt $$S$~wPUW_almmmr   desctl.tensor_descriptor_basec                   t          |t          j                  sJ t          |j                  }t          |          |k    sJ d| dt          |                       |                     |d          }| j                            |j        || 	                    |          | 
                    |                    }|                     ||j                  S )N	expected  offsets, but got Frequire_i64)rh   r1   tensor_descriptor_baserU  r  _convert_to_ir_valuesr'   create_descriptor_loadr   r  r  r&   r?  )r   r(  offsetsr  r  ndimrt   s          r   descriptor_loadzTritonSemantic.descriptor_loadC  s    $ 9:::::4#$$7||t###%W%W%WW%W%W###,,W%,HHL//WdFfFfguFvFv040L0L_0]0]_ _{{1do...r   c                    t          |t          j                  sJ t          |j                  }t          |          |k    sJ d| dt          |                       |j        |j        k    sJ d S )Nr+  r,  )rh   r1   r/  rU  r  rD  )r   r(  rO   r2  r3  s        r   validate_store_likez"TritonSemantic.validate_store_likeN  s}    $ 9:::::4#$$7||t###%W%W%WW%W%W###{d.......r   c                   |                      |||           |                     ||j                  }|                     |d          }|                     | j                            |j        |j        |          t          j	                  S NFr-  )
r6  r   r>   r0  r&   r'   create_descriptor_storer   r1   void)r   r(  rO   r2  s       r   descriptor_storezTritonSemantic.descriptor_storeT  sx      ug666		%,,,,W%,HH{{4<??U\[bccegelmmmr   c                   |                      |||           |j        t          j        t          j        t          j        t          j        t          j        t          j        hv s
J d            | 	                    |d          }t          j        j        }|                     | j                            ||j        |j        |          t          j                  S NUnsupported dtypeFr-  )r6  r>   r1   rk   r2   rm   rR   rP   rQ   r0  r   DESCRIPTOR_REDUCE_KINDADDr&   r'   create_descriptor_reducer   r:  r   r(  rO   r2  rN   s        r   descriptor_atomic_addz$TritonSemantic.descriptor_atomic_add[  s      ug666zbi29bj"*VXVabbbbdwbbb,,W%,HH(,{{4<@@t{TYT`bijjlnlstttr   c                j    t           j                                        }|j        dk    o
|j        dk    S )NcudaZ   )r   activeget_current_targetbackendarch)r   targets     r   _has_native_tmazTritonSemantic._has_native_tmab  s.    1133&(>V[B->?r   c                   |t           j        t           j        t           j        t           j        t           j        t           j        hv s
J d            |t           j        t           j        hv r|                                 sJ d            d S d S )Nr>  z-16-bit float types require native tma support)r1   rk   r2   rm   rl   rP   rQ   rL  )r   r>   s     r   $_descriptor_atomic_min_max_supportedz3TritonSemantic._descriptor_atomic_min_max_supportedf  sx    BHbi2:r{[[[[]p[[[RZ---''))ZZ+ZZZZ .-ZZr   c                >   |                      |||           |                     |j                   |                     |d          }t          j        j        }|                     | j        	                    ||j
        |j
        |          t          j                  S r8  )r6  rN  r>   r0  r   r?  MINr&   r'   rA  r   r1   r:  rB  s        r   descriptor_atomic_minz$TritonSemantic.descriptor_atomic_mink        ug66611$*===,,W%,HH(,{{4<@@t{TYT`bijjlnlstttr   c                >   |                      |||           |                     |j                   |                     |d          }t          j        j        }|                     | j        	                    ||j
        |j
        |          t          j                  S r8  )r6  rN  r>   r0  r   r?  MAXr&   r'   rA  r   r1   r:  rB  s        r   descriptor_atomic_maxz$TritonSemantic.descriptor_atomic_maxr  rR  r   c                   |                      |||           |j        t          j        t          j        t          j        t          j        hv s
J d            |                     |d          }t          j	        j
        }|                     | j                            ||j        |j        |          t          j                  S r=  )r6  r>   r1   rk   r2   rm   rl   r0  r   r?  ANDr&   r'   rA  r   r:  rB  s        r   descriptor_atomic_andz$TritonSemantic.descriptor_atomic_andy        ug666zbi29bhGGGGI\GGG,,W%,HH(,{{4<@@t{TYT`bijjlnlstttr   c                   |                      |||           |j        t          j        t          j        t          j        t          j        hv s
J d            |                     |d          }t          j	        j
        }|                     | j                            ||j        |j        |          t          j                  S r=  )r6  r>   r1   rk   r2   rm   rl   r0  r   r?  ORr&   r'   rA  r   r:  rB  s        r   descriptor_atomic_orz#TritonSemantic.descriptor_atomic_or  s      ug666zbi29bhGGGGI\GGG,,W%,HH(+{{4<@@t{TYT`bijjlnlstttr   c                   |                      |||           |j        t          j        t          j        t          j        t          j        hv s
J d            |                     |d          }t          j	        j
        }|                     | j                            ||j        |j        |          t          j                  S r=  )r6  r>   r1   rk   r2   rm   rl   r0  r   r?  XORr&   r'   rA  r   r:  rB  s        r   descriptor_atomic_xorz$TritonSemantic.descriptor_atomic_xor  rY  r   c                ~   t          |t          j                  sJ |dk    s
J d            |dk    s
J d            t          |j                  dk    sJ d|j                     |j        d         dk    sJ d|j                     t          |j                  dk    sJ d	|j                     |j        d         d
k    sJ d|j                     |j        }d|j        z  d
z  }|j        d         |k    sJ d| d| d|j        d                      t          j        |j        |j        d         |j        d         g          }| 	                    |fd          d         }| j
                            |j        |j        ||                    | j
                            }	|                     |	|          S )N z#cache modifier is not supported yetz$eviction policy is not supported yetr   descriptor must be 2D, but got r   r   *descriptor block must have 1 row, but got x offsets must be 1D, but got    z5descriptor gather must have at least 8 rows, but got r>  zdescriptor gather of  must have at least  columns, but got Fr-  )rh   r1   r/  rU  r  rD  r>   r  r?  r0  r'   create_descriptor_gatherr   r   r&   )
r   r(  	x_offsetsy_offsetr  r  r>   min_colsrs   rt   s
             r   descriptor_gatherz TritonSemantic.descriptor_gather  s    $ 9:::::###%J###"$$$&L$$$ 4#$$)))+_TM]+_+_)))"a''')hVZVf)h)h''' 9?##q(((*\9?*\*\((( q!Q&&&(q`i`o(q(q&&&
11A5   AE  A  Ax  A  Akok{|}k~  A  A   }TZ)/!*<d>Nq>Q)RSS--xl-NNqQL11$+y?OQY[_[e[efjfr[s[stt{{1d###r   c                   t          |t          j                  sJ t          |j                  dk    sJ d|j                     |j        d         dk    sJ d|j                     t          |j                  dk    sJ d|j                     |j        d         dk    sJ d|j                     |j        }d	|j        z  dz  }|j        d         |k    sJ d
| d| d|j        d                      | 	                    |fd          d         }| j
                            |j        |j        |j        |           |                     d t          j                  S )Nr   rb  r   r   rc  rd  re  z6descriptor scatter must have at least 8 rows, but got r>  zdescriptor scatter of rf  rg  Fr-  )rh   r1   r/  rU  r  rD  shapaer>   r  r0  r'   create_descriptor_scatterr   r&   r:  )r   r(  rO   ri  rj  r>   rk  s          r   descriptor_scatterz!TritonSemantic.descriptor_scatter  s   $ 9::::: 4#$$)))+_TM]+_+_)))"a''')hVZVf)h)h''' 9?##q(((*]9K[*]*]((( q!Q&&&(rajap(r(r&&&
11A5   BU  B  BPX  B  Blpl|}~l  B  B   --xl-NNqQ..t{EL)JZ\deee{{4)))r   c           	     Z   |t          d          |j        j                                        }|j                                        s|                     ||          }|j                                        s
J d            ||j                                        k    s(J d| d|j                                         d            |j        j        j        |j        j        k    s*J d|j        j        j         d|j        j         d            |j        j        j        }|t          j        k    s
J d            |                     ||          }| 	                    ||          }| 
                    | j                            |j        |j        |||          t          j                  S )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r/   rs   r  r  rT  r  r1   rj   r  r   r&   r'   create_tensor_pointer_storer   r:  )	r   r  valr  r  r  r  r  r  s	            r   _store_block_pointerz#TritonSemantic._store_block_pointer  s    jkkk h)::<<x  "" 	>++C==Cx  ""SS$SSSSch77 
 
 
 
 
`+``9R9R9T9T```
 
 
x"-1DDDD  Gu\_\d\o\z  Gu  Gu  VY  V^  Vi  Gu  Gu  GuDDD$/   "X    ::>;WW iiV$$ {{L44SZ^]bdlmmoqovx x 	xr   c           	        |j         j                                        s*t          d|j                                          d          |rt          d          |j                                         sR|j                                         rt          d          |r(|j                                         rt          d          |j                                         r\|                     ||j                                                   }|-|                     ||j                                                   }|j         j        }|j        }|t          j
        k    r<t          j        }t          j        ||j                  }|                     ||          }|                     ||          }|E|                     | j                            |j        |j        ||          t          j                  S |j         j                                        st          d          |                     | j                            |j        |j        |j        ||          t          j                  S )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  "Mask must have boolean scalar type)rs   r   r{   r/   r   rT  r  r  r  r1   rj   r  r  r   r   r&   r'   create_storer   r:  r  create_masked_store)	r   r  rs  r  r  r  r  r#  r  s	            r   _store_legacyzTritonSemantic._store_legacy  sA   x%%'' 	ZXSX5F5F5H5HXXXYYY  	F E F F F
 x  "" 	jx  "" k !ijjj j	**,, j !hiii 8 	T++C1J1J1L1LMMC00sx7P7P7R7RSS" RWWF_VV-ABBF))C((C iiV$$ <;;t|88SZQVX`aacecjkkky'')) 	CABBB{{4<;;CJ
TXT_afhpqq7$ $ 	$r   rs  c                   |                      |          }|                     |          }|j                                        s|j        j                                        rt          d          |j                                        r8|j        j                                        r| 	                    ||||||          S | 
                    ||||||          S )N"Cannot store to a constant pointer)r  r  rs   is_constr   r/   r{   r  rT  rt  ry  )	r   r  rs  r  r  r  r  r  r  s	            r   storezTritonSemantic.store  s     11.AA//@@8 	C#(/":":"<"< 	CABBB8?? 	W!4!=!=!?!? 	W,,S#t^UT\]]] %%c3neXVVVr   cmpr  r  c           	     4   |                      |          }|                     |          }|j        j        j        }|j        dvrt          d          |                     | j        	                    |j
        |j
        |j
        ||          |j                  S )N)   r>  r   z9atomic_cas only supports elements with width {16, 32, 64})r  r  rs   r   r  r  r/   r&   r'   create_atomic_casr   )r   r  r~  rs  r  r  r  s          r   
atomic_caszTritonSemantic.atomic_cas  s    s##""5))X_/
(<<XYYY{{4<99#*cjRUR\^achiiknkstttr   op#Tuple[TensorTy, TensorTy, TensorTy]c                   |j         j                                        s)t          d|j                                         z             |j                                         s|j         j                                        rt          d          |j         j        j        }|t          j        u r|dk    rt          d|z   dz             |t          j	        u r|dk    rt          d|z   dz             |t          j
        t          j        fv s|j        dk     r%t          d|z   dz   t          |          z             |j                                         r^|-|                     ||j                                                   }|-|                     ||j                                                   }|                     ||j         j        j                  }|| j                            d	          }t          j        }|j                                         rW|j                             t          j                  }| j                            |                    | j                  |          }|                     ||          }|||fS )
Nz)Pointer argument of store instruction is r{  r   atomic_z does not support fp16z does not support bf16r  z does not support T)rs   r   r{   r/   r   r|  r  r1   rP   rQ   int16uint16r  r  rT  r  r  r   r'   ri   rj   r   rV  r   r&   )r   r  rs  r  r  r  mask_irmask_tys           r   atom_red_typechecking_implz)TritonSemantic.atom_red_typechecking_impl  s=   x%%'' 	`H38K\K\K^K^^___8 	C#("5">">"@"@ 	CABBBX_/
##eY^.FFGGG$$uY^.FFGGG"(BI...*2ORT2T2TY^.BBS__TUUU8 	R00sx7P7P7R7RSS//SX5N5N5P5PQQiiSX_788<l++D11GgGx  "" Z(2227;;,33GMM$,4O4OQXYY;;w00DC~r   c                    |j         j        }t          j        |d          }|                     ||          }|                     ||dz
            }|                     |t          j                  S )NF)r  signedr   )r>   r  r1   get_int_dtyper   r  r   rj   )r   rt   r  idtypeixsignbits         r   _signbitzTritonSemantic._signbit;  sa    7-!8EBBB\\!V$$))B1--yy"'***r   c                   |                      |||d          \  }}}|                     |          }|                     |          }|j        j        }|                                r|                                rV|                     | j        	                    t          j        j        |j        |j        |j        ||          |j                  S |                     | j        	                    t          j        j        |j        |j        |j        ||          |j                  S |t          j        t          j        hvrt%          d|           |t          j        k    rt          j        nt          j        }|                     ||          }|                     |t          j        |d                    }	|t          j        k    rt          j        nt          j        }
|                     ||
          }|                     |t          j        |
d                    }|                     |          }|                     |          }|                     | j        	                    t          j        j        |	j        |j        |                     ||          j        ||          |j                  }|                     | j        	                    t          j        j        |j        |j        |                     ||          j        ||          |j                  }|                     |||          }|                     ||          S )Nr   z#atomic_max not supported for dtype r   )r  r  r  rs   r   rY   r   r&   r'   create_atomic_rmwr   	ATOMIC_OPrT  r   UMAXr1   rR   rT   rA   r2   rl   r   r  rk   rm   r  r  r   UMINwherer   r  rs  r  r  r  sca_tyi_typei_vali_ptrui_typeui_valui_ptrnegpospos_retneg_retr   s                     r   
atomic_maxzTritonSemantic.atomic_maxB     88c4OOS$s##""5))==?? 	##%% {{L222<3CSZQTQ[]a]hjmotuuH   {{L222<3DcjRUR\^b^iknpuvvH   "*bj111J&JJKKK#rz11rxS&))S"/&!"<"<==%33"))c7++c2?7A#>#>??mmC  iinn++L**2<+;U\5<+/99T3+?+?+FUT TUZU_a a ++L**2<+<fmV]+/99T3+?+?+FUT TU[U`b b jjgw//||C(((r   c                   |                      |||d          \  }}}|                     |          }|                     |          }|j        j        }|                                r|                                rV|                     | j        	                    t          j        j        |j        |j        |j        ||          |j                  S |                     | j        	                    t          j        j        |j        |j        |j        ||          |j                  S |t          j        t          j        hvrt%          d|           |t          j        k    rt          j        nt          j        }|                     ||          }|                     |t          j        |d                    }	|t          j        k    rt          j        nt          j        }
|                     ||
          }|                     |t          j        |
d                    }|                     |          }|                     |          }|                     | j        	                    t          j        j        |	j        |j        |                     ||          j        ||          |j                  }|                     | j        	                    t          j        j        |j        |j        |                     ||          j        ||          |j                  }|                     |||          }|                     ||          S )Nr   z#atomic_min not supported for dtype r   )r  r  r  rs   r   rY   r   r&   r'   r  r   r  rP  r   r  r1   rR   rT   rA   r2   rl   r   r  rk   rm   r  r  r   r  r  r  s                     r   
atomic_minzTritonSemantic.atomic_minh  r  r   c           
        |                      |||d          \  }}}|                     |          }|                     |          }|j        j        }|                                rt          j        j        nt          j        j	        }| 
                    | j                            ||j        |j        |j        ||          |j                  S )Nr   )r  r  r  rs   r   r|   r   r  FADDr@  r&   r'   r  r   )r   r  rs  r  r  r  r  r  s           r   
atomic_addzTritonSemantic.atomic_add  s    88c4OOS$s##""5))"("4"4"6"6LR\BL<L{{4<99"cj#*VZVacfhmnn8% % 	%r   c           
     :   |                      |||d          \  }}}|                     |          }|                     |          }|                     | j                            t          j        j        |j	        |j	        |j	        ||          |j
                  S )Nand)r  r  r  r&   r'   r  r   r  rW  r   rs   r   r  rs  r  r  r  s         r   
atomic_andzTritonSemantic.atomic_and      88c4OOS$s##""5)){{L**2<+;SZUYU`beglmmorowy y 	yr   c           
     :   |                      |||d          \  }}}|                     |          }|                     |          }|                     | j                            t          j        j        |j	        |j	        |j	        ||          |j
                  S )Nor)r  r  r  r&   r'   r  r   r  r[  r   rs   r  s         r   	atomic_orzTritonSemantic.atomic_or  s    88c4NNS$s##""5)){{L**2<?CJ
TXT_adfkllnqnvx x 	xr   c           
     :   |                      |||d          \  }}}|                     |          }|                     |          }|                     | j                            t          j        j        |j	        |j	        |j	        ||          |j
                  S )Nxor)r  r  r  r&   r'   r  r   r  r^  r   rs   r  s         r   
atomic_xorzTritonSemantic.atomic_xor  r  r   c           
     :   |                      |||d          \  }}}|                     |          }|                     |          }|                     | j                            t          j        j        |j	        |j	        |j	        ||          |j
                  S )Nxchg)r  r  r  r&   r'   r  r   r  XCHGr   rs   r  s         r   atomic_xchgzTritonSemantic.atomic_xchg  s    88c4PPS$s##""5)){{L**2<+<cj#*VZVacfhmnnH  	r   c                    |                                 | j        j        j        v sJ d| j        j        j         d|             |                                }|dk    rd}t          t          j        |          S )Nzinput_precision must be one of . Got TF32X3TF32x3)lowerr'   r   allowed_dot_input_precisionsupperrI  r   INPUT_PRECISION)r   input_precisions     r   _str_to_dot_input_precisionz*TritonSemantic._str_to_dot_input_precision  s    $$&&$,*>*[[[[xdl.B._xxgvxx \[[)//11h&&&Or)?;;;r   accr  max_num_imprecise_acc	out_dtypec           
     b   |j                                         r|j                                         sJ |j                                        r|j                                        rn|j        t          j        t          j        t          j        t          j        t          j	        t          j
        fv sJ d|j                     |j        t          j        t          j        t          j        t          j        t          j	        t          j
        fv sJ d|j                     |j        |j        k    sJ d|j         d|j                     |j                                        s|j                                        rgd| j        j        j        v rt          j        d           |                     |t          j                  }|                     |t          j                  }|j                                        p|j                                        }|j                                        p|j                                        }|s|r|rdnd}	|	| j        j        j        v rn| j        j        j        }
t          j        |	 d	|
 d
|
 d           |                     |t          j                  }|                     |t          j                  }|| j        j        j        }|                     |          }t/          |j                  }t/          |j                  }||cxk    rdk    s,n ||cxk    rdk    sn J d|j         d|j         d            |j        d         j        |j        d         j        k    sAJ d|j         d|j         d|j        d         j         d|j        d         j         d	            | j        j                            d          	 
J d             | j        j        d         |j         |j                   }|j        d         j        |d         k    r8|j        d         j        |d         k    r|j        d         j        |d         k    s%J d|d          d|d          d|d                      |j         j                                        rL|j         j        t          j        k    s
J d            | j                            d          }t          j        }n|                                 rtC          d          |j         j        "                                s|j         j                                         r'| j        #                    d          }t          j	        }n|j         j        $                                r'| j        %                    d          }t          j
        }nJ|&                                r| j        '                    d          n| j        #                    d          }|}|j         j        d         }|j         j        d         }|j         j        d         }|dk    r|j         j        d         nd }t	          j(        ||r|||gn||g          }|4| j        )                    |*                    | j                  |          }n.|j+        }|j         j        |j        k    r|j         j,        |k    sJ |G|j                                        r+|j                                        r| j        j        j-        }nQd}nN|j                                        r5|j                                        r||k    rtC          d | d!| d          | .                    | j        /                    |j+        |j+        |||          |          S )"NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   fp8e4b15zthe use of fp8e4b15 is deprecated on Hopper and later architectures and can cause significant slow down. It will be removed in a future triton releasefp8e4b8fp8e5b16z- is AMD gfx942 specific and not supported on z^ so it's upcasted to fp16 and can cause significant slow down. Please use OCP fp8 variants on z for performancer      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r  rq  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ()0rs   rT  r>   rX   r1   r  uint8rP   rQ   rR   rT   r  r'   r   !deprecated_fp8_dot_operand_dtypeswarningswarnr   
is_fp8e4b8is_fp8e5b16rJ  default_dot_input_precisionr  rU  rD  rO   r  r  r   rY   	get_int32r2   rW   r/   rU   get_fp32rS   get_fp64rV   get_fp16r?  rV  r   r   r  max_num_imprecise_acc_defaultr&   
create_dot)r   r~   r   r  r  r  r  uses_fp8e4b8uses_fp8e5b16	type_namerJ  lhs_rankrhs_rankr  r  ret_scalar_tyMNKBr   
acc_handles                         r   dotzTritonSemantic.dot  s   x  "":sx'8'8':'::::9 	p#)"2"2"4"4 	p9"(BJRZ!#!- - - -.Rsy.R.R- - -9"(BJRZ!#!- - - -.Rsy.R.R- - -9	)))+oTWT]+o+odgdm+o+o)))9  "" 	-ci&;&;&=&= 	-T\1SSS m   ))C,,C))C,,Cy++--G1E1E1G1G	--//J393H3H3J3J 	1= 	1%1A		zIDL0RRR|+0  M Mt M M6:M M MN N N iiRZ00iiRZ00""l2NO::?KKsy>>sy>>8((((q((((H,E,E,E,EA,E,E,E,E,E  HVuxu~  HV  HV  JM  JS  HV  HV  HV,E,E,Ey}"ci'   uSY  u  uQTQZ  u  u  Z]  Zc  df  Zg  Zm  u  u  _b  _h  ik  _l  _r  u  u  u  |'++ #'( ()]( ( (?t|/?#(SSy}"l1o55#)B-:MQ]^_Q`:`:`	"#|A666uauuVWuudpqrdsuu 7668?!!## 	&8?bg---/E---''**BHMM   	&z   X_$$&& 	&#(/*A*A*C*C 	&&&q))BJMMX_$$&& 	&&&q))BJMM-6->->-@-@^&&q)))dlF[F[\]F^F^B%MHN2HN2HN2!)QCHN1D}1.Hq!Qii1a&II;226<<3M3MrRRJJJ8>V\11ch6IY6V6V6V6V !(y!! *ci&6&6&8&8 *(,(<(Z%%()%%y!! hci&6&6&8&8 h=RUV=V=V !f;P!f!fbc!f!f!fggg{{L##CJ
JYnooqwy y 	yr   float_formatc                    t          t          j        |                                d           }|t	          d| d          |S )NzInvalid float format: rc   )rI  r   ScaleDotElemTypeTYr  r/   )r   r  ty_enums      r   _str_to_fp_typezTritonSemantic._str_to_fp_type  sF    "/1C1C1E1EtLL?ElEEEFFFr   c                   t           j        t           j        t           j        t           j        d                    |          }|<|dk    sJ d|             |j        t           j        k    sJ d|j                     |S |j        |k    r|S t           j        t           j        t           j        t           j        d|         }|j        |k    sJ d| d|j                     | 	                    ||          S )z
        If float_format is subbyte, make sure it's packed as uint8 and return it.
        Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
        )e5m2e4m3bf16fp16Ne2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for r  )
r1   float8e5
float8e4nvrQ   rP   r  r>   r  r  r   )r   rs  r  	triton_tyunsigned_tys        r   _bitcast_to_fp_typez"TritonSemantic._bitcast_to_fp_type#  s   
  ["-Z! !!$\!2!2 	6)))+eWc+e+e)))9(((*aVYV_*a*a(((J9	!!J#%8RXryZ\ZcddeqrK9+++-d\-d-dY\Yb-d-d+++<<Y///r   	lhs_scale
lhs_format	rhs_scale
rhs_formatTensorTy | None	fast_math
lhs_k_pack
rhs_k_packc                D   |j                                         r|j                                         sJ t          |j                  }t          |j                  }||cxk    rdk    s,n ||cxk    rdk    sn J d|j         d|j         d            |j        }|j        }|                     |          }|                     |          }h d}||v sJ d|             ||v sJ d|             |d u p"t          |t          j                  o|j        d u }|d u p"t          |t          j                  o|j        d u }| 	                    ||          }| 	                    ||          }|	s|d	k    s
J d
            |
s|d	k    s
J d
            |j         j        dd          \  }}|j         j        dd          \  }}|d	k    rdnd}|d	k    rdnd}|	r||z  n|}|
r||z  n|}||k    sJ d|j         d|j         d            |dk    r|j         j        d         nd }|	s||z  }|
s||z  }t          j
        ||r|||gn||g          }| j                            d          }|4| j                            |                    | j                  |          }n.|j        }|j         j        |j        k    r|j         j        |k    sJ |rd n|j        }|rd n|j        } |                     | j                            |j        | ||j        ||||	|
|
  
        |          S )Nr   r  r  r  r  >   r  r  r  r  r  zNYI: lhs_format zNYI: rhs_format r  zBonly mxfp4 inputs can be packed along a dimension different than Kr  r   zCReduction dimension should pack the same number of elements; (lhs: r   )rs   rT  rU  rD  rO   r  rh   r1   rq   r  r?  r'   r  rV  r   r   r  r&   create_dot_scaled)!r   r~   r  r  r   r  r  r  r  r   r  r  r  r  lhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_noner  K_LHSK_RHSr  PACKED_APACKED_BPACKED_A_DIMPACKED_B_DIMr  r   r  r  rhs_scale_handlelhs_scale_handles!                                    r   
dot_scaledzTritonSemantic.dot_scaled5  s#    x  "":sx'8'8':'::::sy>>sy>>8((((q((((H,E,E,E,EA,E,E,E,E,E  HVuxu~  HV  HV  JM  JS  HV  HV  HV,E,E,E$*
$*
..z::..z::BBB_,,,.M.M.M,,,_,,,.M.M.M,,,%-r*Y2U2U2qZcZimqZq%-r*Y2U2U2qZcZimqZq&&sJ77&&sJ77wZ61113w111wZ61113w1118>"##&58>"##&q"f,,11!"f,,11!+5@x%''5+5@x%''5|+++  .Tsvs|  .T  .T  HK  HQ  .T  .T  .T+++!)QCHN1D 	HA 	HAyq*D1a))q!fEE\""1%%;226<<3M3MrRRJJJ8>V\11ch6IY6V6V6V6V#4J44):J#4J44):J{{L**3:7GZ]Zdfv+:IzS]_ik klrt t 	tr   	conditionc                0   |j         t          j        k    rt          j        d|j                     |                     |t          j                  }|                     ||dd          \  }}|j                                        r3| 	                    ||          \  }}| 	                    ||          \  }}n| 	                    ||          \  }}|j        }| 
                    | j                            |j        |j        |j                  |          S )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r>   r1   rj   r  r  r   r   rs   rT  r   r&   r'   create_selectr   )r   r  rt   r   r  r   s         r   r  zTritonSemantic.wherei  s   ?bg%%M L  {D  {J  L  L   IIi11	00AtTBB1>""$$ 	C44YBBLIq,,Q22DAqq44YBBLIq{{4<55i6FRSRZ[[]cdddr   c                b    |rt          j        ||          }n|}|                     ||          S r)   )r1   r?  r&   )r   rt   rZ   r  res_tys        r   wrap_tensorzTritonSemantic.wrap_tensor}  s8     	]9i88FF F{{1f%%%r   inputsSequence[TensorTy]Tuple[TensorTy, ...]c                2    t           fdD                       dd         j        j        t                    }|k     sJ d| d            fdt	                    D             t          fdD                       s
J d             j                            d D                        |                                           sJ t           fd	t          t                              D                       S )
Nc              3  ^   K   | ]'}                     ||j        j        gd           V  (dS )Trr  N)r_  rM  rO   )rd  tr   s     r   r  z+TritonSemantic.reduction.<locals>.<genexpr>  s<      ^^RS4<<AGM?<MM^^^^^^r   r   z&reduction axis must be < inputs rank (r  c                &    g | ]\  }}|k    |S r  r  )rd  r  r^  r*   s      r   re  z,TritonSemantic.reduction.<locals>.<listcomp>  s"    AAA41aqDyyQyyyr   c              3  8   K   | ]}|j         j        k    V  d S r)   )rs   rD  )rd  r  rD  s     r   r  z+TritonSemantic.reduction.<locals>.<genexpr>  s,      99Q16<5(999999r   z-all reduction inputs must have the same shapec                    g | ]	}|j         
S r  r   rd  r  s     r   re  z,TritonSemantic.reduction.<locals>.<listcomp>  s    /I/I/IQ/I/I/Ir   c              3     K   | ]>}                                         |          |         j        j                  V  ?d S r)   r  
get_resultrs   r   )rd  r  r  	reduce_opr  r   s     r   r  z+TritonSemantic.reduction.<locals>.<genexpr>  se       u u\]DY11!44fQin6KYWWu u u u u ur   )
tuplers   rD  rU  r  allr'   create_reduceverifyrC  )r   r  r*   region_builder_fnrankr&  r  rD  s   ```  @@@r   	reductionzTritonSemantic.reduction  su   <^^^^W]^^^^^FDq	$5zzd{{{LTLLL{{{AAAA9U#3#3AAA	9999&99999jj;jjjjL../I/I&/I/I/I4PP	)$$$!!!!! u u u u u u uafgjkqgrgrasasu u u u u 	ur   reversec                    d         j         j        t                    }| |cxk    r|k     sn J d| d| d            |dk     r||z  }D ]}|j         j        k    s
J d             j                            d D             ||           |                                           sJ t           fdt          t                              D                       S )Nr   z
scan axis z must be < inputs rank (r  z(all scan inputs must have the same shapec                    g | ]	}|j         
S r  r!  r"  s     r   re  z3TritonSemantic.associative_scan.<locals>.<listcomp>  s    +E+E+EAH+E+E+Er   c              3     K   | ]>}                                         |          |         j        j                  V  ?d S r)   r$  )rd  r  r  scan_opr   rD  s     r   r  z2TritonSemantic.associative_scan.<locals>.<genexpr>  sN      ww_`T%%g&8&8&;&;VAY^=RTYZZwwwwwwr   )rs   rD  rU  r'   create_scanr*  r'  rC  )	r   r  r*   r+  r.  r,  r  r2  rD  s	   ``     @@r   associative_scanzTritonSemantic.associative_scan  s5   q	$5zzu####t#####%W$%W%WPT%W%W%W###!88DLD 	U 	UA6<5(((*T((((,**+E+Ef+E+E+EtWUU'"""~~wwwwwwwdijmntjujudvdvwwwwwwr   srcindexc                \   |j                                         s
J d            t          |j        j                  }t          |j        j                  |k    s
J d            | |cxk    r|k     sn J d| d| d            |dk     r||z  }t          |          D ]=}||k    r	|j        j        |         |j        j        |         k    sJ d| d            >| j                            |j        |j        |          }| 	                    ||j        j
        |j        j                  S )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r  r   z
index dim z( must match the corresponding source dim)r>   rY   rU  rs   rD  rC  r'   create_gatherr   r  r   )r   r5  r6  r*   r,  r  gathers          r   r9  zTritonSemantic.gather  sO   {!!##FF%FFFF38>""5:#$$,,,.`,,,u####t#####%YD%Y%YRV%Y%Y%Y###!88DLDt 	y 	yADyy:#A&#(.*;;;;=x$=x=x=x;;;;++CJdKK9IJJJr   c                "   |sdS |^}}t          t          |                    D ]$}|                     |||                   \  }||<   %t          t          |                    D ]$}|                     |||                   \  }||<   %|g|R S )Nr  )rC  rU  r   )r   r  headtailr  s        r   broadcast_tensorsz TritonSemantic.broadcast_tensors  s     	2ts4yy!! 	E 	EA 55dDGDDMD$q''s4yy!! 	E 	EA 55dDGDDMD$q''}t}}r   Sequence[tl.tensor]result_typesSequence[tl.dtype]packTuple[tl.tensor, ...]c                >      j          t                    dk    s
J d            fd|D             } j                            d D              fd|D             |           |           t	           fdt          |          D                       S )Nr   z1map_elementwise must have at least 1 input tensorc                Z    g | ]'}d          j                             |j                  (S )r   )rs   r   r   )rd  r  r  s     r   re  z2TritonSemantic.map_elementwise.<locals>.<listcomp>  s/    YYYbq	66ryAAYYYr   c                    g | ]	}|j         
S r  r!  r"  s     r   re  z2TritonSemantic.map_elementwise.<locals>.<listcomp>  s    &&&!QX&&&r   c                D    g | ]}|                     j                  S r  )r   r'   )rd  r  r   s     r   re  z2TritonSemantic.map_elementwise.<locals>.<listcomp>  s'    ;;;RXXdl##;;;r   c              3  p   K   | ]0\  }}                                         |          |          V  1d S r)   )r&   r%  )rd  r  r  elementwise_opr   s      r   r  z1TritonSemantic.map_elementwise.<locals>.<genexpr>  sC      hhuq"T[[!:!:1!=!=rBBhhhhhhr   )r=  rU  r'   create_map_elementwiser'  r  )r   r  r?  rA  r+  rH  s   ``   @r   map_elementwisezTritonSemantic.map_elementwise  s    ''06{{Q SYYYYLYYY<<&&v&&&;;;;l;;;
 

 	.))) hhhhhPYZfPgPghhhhhhr   num_binsc                   t          |j                  dk    s
J d            |j                                        s
J d            |O|                     ||j                  }|j        j                                        st          d          |j	        }| 
                    | j                            |j	        ||          t          j        t          j        |g                    S )Nr   z histogram only supports 1D inputz%histogram only supports integer inputrv  )rU  rD  r>   rY   r  rs   r   r  r/   r   r&   r'   create_histogramr1   r?  r2   )r   r   rK  r  s       r   	histogramzTritonSemantic.histogram  s    5;1$$$&H$$${!!##LL%LLLL,,T5;??D9#++-- G !EFFF;D{{4<88xQUVV=H:>>@ @ 	@r   r  c                   t          dt          |j                            t          |          k    rt          d          |j                            dt          j        ||j                                                             |S )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   rU  rD  r/   r   set_attrr   	make_attrget_contextr   rt   r  s      r   multiple_ofzTritonSemantic.multiple_of  sl    q#ag,,3v;;..`aaa	+R\&!(BVBVBXBX-Y-YZZZr   c                    t          |j                  t          |          k    rt          d          |j                            dt          j        ||j                                                             |S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityrU  rD  r/   r   rP  r   rQ  rR  rS  s      r   max_contiguouszTritonSemantic.max_contiguous  sa    qw<<3v;;&&cddd	/2<@T@T@V@V+W+WXXXr   c                    t          |j                  t          |          k    rt          d          |j                            dt          j        ||j                                                             |S )NzCShape of input to max_constancy does not match the length of valuesztt.constancyrV  rS  s      r   max_constancyzTritonSemantic.max_constancy  sa    qw<<3v;;&&bccc	.",vqx?S?S?U?U*V*VWWWr   c                p    |                      | j                                        t          j                  S r)   )r&   r'   create_barrierr1   r:  )r   s    r   debug_barrierzTritonSemantic.debug_barrier  s&    {{4<6688"'BBBr   prefixargsList[TensorTy]hexc                   |                     d          s|r|dz  }|                     d          s|r|d d         dz   }t          |          dk    r|                    d          sd|z   }d |D             }d |D             }|                     | j                            ||||          t          j                  S )N r  rq  r   c                    g | ]	}|j         
S r  r!  rd  args     r   re  z/TritonSemantic.device_print.<locals>.<listcomp>  s    ///3CJ///r   c                @    g | ]}|j                                         S r  )r>   r   rd  s     r   re  z/TritonSemantic.device_print.<locals>.<listcomp>  s&    ???3SY,,..???r   )endswithrU  
startswithr&   r'   create_printr1   r:  )r   r]  r^  r`  new_args	is_signeds         r   device_printzTritonSemantic.device_print  s     s## 	 	cMFt$$ 	( 	(CRC[4'Fv;;??6#4#4S#9#9?6\F//$///??$???	{{4<44VS(IVVXZX_```r   r   r   c                    | j         j        j        sd S |)|                     ||                     |                    }|                     | j                             |j        |          t          j	                  S r)   )
r'   r   debugr   r  r&   create_assertr   r1   r:  )r   r   r   r  s       r   r   zTritonSemantic.device_assert  sd    |#) 	F88D$))D//22D{{4<55dk3GGQQQr   c                |    |                      | j                            |j                  t          j                  S r)   )r&   r'   create_assumer   r1   r:  )r   r   s     r   assumezTritonSemantic.assume  s*    {{4<55dkBBBGLLLr   c                   t          |t                    rt          j        |          }t          |t          j                  rt          |j        t
                    r| j                            |j                  S |rFd|j        cxk    rdk     sn J d|j         d            | j                            |j                  S d|j        cxk    rdk     sn J d|j         d            | j        	                    |j                  S t          |t          j
                  r|j        j        dk    s
J d	            |j                                        s
J d
            |j        t          j        k    rQ|rO| j                            |j        | j                                        |j                                                  S |j        t          j        k    r|s
J d            |j        S J dt'          |                       )Nra   rb   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the ranger_   r`   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetsFzzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )rh   r+   r1   rq   rO   rH   r'   ri   r  r  r&   rM  r>   rY   rl   r   r   get_int64_tyr   rs   )r   r
  r.  s      r   _convert_elem_to_ir_valuez(TritonSemantic._convert_elem_to_ir_value  s3   dC   	&<%%DdBL)) 	$*d++ 9|,,TZ888 :3333e33333 6J#':6J 6J 6J333|--dj9993333e33333 6J#':6J 6J 6J333|--dj999bi(( 		:#q(((*V(((:$$&&bb(bbbbzRX%%+%|33DKAZAZA\A\48J4L4L4N4NP P Prx'''W W W W W;XXDQUJJXXXXXr   c                r     t          |d          r fd|D             S                      |          gS )Nr  c                <    g | ]}                     |          S r  )ru  )rd  r
  r.  r   s     r   re  z8TritonSemantic._convert_to_ir_values.<locals>.<listcomp>;  s)    \\\$D224EE\\\r   )r  ru  )r   	list_liker.  s   ` `r   r0  z$TritonSemantic._convert_to_ir_values9  sN    9j)) 	]\\\\\R[\\\\..y+FFGGr   basec           	     .   |                      |          }|                      |          }|                      |d          }|j                                        r|j        j                                        rt          d          |j        j        t          j        k    r=|                     |t          j	        t          j
        |j        j                            }t          d          sgd D             t          d D                       s
J d            t          |d          s|g}d |D             }t          |          t          t!          t#          |                              k    s
J d	            t          fd
||||fD                       s
J d            | j                            |j        ||||          }|                     |t          j	        t          j        |j        j                                      S )NFr-  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c                T    g | ]%}t          |t          j                  r|j        n|&S r  r  r	  s     r   re  z1TritonSemantic.make_block_ptr.<locals>.<listcomp>P  s0    fffRVZbl%C%CMtzzfffr   c              3  `   K   | ])}t          |t                    od |cxk    odk     nc V  *dS )r_   r`   N)rh   r+   r	  s     r   r  z0TritonSemantic.make_block_ptr.<locals>.<genexpr>Q  sM      \\:dC((CVt-C-C-C-Ce-C-C-C-C\\\\\\r   zGExpected a list of constant integers (`int32_t` range) in `block_shape`c                T    g | ]%}t          |t          j                  r|j        n|&S r  r  r	  s     r   re  z1TritonSemantic.make_block_ptr.<locals>.<listcomp>W  s/    ZZZDz$==G4ZZZr   z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  X   K   | ]$}t                    t          |          k    V  %d S r)   )rU  )rd  rx  r  s     r   r  z0TritonSemantic.make_block_ptr.<locals>.<genexpr>[  s6      hh)3{##s9~~5hhhhhhr   zBExpected shape/strides/offsets/block_shape to have the same length)r0  rs   r{   r  rT  r/   r1   rj   r   r  r  r   r  r(  r  r  rC  rU  r'   create_make_block_ptrr   r&   r?  )r   ry  rD  stridesr2  r  orderr   s        `  r   make_block_ptrzTritonSemantic.make_block_ptr>  s2    **511,,W55,,W%,HH y!! 	nTY%9%B%B%D%D 	nlmmm 927**99T2?27DI<S#T#TUUD {J// 	(&-KffZefff\\P[\\\\\ 	V 	VU	V 	V 	V uj)) 	GEZZTYZZZe}}U3u::%6%6 7 77779w777 hhhhwX_afGghhhhh 	Q 	QP	Q 	Q 	Q 33DKQXZeglmm{{62?2=AUWb3c3c#d#deeer   c                    |                      |d          }|                     | j                            |j        |          |j                  S r8  )r0  r&   r'   create_advancer   rs   )r   ry  r2  s      r   advancezTritonSemantic.advanced  sF    ,,W%,HH {{4<66t{GLLdiXXXr   r  r  r  List[tl.constexpr]tl.tensor_descriptorc           	         t          |          }d|cxk    rdk    sn t          d| d          t          |          |k    r"t          d| dt          |                     t          |          |k    r"t          d| dt          |                     t          |j        t          j                  sJ |j        j        j        d	z  }t	          j        |d
                   }||z  dk     rt          d| d| d||z   d          t	          j        |d
                   }	|	dk    rt          d|	            fd|D             } fd|D             }t	          j	        |          }t          |j
        t          j                  sJ t	          j        |j
        j        |          }
|j        }|j
        j                                        }                     |          }|j
        j                                        r$|t           j        j        k    rt          d           j                            |d |D             d |D             |||          }t	          j        ||||
          S )Nr      z Expected 1 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got re  rq  r  zRDescriptor block shape must have at least 16 bytes in the last dimension, but got z * z = z bytesz-Tensor descriptor last dim must be 1 but got c                P    g | ]"}                     |t          j                  #S r  )rN  r1   r2   rd  rt   r   s     r   re  z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>  s+    >>>1!!!RX..>>>r   c                t    g | ]4}                     t          j        |          t          j                  5S r  )rN  r1   rc  rl   r  s     r   re  z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>  s6    [[[a4##B$;A$>$>II[[[r   z8Padding option `nan` is not supported for integer blocksc                    g | ]	}|j         
S r  r!  rd  r^  s     r   re  z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>  s    IbIbIbWX!(IbIbIbr   c                    g | ]	}|j         
S r  r!  r  s     r   re  z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>  s    <W<W<W!QX<W<W<Wr   )rU  r/   rh   r>   r1   r  r  r  rc  _unwrap_shapers   r?  r   r   r  rY   r   r  r  r'   create_make_tensor_descriptortensor_descriptor)r   ry  rD  r  r  r  r3  	elem_sizecontig_dim_sizelast_striders   base_handleis_signed_intr  r   s   `              r   make_tensor_descriptorz%TritonSemantic.make_tensor_descriptork  s   5zzTQQQQQRRRw<<4NNNGNNOOO{t##eTeeWZ[bWcWceefff$*bo66666J)<A	1+b/BBY&++ ket  k  k  zC  k  k  HW  Zc  Hc  k  k  k   -gbk::!Z[ZZ[[[>>>>>>>[[[[SZ[[[ &{33$)R_55555}TY1;??k	,::<<--n==9&&(( 	YW8I8Q-Q-QWXXX;;KIbIb\aIbIbIb<W<Ww<W<W<WYdfs<CE E #FE7DAAAr   )r*   r+   r,   r   )r8   r9   r:   r9   r,   r9   )r8   r9   rG   rH   r:   r9   rI   rH   rJ   rH   r,   r9   )T)r]   rH   )r   r9   r   r9   rx   rH   r,   ry   )FFTF)r~   r   r   r   r,   r   )r~   r   r   r   r   r   )r   r   r   r   r   rH   r,   r   )r   r   r   r   r,   r   )r   r   r   r   r   rH   r,   r   )rt   r   r   r   r   r   )rt   r   r   r   r   r   r   r   )r   r   r   r   r,   r   )r   r   r   r   r,   r   )r   r   )r   r   r,   r   )r  r   r,   r  )r;  r+   r<  r+   r   r  r,   r   )r>   r9   r,   r   )rD  rO  r>   r9   r,   r   )rO   r   rD  rO  r,   r   )rO   r   r,   r   )r   r   rZ  rO  r[  rH   r,   r   )r   r   r*   r+   r,   r   )r~   r   r   r   r[  rH   r,   r   )rn  r   ro  r   r,   r   )rn  r   r,   r   )r   r   r}  r~  r,   r   )r   r   rD  r~  r,   r   )r~   r   r   r   r,   r   )r  r  )r   r   r  r9   r,   r   r)   )r   r   r  r9   r  r  r,   r   )r  r   r  r%  r   r%  r  r   r  r  r  r  r  r  r  rH   r,   r   )r(  r)  r  r  r  r  r,   r   )r(  r)  rO   r   r,   ry   )r(  r)  rO   r   r,   r   )r  r  r  r  r,   r   )r  r   rs  r   r  r%  r  r  r  r  r,   r   )r  r   r~  r   rs  r   r  r  r  r  r,   r   )
r  r   rs  r   r  r   r  r  r,   r  )rt   r   r,   r   )r  r   rs  r   r  r   r  r  r  r  r,   r   )r~   r   r   r   r  r   r  r  r  r+   r  r9   r,   r   )r  r  )rs  r   r  r  )r~   r   r  r   r  r  r   r   r  r%  r  r  r  r  r  rH   r   rH   r  rH   r  r9   r,   r   )r  r   rt   r   r   r   r,   r   )r  r  r*   r+   r,   r  )r  r  r*   r+   r.  rH   r,   r  )r5  r   r6  r   r*   r+   r,   r   )r  r>  r?  r@  rA  r+   r,   rB  )r   r   rK  r+   r  r%  r,   r   )rt   r   r  rO  r,   r   )r,   r   )r]  r  r^  r_  r`  rH   r,   r   )r   r   r   r  r  r%  r,   r   )ry  r   r,   r   )r  )ry  r   rD  r_  r  r_  r  r  r  r  r,   r  )|r    r!   r"   r1   r&   __annotations__langr   r4   r7   rF   r\   rr   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r   r  r  r#  r   r-  r   r5  r:  rF  rn   rN  rR  rQ  rY  r_  ri  rm  rw  r|  r  r  r   r  r   r   r  r  r  r  r  r  r  r  r$  r'  r4  r6  r;  rC  rL  rN  rQ  rU  rX  r\  r_  rl  rp  rt  ry  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r-  r4  r9  r=  rJ  rN  rT  rW  rY  r\  rl  r   rr  ru  r0  r  r  r  r  r   r   r%   r%      s0
        YF&&&&D  O O O O
Q Q Q QD D D D05 05 05 05d# # # # #R	@ 	@ 	@ 	@ ae05# # # # #J, , , ,$> > > >>8 8 8 8"8 8 8 8] ] ] ]2> > > >, , , ,8 8 8 8.9 9 9 9"9 9 9 9"	a 	a 	a 	a   \ \ \ \[ [ [ [\ \ \ \' ' ' '& & & &" " " "
] ] ] ]] ] ] ]\ \ \ \   ) ) ) )$ $ $ $/ / / /8 8 8 88 8 8 88 8 8 88 8 8 8	8 	8 	8 	8	8 	8 	8 	8 GK Z Z Z Z Z Z$	) 	) 	) 	)2 2 2 2A A A Ah h h hS S S Sf f f fX X X XV V V V   0

 

 

 

T T T TW W W W 2 2 2 2pt t t tj j j j$i9 i9 i9 i9 i9^    	 	 		 	 	    
 
 
  ,: : :xn n n n	/ 	/ 	/ 	// / / /n n n nu u u u@ @ @[ [ [
u u u uu u u uu u u uu u u uu u u u$ $ $ $0* * * **x x x8*$ *$ *$XW W W W(u u u u   8+ + + +$) $) $) $)L$) $) $) $)L% % % %y y y yx x x xy y y y   < < <[y [y [y [yz   0 0 0 0$.t .t .t .the e e e(& & &u u u u,x x x x.K K K K,  i i i i*	@ 	@ 	@ 	@         C C C Ca a a aR R R RM M M MY Y Y4H H H H
$f $f $f $fLY Y Y Y W](B (B (B (B (B (B (Br   r%   )
__future__r   r  typingr   r   r   r   r   r	   r
   r   triton.runtimer   _C.libtritonr   ra  r   r1   r   r   	Exceptionr   r%   r  r   r   <module>r     s0   " " " " " "  J J J J J J J J J J J J J J J J J J  ! ! ! ! ! !            GCLL7:F F F F F	 F F FzB zB zB zB zBWX& zB zB zB zB zBr   