
    Ni                         d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZmZmZ g dZg dZeez   Zg dZed	gz   Zeez   Zed	gz   Zd
dgZdgez   dgz   ez   d	gz   Z e ee          h dz
            Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0d Z1d3d ee         fd!Z2d4d"ej3        d#eeej4        f         fd$Z5d"e6d#ej7        fd%Z8d#e6fd&Z9d' Z:d5d)Z;d5d*Z<ej=        >                     e;              e<            +          Z?d,e@d-e@fd.ZAd/eej4        ejB        jC        j        f         d#ej4        fd0ZDd4d1eee6                  fd2ZEdS )6    Nknobs)OptionalSetUnion)RandomState)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   >   r   r   r   c                  J    t           j                            dd          dk    S )NTRITON_INTERPRET01)osenvironget     p/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/_internal_testing.pyis_interpreterr%      s    :>>,c22c99r#   c                  r    t                      rd S t          j        j        j                                        S N)r%   tritonruntimedriveractiveget_current_targetr"   r#   r$   r,   r,      s/     t> '::<<<r#   c                  <    t                      } | dn
| j        dk    S )NFcudar,   backendtargets    r$   is_cudar3   $   s"    !!FN55&(@@r#   c                  n    t                      o't          j                                        d         dk    S )Nr      r3   torchr.   get_device_capabilityr"   r#   r$   is_ampere_or_newerr9   )   )    99C99;;A>!CCr#   c                  n    t                      o't          j                                        d         dk    S )Nr   
   r6   r"   r#   r$   is_blackwellr=   -   s)    99D99;;A>"DDr#   c                  n    t                      o't          j                                        d         dk    S Nr   	   r6   r"   r#   r$   is_hopper_or_newerrA   1   r:   r#   c                  n    t                      o't          j                                        d         dk    S r?   r6   r"   r#   r$   	is_hopperrC   5   r:   r#   c                  <    t                      } | dn
| j        dk    S )NFhipr/   r1   s    r$   is_hiprF   9   "    !!FN55%(??r#   c                  R    t                      } | d uo| j        dk    o
| j        dk    S )NrE   gfx90ar,   r0   archr1   s    r$   is_hip_cdna2rL   >   0    !!FU&.E"9UfkX>UUr#   c                  R    t                      } | d uo| j        dk    o
| j        dk    S )NrE   gfx942rJ   r1   s    r$   is_hip_cdna3rP   C   rM   r#   c                  R    t                      } | d uo| j        dk    o
| j        dk    S )NrE   gfx950rJ   r1   s    r$   is_hip_cdna4rS   H   rM   r#   c                  N    t                      } | d uo| j        dk    od| j        v S )NrE   gfx11rJ   r1   s    r$   is_hip_gfx11rV   M   0    !!FT&.E"9Tg>TTr#   c                  N    t                      } | d uo| j        dk    od| j        v S )NrE   gfx12rJ   r1   s    r$   is_hip_gfx12rZ   R   rW   r#   c                  V    t                      pt                      pt                      S r'   )rL   rP   rS   r"   r#   r$   is_hip_cdnar\   W   s    >>=\^^=|~~=r#   c                  &    t                      rdndS )Ni  i   )rS   r"   r#   r$   get_hip_lds_sizer^   [   s    !^^.66.r#   c                  <    t                      } | dn
| j        dk    S )NFxpur/   r1   s    r$   is_xpura   _   rG   r#   c                  N    t                      } | dnt          | j                  S )N )r,   strrK   r1   s    r$   get_archre   d   s%    !!F22S%5%55r#   rsc                    t          | t                    r| f} |t          d          }|t          t          z   v rt          j        t          t
          |                    }||j        nt          ||j                  }||j	        nt          ||j	                  }t          t
          |          }|
                    ||| |          }d||dk    <   |S |r)d|v r%|
                    dd	| t
          j                  }|S |t          v r*|                    dd|                               |          S |d
k    re|                    dd|                               d                              d          t          j        d          z                      d          S |dv r|                    dd|           dk    S t#          d|           )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strrf   lowhighrv   rj   xs           r$   numpy_randomr   i   s   
 % 		zb!!!J,,,Y//00;eiiCUY,?,? Luyyc$	.B.BI&&JJsD%uJ55!q&		 
9x9,,JJr2uBGJ44	l	"	"yyAu%%,,Y777	j	 	 		!Q&&--i88==hGG")T^J_J__eefoppp	/	/	/yyAu%%++7I77888r#   r   returnc                 "   | j         j        }|t          v rt|                    d          }|                     t          t          |                    }t          t          j	        ||          t          t          |                    S |r;d|v r7t          t          j	        | |          t          t          |                    S |dk    r.|dk    r(t          j	        | |                                          S t          j	        | |          S )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicerl   r   r   )rj   namert   lstripr}   rw   ru   r
   r7   tensortlr   )r   r   dst_typetsigned_type_namex_signeds         r$   	to_tritonr      s     	
AK88C==88GB(899::5<@@@'"a..QQQ 	VH,,u|Af===wr8?T?TUUU	>>h*44<&111::<<<|Af----r#   c                 B    t          j        t          |          d           S r'   )r   	str_to_tyr   r   s    r$   str_to_triton_dtyper      s    <215t<<<r#   c                 .   t          | t          j        j                  r| j        S t          | t
          j                  r7t          j        dt          |                     }|	                    d          S t          dt          |                      )Nz^torch\.(\w+)$rk   znot a triton or torch dtype: )rq   r(   languagerj   r   r7   rematchrd   group	TypeErrortype)rj   ms     r$   torch_dtype_namer      s|    %.// Gz	E5;	'	' GH&E

33wwqzzEUEEFFFr#   c                 ,   t          | t                    rc| j                                                                                            t          t          t          | j	                                      S t          | t          j                  rq| j	        t          j        u r8|                                                                                                 S |                                                                 S t          d|            )Nz Not a triton-compatible tensor: )rq   r	   basecpunumpyr}   rw   ru   r   rj   r7   Tensorr   float
ValueErrorr   s    r$   to_numpyr      s    !]## Avzz||!!##**727G7P7P+Q+QRRR	Au|	$	$ A7en$$5577==??((***uuww}}?A??@@@r#   Fc                 z   t                      rdS t                      sdS t          j        j        j        }| rdnd}t          t          t          |	                    d                              }t          |          dk    s
J |            t          j                                        d         dk    o||k    S )	NTF)   r   )r      .   r   r@   )r%   r3   r   nvidiaptxasversiontuplemaprr   splitlenr7   r.   r8   )
byval_onlycuda_versionmin_cuda_versioncuda_version_tuples       r$   supports_tmar      s     t99 u<%-L",9ww's3(:(:3(?(?@@AA!""a''');''':++--a0A5`:LP`:``r#   c                     | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r"   )r   s    r$   tma_skip_msgr      s     ^ff]]r#   )reasonsizealignc                 D    t          j        | t           j        d          S )Nr.   )rj   r   )r7   emptyr   )r   r   _s      r$   default_alloc_fnr      s    ;t5:f====r#   r   c                 \    t          | t          j        j        j                  r| j        S | S r'   )rq   r(   r)   jitr	   r   )r   s    r$   unwrap_tensorr      s(    !V^'566 vHr#   skipped_attrc                      ddl m  t                       t          j                     fdj                                        D             g j        fd}fd}||fS )Nr   r   c                 d    i | ],\  }}t          |j                  r|j        k    %|v)||-S r"   )rq   
base_knobs).0r   knobsetr   r   s      r$   
<dictcomp>z%_fresh_knobs_impl.<locals>.<dictcomp>   sZ       D'gu/00 6=@P5P5PUYamUmUm 	gUmUmUmr#   c                                                      D ]\  } }t          | |                                                                           |j                                        D ]L}|j        t          j        v r	                    |j        d           2
                    |j                   Md_        S )NF)raisingT)itemssetattrcopyresetknob_descriptorsvalueskeyr   r    delenvappendpropagate_env)r   r   knobenv_to_unsetr   	knobs_mapmonkeypatchs      r$   fresh_functionz)_fresh_knobs_impl.<locals>.fresh_function   s    &__.. 	2 	2MD'E4!5!5!7!788807799 2 28rz))&&tx&???? ''1111	2
 #r#   c                                                       D ]\  } }t          | |                                            D ]}|t          j        v rt          j        |= _        d S r'   )r   r   undor   r    r   )r   r   kr   r   r   r   prev_propagate_envs      r$   reset_functionz)_fresh_knobs_impl.<locals>.reset_function   s{    &__.. 	* 	*MD'E4)))) 	 	" 	"ABJJqM0r#   )r(   r   setpytestMonkeyPatch__dict__r   r   )r   r   r   r   r   r   r   r   s   `  @@@@@r$   _fresh_knobs_implr      s    uu$&&K    "^1133  I L,
 
 
 
 
 
 
 
	1 	1 	1 	1 	1 	1 	1 	1 	1 >))r#   )NNNr'   )F)Fr   r   r   ru   r7   r(   triton.languager   r   r   typingr   r   r   r   numpy.randomr   triton.runtime.jitr	   r
   r   rs   rt   integral_dtypesr{   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypessortedr   
tma_dtypesr%   r,   r3   r9   r=   rA   rC   rF   rL   rP   rS   rV   rZ   r\   r^   ra   re   r   ndarrayr   r   rd   rj   r   r   r   r   r   markskipifrequires_tmarr   r   r)   r   r   r   r"   r#   r$   <module>r      s
   				 				                   ' ' ' ' ' ' ' ' ' '  $ $ $ $ $ $ U U U U U U U U U U000
555{*000)ZL8 	<	', &6 x*$y0<?:,NVCC,--0N0N0NNOO
: : := = =A A A
D D DE E ED D DD D D@ @ @
V V V
V V V
V V V
U U U
U U U
> > >/ / /@ @ @
6 6 6
9 9x'< 9 9 9 9<. . .u]EL=X7Y . . . .&=3 =28 = = = =Gs G G G GA A A	a 	a 	a 	a^ ^ ^ ^ {!!llnn"4\\^^!LL>3 >s > > > >U5<);)IIJ u|    +* +*HSX$6 +* +* +* +* +* +*r#   