
    Pi^              	         d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
l m!Z! ddl"m"Z" ddl m#Z# d	dl$m%Z%m&Z&m'Z'm(Z( ddl)m*Z* d dl+m,Z, dZ-dZ. ed          Z/ G d dej0                  Z1dGdZ2 G d d          Z3i Z4g Z5d Z6dHdZ7 G d dee/                   Z8d  Z9d! Z:d" Z; G d# d$          Z<e G d% d&                      Z=d' Z> G d( d)e<e8e/                   Z?edId,            Z@edddddddd-dJd8            Z@	 dKdddddddd-dLd;Z@ G d< d=          ZA G d> d?          ZBd@ ZCdA ZD G dB dCe<          ZE G dD dEe<          ZFdF ZGdS )M    )annotationsdivisionN)defaultdict)	dataclass)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTupleTensorDescriptor)
ModuleType   )knobs   )driver)_async_compile)find_paths_ifget_iterable_pathtype_canonicalisation_dictcanonicalize_dtype)get_cache_key)get_cache_invalidating_env_varsztriton.languagez"triton.experimental.gluon.languageTc                       e Zd ZdZd fdZed             Zd Zd Zdd	Z	d
 Z
d Zd Zd Zd Zd Zd Zd Zd Z xZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    returnNonec                "   t                                                       || _        t          j        |                    d                    | _        || _        || _        h d| _	        t          t          ddh| _        i | _        d| _        d S )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstancecopymathF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsGLUON_MODULETRITON_MODULEsupported_modulesused_global_valsvisiting_arg_default_value)selfr5   r:   r;   src	__class__s        j/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/runtime/jit.pyr4   zDependenciesFinder.__init__.   s    	nSZZ%8%899 "*
 *
 *
& 	"
" TV*/'''    c                4    | j                                         S N)r9   	hexdigestrB   s    rE   retzDependenciesFinder.retY   s    {$$&&&rF   c                    t          j        |j                  rdS t          |dd          }|                    t
                    S )NT
__module__ )inspect	isbuiltinfuncr/   
startswithr>   )rB   noderQ   modules       rE   _is_triton_builtinz%DependenciesFinder._is_triton_builtin]   sA    TY'' 	4|R00  ///rF   c                0   t          |t                    sJ | j                                        |j                                        z  D ]V}|\  }}| j        |         \  }}|j        |         \  }}||k    r)t	          d| d| d| j         d|j         d| d          W| j                            |j                   |j        }|t          t          |dd                    z  }| j                            |                    d	                     d S )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr&   )r0   JITCallabler@   keysRuntimeErrorr5   __name__update	cache_keystrr/   r9   r8   )rB   rQ   kvar_name_v1v2func_keys           rE   _update_hashzDependenciesFinder._update_hashc   s_   $,,,,, &++--0E0J0J0L0LL 	 	AKHa)!,EB)!,EBRxx" Px  P  PB  P  PPTPY  P  Pnrn{  P  P  UW  P  P  P    	$$T%:;;;>Cj%889998??73344444rF   Nc                   ddl m} |t          |          t          u rd S t	          |dd          rd S t	          |dd          dk    rd S t          |t                    r|                     |           d S t          |          r7t          |t                    s"t          ||          st          d|           | j
        rd S |-t          j        |          |f| j        |t          |          f<   d S )	Nr   	constexpr__triton_builtin__FrM   rN   ztriton.language.extra.libdevicez!Unsupported function referenced: )language.coreri   typer   r/   r0   rY   rf   callabler[   rA   r1   deepcopyr@   id)rB   valvar_dictr5   ri   s        rE   record_referencez#DependenciesFinder.record_referenceu   s"   ------ ;$s))z11F3,e44 	F 3b))-NNNFc;'' 	c"""FC== 	JC!6!6 	Jz#y?Y?Y 	JH3HHIII * 	F;?=;M;Mx:XD!4H"67rF   c                     t          |j                  t          j        u r|j        S |j         j        v rd S  fd} ||j                  \  }}|j         j        v r|S                      |||j                   |S )Nc                    j                             | d           }|	|j         fS j                            | d           }|	|j        fS dS )NNN)r:   getr;   )r5   rp   rB   s     rE   name_lookupz2DependenciesFinder.visit_Name.<locals>.name_lookup   sZ    ,""4..CDL((.$$T400CDN**:rF   )rl   ctxastStorero   local_namesr<   rr   )rB   rS   rw   rp   rq   s   `    rE   
visit_NamezDependenciesFinder.visit_Name   s    >>SY&&7N7d&&&4	 	 	 	 	 $DG,,X7d444Jc8TW555
rF   c                *      fd|j         D             S )Nc                :    g | ]}                     |          S  )visit).0eltrB   s     rE   
<listcomp>z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>   s#    555C

3555rF   )eltsrB   rS   s   ` rE   visit_TuplezDependenciesFinder.visit_Tuple   s!     6555495555rF   c                f   |                      |j                  }t          |t          j                  r4|                      |j                  }t          |t          j                  4t          |dd          }|	|| j        v rd S t          ||j                  }|                     |           |S )Nr\   rN   )	r   valuer0   ry   	Attributer/   r?   attrrr   )rB   rS   lhslhs_namerK   s        rE   visit_Attributez"DependenciesFinder.visit_Attribute   s    jj$$cm,, 	(**SY''C cm,, 	(3
B//;(d&<<<4c49%%c"""
rF   c                f    d |j         j         D             | _        |                     |           d S )Nc                    h | ]	}|j         
S r   arg)r   r   s     rE   	<setcomp>z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>   s    >>>CG>>>rF   )argsr{   generic_visitr   s     rE   visit_FunctionDefz$DependenciesFinder.visit_FunctionDef   s6    >>ty~>>>4     rF   c                .     fd}t          j        |j        |j        |j        r|j        gng |j                  D ]}                     |            ||j                   |j                             |j                    ||j	                   d S )Nc                    	 j         rJ d_         | D ]}|                    |           	 d_         d S # d_         w xY w)NTF)rA   r   )defaultsexprrB   s     rE   visit_defaultsz:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sp    8::::26/$ ) )D'

4((() 38///%/7777s	   ,9 	A)
	itertoolschainposonlyargsr   vararg
kwonlyargsr   kw_defaultskwargr   )rB   rS   r   r   s   `   rE   visit_argumentsz"DependenciesFinder.visit_arguments   s    	8 	8 	8 	8 	8 ?4#3TYQUQ\@dbdfjfuvv 	 	CJJsOOOOt'(((:!JJtz"""t}%%%%%rF   c                    |                      |          }t          |t                    r| xj        t	          |          z  c_        d S | j                            |           d S rH   )r   r0   r+   r{   setadd)rB   rS   targets      rE   visitAssnTargetz"DependenciesFinder.visitAssnTarget   sd     D!!fd## 	)F+  (((((rF   c                    t          |j                  dk    rt          d          |                     |j        d                    |                     |           d S )Nr   z2Simultaneous multiple assignment is not supported.r   )r(   targets	TypeErrorr   r   r   s     rE   visit_AssignzDependenciesFinder.visit_Assign   s^    t|!!
 PQQQT\!_--- 	4     rF   c                d    |                      |j                   |                     |           d S rH   r   r   r   r   s     rE   visit_AnnAssignz"DependenciesFinder.visit_AnnAssign   4    T[))) 	4     rF   c                d    |                      |j                   |                     |           d S rH   r   r   s     rE   	visit_ForzDependenciesFinder.visit_For   r   rF   )r#   r$   ru   )r\   rM   __qualname____doc__r4   propertyrK   rU   rf   rr   r|   r   r   r   r   r   r   r   r   __classcell__rD   s   @rE   r"   r"   "   s       	 	)0 )0 )0 )0 )0 )0V ' ' X'0 0 05 5 5$       D  06 6 6
	 	 	! ! !
& & &@) ) )! ! !! ! !! ! ! ! ! ! !rF   r"   r#   r_   c                   dd l mc m} t          | t                    r|                                 } |                     d          rH|                     d          } t          |           } |                     d          sJ d| dd          z   S | 	                    d          rdt          | d d                   z   S |                     d          rdt          | dd                    z   S |                     d          r"t          |                     d                    S nut          | |j
                  rdt          | j                   S t          | |j                  r| j        } n,t          | t                    r| j        } nt	          |           } t!          j        |                     d	d
          |           S )Nr   zconst const**kr   ztl._trN   )triton.language.corelanguagecorer0   r_   striprR   removeprefix_normalize_tyendswithpointer_type
element_tydtyper5   rl   r\   r   rv   replace)tyr   s     rE   r   r     s   '''''''''"c XXZZ=="" 	!))Br""B==%%%%%"QRR&= ;;s 	0r#2#w////== 	/r!""v....== 	9 !7!7888	9	B)	*	* 1=//111	B
	#	# W	B		 [WW%)"**T2*>*>CCCrF   c                      e Zd ZdZdd	Zed
             Zedd            Zedd            Zed             Z	ed             Z
ed             Zed             ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.numr'   paraminspect.Parameterdo_not_specializebooldo_not_specialize_on_alignmentc                >    || _         || _        || _        || _        d S rH   )r   _paramr   r   )rB   r   r   r   r   s        rE   r4   zKernelParam.__init__)  s&    !2.L+++rF   c                    | j         j        S rH   )r   r5   rJ   s    rE   r5   zKernelParam.name0  s    {rF   r#   r_   c                    | j         j        r| j         j        t          j        j        k    rdS t          | j         j                  S )NrN   )r   
annotationrO   	Parameteremptyr   rJ   s    rE   r   zKernelParam.annotation4  s<    {% 	)?7CTCZ)Z)Z2T[3444rF   c                    | j         }|                    d          r|dd          }n|                    d          r
|dd          }|t          t          j                              v r| j         S dS )Nr   r   r   r   rN   )r   rR   r   r   values)rB   as     rE   annotation_typezKernelParam.annotation_type:  sw    O<< 	!""AA\\# 	!""A.5778888?"rrF   c                    d| j         v S Nri   )r   rJ   s    rE   is_constexprzKernelParam.is_constexprE  s    do--rF   c                Z    | j         rdS d| j        v p| j                            d          S )NFr   r   )r   r   rR   rJ   s    rE   is_constzKernelParam.is_constI  s4     	5$/)MT_-G-G-M-MMrF   c                    | j         j        S rH   )r   defaultrJ   s    rE   r   zKernelParam.defaultO  s    {""rF   c                @    | j         j        t          j        j        k    S rH   )r   r   rO   r   r   rJ   s    rE   has_defaultzKernelParam.has_defaultS  s    {"g&7&===rF   N)r   r'   r   r   r   r   r   r   r#   r_   )r\   rM   r   r   r4   r   r5   r   r   r   r   r   r   r   r   rF   rE   r   r   &  s        LLM M M M     _  5 5 5 _5
    _ . . _. N N _N
 # # X# > > X> > >rF   r   c                8     ddl m ddlm d fd	S )	Nr   rh   r   r   FTc                     dS t           t                    rdS t           t                    r@|r  d|          nd } dk    r|rdS d k    r
 dk    rd	|fS d
 k    r
 dk    rd|fS d|fS t           t                    rdS t	           d          rh j        |f}t                              |d           }|,|d         rdndt          |d                   z   }|t          |<   |r  d|          nd }||fS t           t                    r	d j
        fS t                     rd fS t           t                    rAfd D             } fd} |d |D                       }	 |d |D                       }
|	|
fS t           t                    rLt	           j        d          sJ t           j        j                  }d| t           j                   dd fS t                     rTt	           j        d          sJ t           j        j                  }d| t           j                   d j        dd fS t#          dt%                     z            )N)ri   N)u1Nr'   )alignr   )ri   r   i   ii32l            l    u64i64)fp32Ndata_ptrr   r   r   tensorri   c                &    g | ]} |          S r   r   )r   xspecialize_impls     rE   r   zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>  s#    4441OOA&&444rF   c                f    t          d          r t                    |  nt          |           S )N_fields)hasattrrl   tuple)valsr   s    rE   <lambda>zAcreate_specialize_impl.<locals>.specialize_impl.<locals>.<lambda>  s2    '#y:Q:Q&bid3ii&6&6W\]aWbWb rF   c                    g | ]
}|d          S r   r   r   r   s     rE   r   zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>  s    111qad111rF   c                    g | ]
}|d          S r   r   r   s     rE   r   zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>  s    222qt222rF   ztensordesc<>,zUnsupported type: %s)r0   r   r'   r,   r   r   	dtype2strrv   r   rY   r^   r   r   baser+   block_shapelayoutr   rl   )r   r   specialize_valuer   keydskresspec
make_tupletysrZ   innerGluonTensorDescriptorri   specialize_extrar   s   `           rE   r   z/create_specialize_impl.<locals>.specialize_impla  s   ;&&T"" *	@<S!! (	@?OY""3U;;;;UYCaxx,x''SSI%5%5s|###"2"2s|#s|#U## 	@!>S*%% 	@9h'C--T**C{"1v.tt32DSV2L2LL!$	#BR\""3>>>>X\C:[)) 	@//Y'' 	@%%U## 	@4444444DbbbbJ*11D11122C:22T22233D;-.. 		@38Z00000&sx~66EA%Aco)>)>AAA4HH233 	@38Z00000&sx~66EP%Pco)>)>PPPPPRVWW2T#YY>???rF   )FTT)r   ri   'triton.experimental.gluon.nvidia.hopperr   )r  r  ri   r   s   `@@@rE   create_specialize_implr  \  si    $$$$$$aaaaaa-@ -@ -@ -@ -@ -@ -@ -@ -@^ rF   Fc                    t          t                    dk    r(t                              t          d                      t          d         } || |          d         S )Nr   c                    d S rH   r   )rb   kwargss     rE   r   zmangle_type.<locals>.<lambda>  s    PT rF   )r  )r(   specialize_impl_cacheappendr  )r   
specializer   s      rE   mangle_typer    s[    
 !!Q&&$$%;<T<T%U%UVVV+A.O?3<<<Q??rF   c                  "    e Zd ZU ded<   ddZdS )KernelInterfacer    runr#   c                      fdS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                 $     j         | dd|S )NFgridwarmup)r  )r   r  r  rB   s     rE   r   z-KernelInterface.__getitem__.<locals>.<lambda>  s     xtx$T%'Y'YRX'Y'Y rF   r   )rB   r  s   ``rE   __getitem__zKernelInterface.__getitem__  s     ZYYYYYrF   N)r#   r    )r\   rM   r   __annotations__r  r   rF   rE   r  r    s9         
FFFZ Z Z Z Z ZrF   r  c           	        d |                                 D             }dd l}| |d |                                D             t          |                                          d |                                D             t          |                                          |j        |d}|                    |          }|S )Nc                X    i | ]'\  }}||j         j        d k    rt          |          n|(S )r   )rD   r\   r_   r   r  r   s      rE   
<dictcomp>z1serialize_specialization_data.<locals>.<dictcomp>  s:    wwwWaWZ\aEO$<$G$Gc%jjjUwwwrF   r   c                ,    g | ]}t          |          S r   r+   r   s     rE   r   z1serialize_specialization_data.<locals>.<listcomp>  s    ?b?b?bAQ?b?b?brF   c                ,    g | ]}t          |          S r   r&  r   s     rE   r   z1serialize_specialization_data.<locals>.<listcomp>  s    0O0O0OQa0O0O0OrF   )r5   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr  )itemsjsonrZ   r+   r   __dict__dumps)	r5   r(  	constantsattrsr-  r  r/  objserialized_objs	            rE   serialize_specialization_datar6    s    wwenetetevevwwwIKKK9?b?bQZQ_Q_QaQa?b?b?bY  0O0O%**,,0O0O0O_cdidpdpdrdr_s_s#C C
 ZZ__NrF   c           
     v   t          | j                  t          |          k    sJ g }t          | j                                        |          D ]\  }}|j        r|                    d| d           &|j        rdnd}|j        rdnd}|j        rdnd}d| d| d| d| d	}	|j	        r{t          |j	        t                    r|j	        dk    s|j	        dd	         d
v rd}|r"|                    d|j	         d|	 d           |                    d|j	         d           |                    |	            d }
dd                    t          t          |
| j                                                            dgz              dd                    d | j                                        D                        dd                    |           d}d | j                                        D             }t           |d<   t#          |j                  |d<   t'          ||           |d         S )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(, r   Nr   )fpbfFz("z",) + z[1:]z", None)c                t    | d         j         t          j        j        u r| d         n| d          d| d          S )Nr   r   z	=default_r   rO   r   r   )r   s    rE   r   z0create_function_from_signature.<locals>.<lambda>  s=    AaDLG,=,CCCAaDDAaDIaIa[\]^[_IaIa rF   z
def dynamic_func(z	**optionsz):
    params = {c                    g | ]
}d | d| S )'z': r   )r   r5   s     rE   r   z2create_function_from_signature.<locals>.<listcomp>  s)    QQQ4.t....QQQrF   z}
    specialization = [r   z-]
    return params, specialization, options
c                Z    i | ](\  }}|j         t          j        j        ud | |j         )S )default_r?  )r   r5   r   s      rE   r$  z2create_function_from_signature.<locals>.<dictcomp>  sE       D%= 1 777 	45=777rF   rY   r   dynamic_func)r(   
parametersziprZ   r   r  r   r   r   r   r0   r_   joinr+   mapr.  rY   r  get_arg_specializationexec)sigkparamsbackendspecializationr5   kpr   r  r   rK   r   	func_bodyfunc_namespaces                rE   create_function_from_signaturerR    s    s~#g,,....N++--w77 0 0b? 	0!!":4":":":;;;;!#9vv'H$&$8DfJ!@LGGfEOTOOXOOOOuOOOC! 0b0#66 +)T11R5G5K|5[5[%*
 M"))*Rr/A*R*R*R*R*RSSSS #))*Kr/A*K*K*KLLLL%%h//// b
aC))DS#.*>*>*@*@!A!ABBk]RSS 		QQ3>;N;N;P;PQQQRR  xx//  I >//11  N %0N=!(>w?](^(^N$% 	N### .))rF   c                $    | j          d| j         S )N.)rM   r   fns    rE   get_full_namerW    s    m//bo///rF   c                  x    e Zd Zd Zd Zed             Zd Zed             Zd Z	d Z
d Z eee
	          Zd
S )rY   c                   || _         t          j        |          | _        	 t          j        |          \  | _        | _        n"# t          $ r}t          d          |d }~ww xY wt          |          | _	        t          j                    | _        t          j        d                    | j                            }|t!          j        d|t           j                                                  d          }|| _        d | _        i | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        d S )Nz1@jit functions should be defined in a Python filerN   z^def\s+\w+\s*\()rV  rO   r(  getsourcelinesraw_srcstarting_line_numberOSError
ValueErrorrW  _fn_name	threadingRLock
_hash_locktextwrapdedentrG  research	MULTILINEstart_srchashr@   r   r\   r   __globals__rM   )rB   rV  erC   s       rE   r4   zJITCallable.__init__  s,    *2..	Y6=6LR6P6P3DL$33 	Y 	Y 	YPQQWXX	Y%b))#/++ obggdl3344").R\BBHHJJKKL		 TV zO>-s   !A 
A#AA#c                N    | j         t          j        | j                  j        z  S rH   )rk  rO   getclosurevarsrV  r;   rJ   s    rE   get_capture_scopezJITCallable.get_capture_scope  s     '"8"A"A"KKKrF   c                4   | j         5  | j        | j        cd d d            S d| j         | _        t          j        | j                  j        }t          | j        | j        || j	                  }|
                    |                                            |j        t          | j                  z   | _        t          t!          |j                                                            | _        ddlm | xj        t          fd| j                                        D                       z  c_        t+          j        | j                            d                                                    | _        d d d            n# 1 swxY w Y   | j        S )Nz
recursion:)r5   r:   r;   rC   r   rh   c                J    g | ]\  \  }}\  }}t          |          ||f S r   )r0   )r   r5   rb   rp   ri   s       rE   r   z)JITCallable.cache_key.<locals>.<listcomp>)  sH     = = ="5)4Xc1!+C!;!;=tSk = = =rF   r&   )rb  rj  r_  rO   rn  rV  r;   r"   rk  rC   r   parserK   r_   r\  dictsortedr@   r.  r   ri   r6   r7   r8   rI   )rB   r;   dependencies_finderri   s      @rE   r^   zJITCallable.cache_key  s    _ 	N 	Ny$y	N 	N 	N 	N 	N 	N 	N 	N
 5T]44DI.tw77AI"4$-QUQamv9=#C #C #C%%djjll333+/#d6O2P2PPDI$(0C0T0Z0Z0\0\)])]$^$^D!666666II = = = =9=9N9T9T9V9V= = = > > >II  ty'7'7'@'@AAKKMMDI#	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N$ ys   FEFFFc                    t          j        | j                  }t          |t           j                  sJ t          |j                  dk    sJ t          |j        d         t           j                  sJ |S )Nr   r   )ry   rr  ri  r0   Moduler(   bodyFunctionDef)rB   trees     rE   rr  zJITCallable.parse2  sg    y##$
+++++49~~""""$)A,88888rF   c                $    ddl m}  ||           S )Nr   )constexpr_type)r   r|  )rB   r|  s     rE   rl   zJITCallable.type9  s$    777777~d###rF   c                "    d| _         || _        dS )a"  
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.

        Note that it is the callers responsibility to make sure any triton functions that call this function have the `.hash` value reset to None.
        N)rj  ri  )rB   new_srcs     rE   _unsafe_update_srczJITCallable._unsafe_update_src>  s     				rF   c                     t          d          )NzqCannot set attribute 'src' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorrJ   s    rE   _set_srczJITCallable._set_srcH  s     ( ) ) 	)rF   c                    | j         S rH   )ri  rJ   s    rE   _get_srczJITCallable._get_srcM  s
    yrF   )fgetfsetN)r\   rM   r   r4   ro  r   r^   rr  rl   r  r  r  rC   r   rF   rE   rY   rY     s         (  (  (DL L L   X2   $ $ X$  ) ) )
   (x
0
0
0CCCrF   rY   c                  .    e Zd ZU ded<   ded<   ded<   dS )JitFunctionInfor   rT   r_   r5   JITFunctionjit_functionN)r\   rM   r   r   r   rF   rE   r  r  S  s3         IIIrF   r  c                    t          |          t          |          f}|                     |d           }||S t          |          t          |          z   }|| |<   |S rH   )r   r_   rv   )kernel_key_cacherN  r-  r  r^   s        rE   compute_cache_keyr  Z  sd      #g,,
/C $$S$//IN##c'll2I%SrF   c                  n     e Zd Zd ZddZd Zd Zd Zd Zd	 Z		 	 d fd	Z
d Zd Zd Zd Zd Z xZS )r  c                    dS )NFr   rJ   s    rE   is_gluonzJITFunction.is_gluong  s    urF   r#   bool | Nonec	                   |sd S | j         j        }	| j         j        }
d                    d t	          | j        |d                   D                       }|	 d|j         d|j         d|j         d|j	         d|j
         d	| d
}t          | j                   }t          ||||d         ||          }||||j        |j        |j        |j	        |j
        |j        |||d} |||t          |
|	|           d|i||d          S )Nr;  c                ,    g | ]\  }}|j          d | S )z: r5   )r   r   r   s      rE   r   z*JITFunction._call_hook.<locals>.<listcomp>z  s,    ___%*4444___rF   r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r8  r   )r(  devicer2  	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr  F)r  reprrV  compileis_manual_warmupalready_compiled)rV  r   rM   rG  rF  paramsr  r  r  r  r  rW  r6  r  r  )rB   hookr  r(  r  r2  r-  r  r  r5   rT   	arg_reprsr  	full_namer  r  s                   rE   
_call_hookzJITFunction._call_hookj  s     	4w##II__c$+WZ[\W]F^F^___``	  k  k7#4  k  kAQ  k  k`g`r  k  k  HO  H`  k  k  |C  |[  k  k  _h  k  k  k!$'**	;IyR[]def]gipruvv #" *(!, ' 8'.'F".#6"
 
 tvtT22C*6*&"
 
 
 	
rF   c                \    t          |          sJ | j                            |           dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)rm   pre_run_hooksr  )rB   r  s     rE   add_pre_run_hookzJITFunction.add_pre_run_hook  s3    
 ~~!!$'''''rF   c                    ddl m}m}m}m} t
          j                                        } ||          }|| _        || _        || _        t          | j	        | j
        |          }i i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelr  	ASTSourcemake_backend)compilerr  r  r  r  r   activeget_current_targetrR  r(  r  )rB   r  r  r  r  r   rM  binders           rE   create_binderzJITFunction.create_binder  s     	POOOOOOOOOOO1133,v&&,"/WUU2vw..rF   c                                        |          }d | j        D             }d |D             }d t          ||          D             }d|vs
J d            d|vs
J d            d|vs
J d	            |D ]!}	|	|j        vr|	|vrt	          d
|	z            "t          |d           }
fd|
D             }
d |D             t          d           }fd|D             }|||
|fS )Nc                    g | ]	}|j         
S r   r  r   s     rE   r   z*JITFunction._pack_args.<locals>.<listcomp>  s    ///a16///rF   c                    g | ]
}|d          S r   r   r   s     rE   r   z*JITFunction._pack_args.<locals>.<listcomp>  s    000A1Q4000rF   c                    i | ]\  }}||	S r   r   )r   r`   vs      rE   r$  z*JITFunction._pack_args.<locals>.<dictcomp>  s    >>>fq!Q>>>rF   device_typez=device_type option is deprecated; current target will be usedr  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                    |dk    S r   r   )rb   rp   s     rE   r   z(JITFunction._pack_args.<locals>.<lambda>  s    3+;M rF   c           	     p    i | ]2}|t          t                                                    |          3S r   )r   r+   r   )r   path
bound_argss     rE   r$  z*JITFunction._pack_args.<locals>.<dictcomp>  s;    fffSWd-d:3D3D3F3F.G.GNNfffrF   c                    g | ]
}|d          S r   r   r   s     rE   r   z*JITFunction._pack_args.<locals>.<listcomp>  s    111QAaD111rF   c                ,    t          |t                    S rH   )r0   r_   )rb   r   s     rE   r   z(JITFunction._pack_args.<locals>.<lambda>  s    Z35G5G rF   c           	     X    i | ]&}|                     t          |                    'S r   )
parse_attrr   )r   r`   attrvalsrM  s     rE   r$  z*JITFunction._pack_args.<locals>.<dictcomp>  s4    VVV1G&&'81'E'EFFVVVrF   )parse_optionsr  rF  r0  KeyErrorr   )rB   rM  r  r  rN  r-  sigkeyssigvalsr(  r`   
constexprsr3  r  s    ` `        @rE   
_pack_argszJITFunction._pack_args  si   ''////4;///00000>>GW(=(=>>>	F***,k***v%%%'a%%%v%%%'a%%% 	Y 	YA(((Qg-=-=SVWWXXX"7,M,MNN
ffff[efff
11.111h(G(GHHVVVVVPUVVV	:u44rF   c                  |                     d| j                  pt          j        j        |d<   t          j                                        }t          j                            |          }| j        D ]
} ||i | | j	        |         \  }}	}
}} ||i |\  }}}t          |	||          }|                     |d           }|=|                     |||||          \  }}}}|                     |||||||          }|d S t                      }| j                                        D ]?\  \  }}\  }}|                     ||          x}|k    rt!          d| d| d|           @|s|J t#          |          r ||          }t%          |          }|d         }|dk    r|d         nd}|dk    r|d         nd}t'          |d          r|                                } |j        ||g|                                R  } |j        |||||j        |j        |t          j        j        t          j        j        g	|                                R   |S )	NdebugrW   z1 has changed since we compiled this kernel, from z to r   r   r   result)rv   r  r   runtimer   r  get_current_deviceget_current_streamr  device_cachesr  r  _do_compileobjectr@   r.  r[   rm   r(   r   r  launch_metadatar   r  functionpacked_metadatalaunch_enter_hooklaunch_exit_hook) rB   r  r  r   r  r  r  r  kernel_cacher  r   rM  r  r  rN  r-  r  kernelr(  r  r3  not_presentr5   rb   rp   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  s                                    rE   r  zJITFunction.run  s    **Wdj99PU]=Pw 113311&99 & 	" 	"DD$!&!!!!BFBTU[B\?& /5fd.Ef.E.E+
NG 0.'JJ!!#t,, >48OOGVU_aoDK5M 5M1GY
E %%c9fj'SXZ`aaF~t hh.2.C.I.I.K.K 	q 	q*IT1*\&**4===#EE"otoo^aoogmooq q q F  	n###~~ (tJ''D		I!WF )AT!WW1F )AT!WW1Fvx(( )4f4T6XJDUDUDWDWXXXOFJvvvvvH^`o}68VnYcYjYjYlYln n n nrF   c                H    | j         | j        n|                      |          S rH   )_reprr_  )rB   rb   s     rE   r  zJITFunction.repr  s     $
 2t}}

1ErF   Nc	           	     z   |r|ng }|r|ng }t                                          |           |j        | _        || _        || _        || _        || _        || _        g | _	        t          | j        j                                                  D ]I\  }	}
|	|v p|
j        |v }|	|v p|
j        |v }| j	                            t!          |	|
||                     Jt#          | j                  | _        d | _        || _        || _        d | j	        D             | _        d | j	        D             | _        g | _        d S )Nc                    g | ]	}|j         
S r   r  r   ps     rE   r   z(JITFunction.__init__.<locals>.<listcomp>  s    666Q!&666rF   c                *    g | ]}|j         	|j        S r   )r   r   r  s     rE   r   z(JITFunction.__init__.<locals>.<listcomp>  s!    HHHQH15HHHrF   )r3   r4   rM   rT   versionr   r   r  r  r  	enumerater(  rE  r   r5   r  r   r   r  r  r  r  rX   	arg_namesr  r  )rB   rV  r  r   r   r  rX   r  r  ir   dnsdns_oarD   s                rE   r4   zJITFunction.__init__  se   1BJ--Ki)q)G)Goq&m!2.L+
.!$.";"B"B"D"DEE 	C 	CHAu((KEJ:K,KC88hEJJh<hFK{1eS&AABBBB ));<< 
  76$+666HH$+HHH  rF   c               R     | j         t          t          j        |          |dd|S )NTr  )r  rH  
MockTensor
wrap_dtype)rB   r  r   r  s       rE   r  zJITFunction.warmup   s.    txZ5JD1Q1QT$\\U[\\\rF   c           	        dd l }dd lm t          j                                        }|                    |          }|d         | j        k    r t          d|d          d| j                   t          t          |d                   }|d         }fdt          ||          D             }t          t          |d                   }|d	         }	t          t          ||	                    }
t          |d
                                                   }d |d                                         D             }|d         }| j        |         \  }}}}}|                    |          }|                     ||||||
d          S )Nr   r5   zSpecialization data is for z but trying to preload for r)  r*  c                z    i | ]7\  }}|j                             |          r                     |          n|8S r   )r   is_dtype)r   r  r   tls      rE   r$  z'JITFunction.preload.<locals>.<dictcomp>-  sR     
 
 
U BH$5$5e$<$<G%%
 
 
rF   r+  r,  r(  c                b    i | ],\  }}|t          |t                    rt          |          n|-S r   )r0   r+   r   r#  s      rE   r$  z'JITFunction.preload.<locals>.<dictcomp>5  sG     
 
 
U E4!8!8Cue
 
 
rF   r-  r  T)r  )r/  triton.languager   r   r  r  loadsr_  r[   rH  r   rF  rs  r.  r  r  r  )rB   r  r/  r  deserialized_objr)  r*  r  r+  r,  r3  r(  r-  r  rb   rM  r  s                   @rE   preloadzJITFunction.preload#  s   $$$$$$1133::&9::F#t}44r.>v.Frrcgcprrt t tE#3O#DEE(9
 
 
 
!-??
 
 

  0 >??
%l3
SZ0011)+6<<>>??	
 
.y9??AA
 
 
 u%"081a!''00   
 
 	
rF   c           
     H     j                  \  }}	}                     t          j        j        g          rd S                                 t          j                                        }
|
Nt                      t          |	          } fd} f	d}|
                    |||          }nN                     j                  }|<                        t          j        j        g           |S )Nc                 @                         j                   S )N)r   r-  	_env_vars)r  r0  )env_varsr-  rB   rC   r   s   rE   async_compilez.JITFunction._do_compile.<locals>.async_compileS  s!    ||C@P\d|eeerF   c           
     j   	 | <                        t          j        j        g	           d S rH   )r  r   r  jit_post_compile_hook)
r  r3  r  r  r  r  r-  rB   r(  r  s
    rE   finalize_compilez1JITFunction._do_compile.<locals>.finalize_compileV  sH    $*S! CS)U[]gip!&1 1 1 1 1rF   )r   r-  )r  r  r   r  jit_cache_hookr  r   active_moderv   r   r   submitr  r0  r  )rB   r  r(  r  r  r-  r3  r  rb   rM  
async_moder^   r  r  r  r  r  rC   r   s   ````````       @@@@rE   r  zJITFunction._do_compileF  s   .2.@.H+a!??5=7iQ[]dglfmouvv 	4nnT9j%@@#/3355
!688H%c7GXFFIf f f f f f f f f1 1 1 1 1 1 1 1 1 1 1 1 1
  &&y-AQRRFF\\#fg>N\OOF &LOOEM?iQWYcelotnu"$ $ $rF   c                     t          d          )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)r[   rB   r   r  s      rE   __call__zJITFunction.__call__c  s    WXXXrF   c                2    d| j          d| j        j         dS )NzJITFunction(:r8  )rT   rV  r   rJ   s    rE   __repr__zJITFunction.__repr__f  s"    CdkCCDG,@CCCCrF   )r#   r  )NNNNNNN)r\   rM   r   r  r  r  r  r  r  r  r4   r  r  r  r	  r  r   r   s   @rE   r  r  e  s         ,
 ,
 ,
 ,
\( ( (/ / /5 5 502 2 2hF F F mq;?"  "  "  "  "  " H] ] ]!
 !
 !
F  :Y Y YD D D D D D DrF   r  rV  JITFunction[T]c                    d S rH   r   rU  s    rE   jitr  o  s    CrF   r  r  r  r   r   r  rX   r  Optional[Callable]r  r   Optional[Iterable[int | str]]r   r  Optional[bool]rX   Callable[[T], JITFunction[T]]c                    d S rH   r   r  s          rE   r  r  t  s	     CrF   Optional[T]4Union[JITFunction[T], Callable[[T], JITFunction[T]]]c               F    dfd}|  ||           S |S )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    rV  r    r#   r  c           
         t          |           sJ t          j        j        rddlm}  ||           S t          |           S )Nr   )InterpretedFunction)r  r   r   r  rX   r  r  )rm   r   r  	interpretinterpreterr  r  )	rV  r  r  r   r   r  rX   r  r  s	     rE   	decoratorzjit.<locals>.decorator  s    ||=" 	888888&&r7N_Fdlq08tUdf f f f "3/M! /	 	 	 	rF   NrV  r    r#   r  r   )	rV  r  r  r  r   r   r  rX   r  s	    ``````` rE   r  r    sb    :           & 
~y}} rF   c                  b    e Zd ZdZed             ZddZd Zed             Zed             Z	dS )	r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                Z    | j         j        dk    r| j        dk    rt          |           S | S )Nr   torch)rD   r\   rM   r  r   s    rE   r  zMockTensor.wrap_dtype  s/    =!W,,71J1Jc??"
rF   Nc                ,    |dg}|| _         || _        d S )Nr   )r   shape)rB   r   r#  s      rE   r4   zMockTensor.__init__  s     =CE



rF   c                    dg}| j         dd          D ] }|                    |d         |z             !t          t          |                    S )Nr   r   )r#  r  r   reversed)rB   stridessizes      rE   stridezMockTensor.stride  sV    #JqrrN 	/ 	/DNN72;-....Xg&&'''rF   c                     dS Nr   r   r   rF   rE   r   zMockTensor.data_ptr      qrF   c                     dS r*  r   r   rF   rE   	ptr_rangezMockTensor.ptr_range  r+  rF   rH   )
r\   rM   r   r   staticmethodr  r4   r(  r   r-  r   rF   rE   r  r    s         
   \
   ( ( (   \   \  rF   r  c                  L    e Zd Zd Zd Zd ZddZd Zd Zd	 Z	d
 Z
d Zd ZdS )TensorWrapperc                t    || _         || _        |j        | _        |j        | _        | j        j        | _        d S rH   )r   r  datar  r#  )rB   r  r   s      rE   r4   zTensorWrapper.__init__  s1    
	I	kY_


rF   c                4    | j                                         S rH   )r  r   rJ   s    rE   r   zTensorWrapper.data_ptr  s    y!!###rF   c                      | j         j        | S rH   )r  r(  )rB   r   s     rE   r(  zTensorWrapper.stride  s    ty&&rF   r#   r_   c                (    d| j          d| j         dS )NzTensorWrapper[r  r8  )r   r  rJ   s    rE   __str__zTensorWrapper.__str__  s    :
::di::::rF   c                4    | j                                         S rH   )r  element_sizerJ   s    rE   r8  zTensorWrapper.element_size  s    y%%'''rF   c                Z    t          | j                                        | j                  S rH   )r0  r  cpur   rJ   s    rE   r:  zTensorWrapper.cpu  s    TY]]__dj999rF   c                D    | j                             |j                    d S rH   )r  copy_)rB   others     rE   r<  zTensorWrapper.copy_  s    	
#####rF   c                Z    t          | j                                        | j                  S rH   )r0  r  cloner   rJ   s    rE   r?  zTensorWrapper.clone  s     TY__..
;;;rF   c                \    t          | j                            |          | j                  S rH   )r0  r  tor   )rB   r  s     rE   rA  zTensorWrapper.to  s"    TY\\&114:>>>rF   c                \    t          | j                            |          | j                  S rH   )r0  r  	new_emptyr   )rB   sizess     rE   rC  zTensorWrapper.new_empty   s$    TY0077DDDrF   Nr   )r\   rM   r   r4   r   r(  r6  r8  r:  r<  r?  rA  rC  r   rF   rE   r0  r0    s        % % %$ $ $' ' '; ; ; ;( ( (: : :$ $ $< < <? ? ?E E E E ErF   r0  c                   t          | t                    r,|| j        j        k    r| j        S t          | j        |          S t	          | d          rt          | |          S t          dt          |            d          )Nr   zCannot reinterpret a rT  )r0   r0  r  r   r   r   rl   )r   r   s     rE   reinterpretrF    s    &-(( AFK%%%; !e444		$	$ AVU+++?V???@@@rF   c                0   | }t          |t                    s|j        }t          |t                    |j        j        j        }|j        }t          |j                  D ]3\  }}|                                	                    d          r||z  } n4||fS )Nzdef )
r0   rY   rV  __code__co_filenamer\  r  r[  r   rR   )rV  base_fn	file_name
begin_lineidxlines         rE   get_jit_fn_file_linerO    s    G+.. * +.. 
#/I-J w//  	T::<<""6** 	#JE	 j  rF   c                      e Zd Zd Zd ZdS )BoundConstexprFunctionc                "    || _         || _        d S rH   )__self____func__)rB   instancerV  s      rE   r4   zBoundConstexprFunction.__init__'  s     rF   c                .     | j         | j        g|R i |S rH   )rT  rS  r  s      rE   r	  zBoundConstexprFunction.__call__+  s&    t}T]<T<<<V<<<rF   N)r\   rM   r   r4   r	  r   rF   rE   rQ  rQ  %  s2          = = = = =rF   rQ  c                  0     e Zd Z fdZd ZdddZ xZS )ConstexprFunctionc                J    t                                          |           d S rH   )r3   r4   )rB   rV  rD   s     rE   r4   zConstexprFunction.__init__1  s!    rF   c                *    |t          ||           S | S rH   )rQ  )rB   r4  objclasss      rE   __get__zConstexprFunction.__get__4  s    ?)#t444rF   N)	_semanticc                   ddl mm} fd|D             }fd|                                D             } | j        |i |}||S t
          j        j        r|S  ||          S )Nr   )_unwrap_if_constexprri   c                &    g | ]} |          S r   r   )r   r   r_  s     rE   r   z.ConstexprFunction.__call__.<locals>.<listcomp>=  s%    666A$$Q''666rF   c                .    i | ]\  }}| |          S r   r   )r   r`   r  r_  s      rE   r$  z.ConstexprFunction.__call__.<locals>.<dictcomp>>  s+    JJJ!Q!))!,,JJJrF   )r   r_  ri   r.  rV  r   r  r  )rB   r]  r   r  ri   r  r_  s         @rE   r	  zConstexprFunction.__call__:  s    HHHHHHHH6666666JJJJ6<<>>JJJ dgt&v&&J =" 	Jy~~rF   )r\   rM   r   r4   r\  r	  r   r   s   @rE   rX  rX  /  se               )-         rF   rX  c                     t          |           S )z
    Wraps an arbitrary Python function so that it can be called at
    compile-time on constexpr arguments in a Triton function and
    returns a constexpr result.
    )rX  rU  s    rE   constexpr_functionrc  M  s     R   rF   r   )Fr  )r  r  r  r  r   r  r   r  r  r  rX   r  r#   r  rH   )rV  r  r  r  r  r  r   r  r   r  r  r  rX   r  r#   r  )H
__future__r   r   ry   r1   r6   rO   r   r`  re  rc  collectionsr   dataclassesr   	functoolsr   typingr   r	   r
   r   r   r   r   r   r   r   triton.tools.tensor_descriptorr   typesr   rN   r   r   r   _utilsr   r   r   r   cacher   triton._C.libtritonr   r>   r=   r    NodeVisitorr"   r   r   r   r  r  r  r  r6  rR  rW  rY   r  r  r  r  r  r0  rF  rO  rQ  rX  rc  r   rF   rE   <module>ro     s   , , , , , , , , 



            				  # # # # # # ! ! ! ! ! ! % % % % % % d d d d d d d d d d d d d d d d d d d d d d d d ; ; ; ; ; ;                         e e e e e e e e e e e e             ? ? ? ? ? ?!3GCLLb! b! b! b! b! b! b! b!TD D D D4/> /> /> /> /> /> /> />d 	 4 4 4n@ @ @ @	Z 	Z 	Z 	Z 	Zgaj 	Z 	Z 	Z	 	 	7* 7* 7*t0 0 0_1 _1 _1 _1 _1 _1 _1 _1D          BD BD BD BD BD+q1 BD BD BDT 
   
 
 #*.7;DH #
 
 
 
 
 

 4 #*.7;DH #4 4 4 4 4 4x       B"E "E "E "E "E "E "E "EJA A A! ! !$= = = = =[ = = =       <! ! ! ! !rF   