
    Pi>U                         d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlZd dlmZmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd d	lmZ d
efdZde
j         fdZ! ej"                    d             Z# ej"                    de$fd            Z%de$fdZ& ej"                    de$fd            Z' ej"        d          d             Z(de$fdZ) ed           G d d                      Z* G d de          Z+dS )    )BaseBackend	GPUTargetLanguage)irpassesllvmnvidia)knobs)
PTXASError)	dataclassN)AnyDictTupleOptional)
ModuleType)Pathtargetc                 L    dt           t          t          t          f         fd}|S )Nreturnc                 f    | j         j        }|j         j        }||k    s
J d            |dk    rdS dS )Nz%lhs and rhs bitwidth must be the same   )   r       )r   r      )scalarprimitive_bitwidth)lhs_typerhs_typelhs_bitwidthrhs_bitwidths       w/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/backends/nvidia/compiler.pycheck_dot_compatibilityz-min_dot_size.<locals>.check_dot_compatibility   sE    99|+++-T+++1::    )r   int)r   r"   s     r!   min_dot_sizer%      s0    uS#s]7K     #"r#   r   c                  $    t           j        j        S N)r
   r	   ptxas r#   r!   	get_ptxasr*   "   s    <r#   c                      t           j        j        } | | S t          j        t                      j        dg                              d          }|S )Nz	--versionutf-8)r
   r	   mock_ptx_version
subprocesscheck_outputr*   pathdecode)mock_verversions     r!   get_ptxas_versionr4   &   sF    |,H%y{{'7&EFFMMgVVGNr#   c                 4   t          | t                    sJ t          t          |                     d                    \  }}|dk    r|dk     rd|z   S d|z   dz
  S |dk    rd|z   S |dk    rd	|z   S |d
k    rd}||d
z
  dz  z   |z   S t          d| z             )zK
    Get the highest PTX version supported by the current CUDA driver.
    .      P   r      F   
   ?      Z   z?Triton only support CUDA 10.0 or higher, but got CUDA version: )
isinstancestrmapr$   splitRuntimeError)cuda_versionmajorminorbase_ptxs       r!   ptx_get_versionrI   /   s    
 lC(((((sL..s3344LE5{{199::>!{{Ez{{Ez{{52:++e33
X[gg
h
hhr#   archc                 \    | j         }|"t                      j        }t          |          }|S r'   )ptx_versionr*   r3   rI   )optionsrJ   rL   rE   s       r!   get_ptx_version_from_optionsrN   G   s/    %K {{*%l33r#   c                 P    t          | |          }t          d|          }d| }|S )NV   z+ptx)rN   min)rM   rJ   rL   llvm_ptx_versionfeaturess        r!   get_featuresrT   O   s6    .w==K 2{++(&((HOr#   c                     t          | d          5 }t          j        |                                                                          cd d d            S # 1 swxY w Y   d S )Nrb)openhashlibsha256read	hexdigest)r0   fs     r!   	file_hashr]   ]   s    	dD		 4Q~affhh''11334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4s   8AAA
capabilityc                 $    | dk    rdnd}d|  | S )Nr?   a sm_r)   )r^   suffixs     r!   sm_arch_from_capabilityrd   c   s(    "$$SS"F%%V%%%r#   T)frozenc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   d	Z	e
e         ed
<   dZeed<   d	Zeed<   d	Zeed<   d	Ze
e         ed<   dZeed<   dZeed<   dZeed<   dZee         ed<   dZee         ed<   dZeed<   dZee         ed<   d	Zeed<   d	Zeed<   dZeed<   d Zeed!<   dZeed"<   d	Z eed#<   d$Z!eed%<   d& Z"d' Z#d	S )(CUDAOptions   	num_warpsr   num_ctas   
num_stagesr   	warp_sizeNmaxnreg)r   r   r   cluster_dimsrL   ptx_optionsir_overrideTenable_fp_fusionFlaunch_cooperative_grid
launch_pdl)fp8e5fp8e4b15supported_fp8_dtypesr)   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)ry   tf32x3ieeeallowed_dot_input_precisionsmax_num_imprecise_acc_defaultextern_libsdebugcudabackend_namesanitize_overflowrJ   ra   instrumentation_modec                    t          t                    j        dz  }| j        i nt	          | j                  }|                    dd           s&t          j        j        pt          |dz            |d<   t                              | dt          |                                                     | j        dk    r| j        | j        dz
  z  dk    s
J d            d S )Nlib	libdevicezlibdevice.10.bcr   r   r   znum_warps must be a power of 2)r   __file__parentr   dictgetr
   r	   libdevice_pathrA   object__setattr__tupleitemsri   )selfdefault_libdirr   s      r!   __post_init__zCUDAOptions.__post_init__   s    h.6 ,4bb$t?O:P:P{D11 	n',|'B'mc.[lJlFmFmK$4k6G6G6I6I0J0JKKK~!!t~!9K'LQR&R&R&R/ 'S&R&R&R&Rr#   c                 v   t          | j                  }t          d t          |d                   D                       |d<   d                    d t          |                                          D                       }t          j        |                    d                    	                                S )Nc              3   >   K   | ]\  }}|t          |          fV  d S r'   )r]   ).0kvs      r!   	<genexpr>z#CUDAOptions.hash.<locals>.<genexpr>   s1      (h(htq!!Yq\\):(h(h(h(h(h(hr#   r   _c                 "    g | ]\  }}| d | S )-r)   )r   namevals      r!   
<listcomp>z$CUDAOptions.hash.<locals>.<listcomp>   s&    SSSID#4#SSSr#   r,   )
r   __dict__r   sortedjoinr   rX   rY   encoder[   )r   	hash_dictkeys      r!   hashzCUDAOptions.hash   s    ''	#((h(hviXeNfGgGg(h(h(h#h#h	- hhSS	@Q@Q9R9RSSSTT~cjj1122<<>>>r#   )$__name__
__module____qualname__ri   r$   __annotations__rj   rl   rm   rn   r   ro   r   rL   rp   rA   rq   rr   boolrs   rt   rw   r   rx   rz   r}   r~   r   r   r   r   r   rJ   r   r   r   r)   r#   r!   rg   rg   i   s        IsHcJIs "GXc]!!!#L%###KK!%K#%%%!d!!!$)T)))J'<%*<<<46%uSz666'----/I %*III*.!4...KE4L#"t"""D# "#"""0 0 0? ? ? ? ?r#   rg   c                       e Zd ZdZedefd            Zd ZdefdZ	deddf fdZ
defdZd	 Zd
 Zdeeef         fdZd Zed             Zed             Zd Zd Zd Zd Zd Z ej                    d             Z xZS )CUDABackendNr   c                     | j         dk    S )Nr   )backend)r   s    r!   supports_targetzCUDABackend.supports_target   s    ~''r#   c                     d}t          j        ||          }|st          d|           t          |                    d                    S )Nz	^sm(\d+)$z(TRITON_OVERRIDE_ARCH must have the form r   )re	fullmatch
ValueErrorr$   group)r   rJ   patternmatchs       r!   _parse_archzCUDABackend._parse_arch   sP    Wd++ 	SQQQRRR5;;q>>"""r#   r   c                 @    |                      |j                  }d| S )Ncuda:)r   rJ   )r   rM   r^   s      r!   get_target_namezCUDABackend.get_target_name   s%    %%gl33
#z###r#   c                 X    t                                          |           d| _        d S )Ncubin)super__init__
binary_ext)r   r   	__class__s     r!   r   zCUDABackend.__init__   s&       !r#   c                    dt           j        j        pd| j        j         i}|                    fdt          j                                        D                        t          | 
                    |d                             }|                    dd          dk    r|dk     rt          d| d          d	|vrSt          t          j                  }|d
k    r|                    d           t!          t#          |                    |d	<   d|vr|dk    rd|d<   d|vrt           j        j        |d<   |dk    rdnd|d<   t          di |S )NrJ   smc                 :    i | ]}|v |         ||         S r'   r)   )r   r   optss     r!   
<dictcomp>z-CUDABackend.parse_options.<locals>.<dictcomp>   s7    uuuATUY]T]T]aefgahatQQatatatr#   rj   r   r?   zBnum_ctas > 1 requires NVIDIA SM90+ (Hopper). Current target is sm_zM. This configuration will fail. Please set num_ctas=1 or target an SM90+ GPU.rw   Y   fp8e4nvrx   )rv   rr   i   @r   r~   r)   )r
   runtimeoverride_archr   rJ   updaterg   __dataclass_fields__keysr$   r   r   r   setrw   addr   r   languagedefault_fp_fusion)r   r   argsr^   rw   s    `   r!   parse_optionszCUDABackend.parse_options   s   3N7NDK<L7N7NOuuuu)I)N)N)P)Puuuvvv))$v,7788
88J""Q&&:?? O6@O O O Q Q Q "--#&{'G#H#H R$((333+08L1M1M+N+ND'(.d::R<J89T))',~'GD#$9Cr9I9Iq,-""T"""r#   c                 r    |j         |j        |j        |j        d         |j        d         |j        d         fS )Nr   r      )ri   rj   sharedro   )r   metadatas     r!   pack_metadatazCUDABackend.pack_metadata   s>    O!!$!!$!!$
 	
r#   c                     dd l mc mc m} t	          |                     |j                            }|dk    r|j        n|j        t          | j
                  d}|S )Nr   r9   )convert_custom_typesr%   )triton.language.extra.cudar   extrar   r$   r   rJ   convert_custom_float8_sm80convert_custom_float8_sm70r%   r   )r   rM   r   r^   codegen_fnss        r!   get_codegen_implementationz&CUDABackend.get_codegen_implementation   s~    111111111111))',7788
 0:R/?/?D++TEd%%
 

 r#   c                     ddl m} d|iS )Nr   )r   ztriton.language.extra.libdevice)r   r   )r   r   s     r!   get_module_mapzCUDABackend.get_module_map   s    88888819==r#   c                     t          j        |           t          j        r!t          j                            |           d S d S r'   )r	   load_dialectsr   instrumentation)r   ctxs     r!   r   zCUDABackend.load_dialects   sE    S!!!& 	;'55c:::::	; 	;r#   c                    t          j        | j                  }|                                 t          j                            |           t          j                            |           |dz  dk     rt          j        	                    |           t          j        
                    |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           |                    |            | S )Nr<   	   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_symbol_dceadd_loop_unrollrun)modr   optr^   pms        r!   	make_ttirzCUDABackend.make_ttir   s   _S[))
!!"%%%..r222aK@@DDD''+++###))"---b!!!$$R(((##B'''
s
r#   c                    |j         E|                     dt          j        | j                                      |j                              t          j                    }|j        6|j        d         |_	        |j        d         |_
        |j        d         |_        t          j        | j                  }|                                }t          j                            |d| |j        d|j                   t          j                            |           |dz  dk    rt          j                            |           t          j        j                            ||           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            ||d	k               t          j        j                            |           t          j                            |           |dz  d
v r2t          j                            |           t          j                             |           t          j        !                    |           t          j                             |           t          j        "                    |           t          j        j#        $                    ||j%        |           t          j        &                    ||j%                   t          j        '                    |           t          j        (                    ||j%        |           n|dz  dk    rt          j                            |           t          j                             |           t          j        !                    |           t          j        )                    |           t          j        *                    |d           t          j        j        +                    |           t          j        &                    ||j%                   t          j        '                    |           t          j        ,                    ||j%                   t          j        (                    ||j%        |           t          j        "                    |           t          j        *                    |d           t          j        j        -                    |           nt          j        !                    |           t          j                             |           t          j                            |           t          j        .                    |           t          j                            ||d	k               t          j        /                    |           t          j        j        0                    |           t          j                            |           t          j        j        1                    |           t          j        2                    |           t          j        3                    |           t          j                            |           t          j        4                    |           |dz  dk    r$t          j        j        5                    |           t          j        j        6                    ||           t          j        j        7                    |           t          j        8                    |           t          j        9                    |           t          j                             |           |:                    |            |j	        |j
        |j        f|d<   | ;                                }||d<   | S )Nzttg.maxnregr   r   r   r   r   r<   r   r9   )r   r   FTr   ro   tensordesc_meta)<rn   set_attrr   builderr   get_int32_attrr	   ClusterInforo   clusterDimXclusterDimYclusterDimZr   r   r   r   add_convert_to_ttgpuirri   rj   ttgpuiradd_coalesceadd_f32_dot_tc	ttnvgpuiradd_plan_ctaadd_remove_layout_conversionsadd_optimize_thread_localityadd_accelerate_matmuladd_optimize_dot_operands add_optimize_descriptor_encodingadd_loop_aware_cseadd_fuse_nested_loopsr   r   add_triton_licm add_combine_tensor_select_and_ifhopperadd_hopper_warpspecrl   add_assign_latenciesadd_schedule_loopsadd_pipelineadd_optimize_accumulator_initadd_hoist_tmem_allocadd_promote_lhs_to_tmemadd_warp_specializeadd_remove_tmem_tokensadd_prefetchadd_coalesce_async_copyadd_optimize_tmem_layoutsadd_interleave_tmemadd_reduce_data_duplicationadd_reorder_instructionsr   add_tma_loweringadd_fence_insertionadd_lower_mmaadd_sccpr   r   get_tensordesc_metadata)r   r   r   r^   cluster_infor   dump_enabledr   s           r!   
make_ttgirzCUDABackend.make_ttgir   s    ;"LL
3;(?(?(N(Ns{([([\\\)++''*'7':L$'*'7':L$'*'7':L$_S[))((**2/Cz/C/CS]TVX[Xdeee##B'''q  N))"---,,R>>>44R88833B777,,R00044R88800Z25EFFF@@DDD&&r***v%%N00444M++B///K''+++M++B///N;;B???M 44RVVVN//CNCCCN--b111N''CNLIIII2##N00444M++B///K''+++N88<<<N//E:::M#;;B???N//CNCCCN--b111N..r3>BBBN''CNLIIIN;;B???N//D999M#::2>>>>K''+++''+++&&r***##B'''00Z25EFFF..r22299"===44R88833B777222666//333&&r***$$R(((q  M#44R88833B
CCC--b111r"""b!!!''+++
s$0$<l>VXdXp#q 5577&5"#
r#   c                 0   |}t          j        |j                  }|                                 t          j                            |           t          j                            |           t          j        	                    |           t          j
                            |           t          j                            |           t          j                            |           |                    |           |                                |d<   |S )Nr   )r   r   r   r   r   gluonr   add_resolve_auto_encodingsr   r'  r   r  r   r  r  r   r(  )r   srcr   rM   r^   r   r   s          r!   gluon_to_ttgirzCUDABackend.gluon_to_ttgirE  s    _S[))
  $$$//333r"""&&r***&&r***77;;;
s&)&A&A&C&C"#
r#   c                 :
   t          || j        j                  }|}t          j        |j                  }|                                 t          j        	                    |           t          j        
                    |           t          j                            |           t          j        j                            |||           t          j        j                            |           t"          j        j        rt          j                            |           t          j                            |           t          j        j                            ||           t.          j        r&t.          j                            d||j                   t          j        j                            |||           t          j                            |           t          j                            |           t          j        j                            |           t          j        j                            |           t          j                            |           t          j                            |           t          j                             |           t          j        !                    |           t"          j        j"        st          j#        $                    |           t.          j        r&t.          j                            d||j                   |%                    |           tM          j'                     tM          j                    }t"          j        j(        rtS          d          tM          j*        ||          }	tW          |          }
tY          || j        j                  }d}t          j-                     tM          j.        |	||
|           t          j/        |	           |j0        r:t          j1        |	          r&d |j0        D             }tM          j2        |	|           tM          j3        |	tL          j4                   |5                    d          }|||d<   |5                    d          |d	<   |5                    d
          |d<   |5                    d          |d<   |5                    d          |d<   |5                    d          pd|d<   |5                    d          pd|d<   tm          |	          }~	~|S )Nttgpuir_to_llvmirllvmir_to_llvmzYAddress Sanitizer Error: Address sanitizer is currently only supported on the AMD backendnvptx64-nvidia-cudac                     g | ]\  }}|S r)   r)   )r   r   r0   s      r!   r   z)CUDABackend.make_llir.<locals>.<listcomp>  s    BBBltTTBBBr#   zttg.total-num-warpsri   z
ttg.sharedr   zttg.tensor_memory_size	tmem_sizezttg.global_scratch_memory_sizeglobal_scratch_sizez#ttg.global_scratch_memory_alignmentglobal_scratch_alignzttg.profile_scratch_memory_sizer   profile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)7rN   r   rJ   r   r   r   r   r   r  r  add_allocate_warp_groupsconvertadd_scf_to_cfr	   add_allocate_shared_memory_nvr	  add_allocate_tensor_memoryr
   compilationenable_experimental_consanadd_concurrency_sanitizer"add_allocate_global_scratch_memoryadd_proxy_fence_insertionr   r   patchadd_to_llvmirr   r   r   add_nvgpu_to_llvmadd_warp_specialize_to_llvmr   add_nvvm_to_llvmdisable_line_infollvmiradd_di_scoper   r   init_targetsenable_asanrD   	to_modulerd   rT   set_short_ptrattach_datalayoutset_nvvm_reflect_ftzr   has_extern_depslink_extern_libsoptimize_moduleOPTIMIZE_O3get_int_attrrA   )r   r/  r   rM   r^   rL   r   r   r   llvm_modprocrS   triplepathstotal_num_warpsrets                   r!   	make_llirzCUDABackend.make_llirU  sO   27DK<LMM_S[))
77;;;//333$$R(((;;B
KXXX::2>>>7 	9N44R88899"===99"jIII& 	T'--.A2s{SSS++B
KHHH''+++b!!!11"555;;B???''+++b!!!$$R(((''+++ 2 	+M&&r***& 	Q'--.>CKPPP
s,..( 	mkm m m>#w//&z22)9::&xx@@@#H--- 	36#9(#C#C 	3BBg.ABBBE!(E222Xt'7888 **+@AA&$3H[! --l;; # 0 01I J J*-*:*:;[*\*\&'+.+;+;<a+b+b'(+.+;+;<]+^+^+cbc'(,/,<,<=c,d,d,ihi()(mm
r#   c           	      p   t          || j        j                  }d}t          |          }t	          || j        j                  }t          j        ||||g |j        d          }	t          j	        d|	          }
t          |
          dk    sJ |
d         |d<   |dz   d|dz   }t          j        d	d
| |	t          j                  }	t          j        dd| |	t          j                  }	t          j        dd|	          }	t          j        j        rt!          d           t!          |	           |	S )Nr4  Fz(.visible .entry ([a-zA-Z_][a-zA-Z0-9_]*)r   r   r   r<   r6   z\.version \d+\.\d+z	.version )flagsz\.target sm_\d+z.target sm_z,\s*debug|debug,\s*ra   z // -----// NVPTX Dump //----- //)rN   r   rJ   rd   rT   r   translate_to_asmrr   r   findalllensub	MULTILINEr
   r	   
dump_nvptxprint)r   r/  r   r   r^   rL   rZ  rY  rS   r]  namess              r!   make_ptxzCUDABackend.make_ptx  s6   238HII&&z22T[%566#CxSEY[`aa
FLL5zzQ 8$b;;;r>;;f*,E,E,EsRTR^___f')Cz)C)CSPRP\]]]f+R55<" 	4555#JJJ
r#   c           
      |   t                      j        }t          j        ddd          5 }t          j        ddd          5 }|                    |           |                                 |j        dz   }g }	t          j        j	        r|	dd	gz  }	nt          j
        j        r|	d
gz  }	n|	dgz  }	|j        rg ndg}
t          |          }t          j
        j        rddgng }|j        r|j                            d          ng }|g|	|
d||d| |j        d|}	 t!          j        |dd|           t          j
        j        rMt'          |j                  5 }t)          |                                           d d d            n# 1 swxY w Y   t,          j                            |j                  rt-          j        |j                   t,          j                            |j                  rt-          j        |j                   n
# t           j        $ r}t'          |j                  5 }|                                }d d d            n# 1 swxY w Y   t,          j                            |j                  rt-          j        |j                   |j        dk    rd}n%|j        dt6          j        z   k    rd}n
d|j         }| d| dd                    |           d}t)          d| d| d           t=          |          d }~ww xY wt'          |d          5 }|                                }d d d            n# 1 swxY w Y   t,          j                            |          rt-          j        |           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |S ) NFwz.ptx)deletemoderc   rz.logz.oz	-lineinfoz-suppress-debug-infoz-gz--fmad=falsez--opt-level0 z-vz--gpu-name=z-oT)check	close_fdsstderr   z!Internal Triton PTX codegen error   z`ptxas` raised SIGSEGVz`ptxas` failed with error code z
`ptxas` stderr:
z
Repro command: 
zC

================================================================
z

zy
================================================================
please share the reproducer above with Triton project.
rV   )r*   r0   tempfileNamedTemporaryFilewriteflushr   r
   r@  rJ  r	   disable_ptxas_optrr   rd   rp   rC   r.   r   dump_ptxas_logrW   rg  rZ   osexistsremoveCalledProcessError
returncodesignalSIGSEGVr   r   )r   r/  r   r   r^   r(   fsrcflogfbin
debug_infofmadrJ   disable_optptx_extra_options	ptxas_cmdlog_fileelogerrorr\   r   s                        r!   
make_cubinzCUDABackend.make_cubin  sV    (COOO G	 SW'u3vNNNG	 RVJJsOOOJJLLL9t#DJ 2 ,{,BCC

/ ,tf$

 {m+
-C22N3CD*:66D 38,2PX=#..VXK ?Bo U 5 5c : : :SU "%)+/2=@QSgaeSgSgimirI$(ydSSSS<. /di /Hhmmoo.../ / / / / / / / / / / / / / / 7>>$),, )Idi(((7>>$),, )Idi(((0 ( ( ($)__ *"--//C* * * * * * * * * * * * * * *7>>$),, )Idi(((<3&&?EE\S6>%9994EELalLLE! C C-0C C+.88I+>+>C C C    
       !'''5(8 dD!! !Q! ! ! ! ! ! ! ! ! ! ! ! ! ! !w~~d##  	$OG	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	 P s   N1CN#=H "FHFHFA=HNL$L8I	LIL I!B6LLN/MNMNM6NN1N	N1!N	"N11N58N5c                                            j                  |t          j        k    r fd|d<    fd|d<   n|t          j        k    r
 fd|d<    fd|d<    fd|d	<    fd
|d<   d S )Nc                 4                         | |          S r'   )r   r/  r   r^   rM   r   s     r!   <lambda>z(CUDABackend.add_stages.<locals>.<lambda>  s    4>>#xQXZd3e3e r#   r   c                 4                         | |          S r'   )r+  r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s    DOOCSZ\f4g4g r#   ttgirc                 4                         | |          S r'   )r0  r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s    D4G4GXW^`j4k4k r#   c                 4                         | |          S r'   )r^  r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s    t~~c8WV`/a/a r#   llirc                 H                         | |j        j                  S r'   )ri  r   rJ   r/  r   rM   r   s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s     dmmC7TXT_Td.e.e r#   ptxc                 H                         | |j        j                  S r'   )r  r   rJ   r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s     XwX\XcXh0i0i r#   r   )r   rJ   r   TRITONGLUON)r   stagesrM   r   r^   s   ` ` @r!   
add_stageszCUDABackend.add_stages  s    %%gl33
x&&eeeeeeF6NggggggF7OO''kkkkkkF7Oaaaaaaveeeeeuiiiiiwr#   c                 @    t                      }| d| j        j         S )Nr   )r4   r   rJ   )r   r3   s     r!   r   zCUDABackend.hash
  s&    #%%..DK,...r#   )r   r   r   r   staticmethodr   r   r   rA   r   r   r   r   r   r   r   r   r   r   r   r+  r0  r^  ri  r  r  	functools	lru_cacher   __classcell__)r   s   @r!   r   r      s       O(	 ( ( ( \(# # #$# $ $ $ $"y "T " " " " " "#S # # # #6
 
 
  >S*_ 5 > > > >; ; ;
   \  M M \M^   F F FP  ,J J JX	j 	j 	j Y/ / / / / / /r#   r   ),triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   triton.runtime.errorsr   dataclassesr   r  typingr   r   r   r   typesr   rX   r   rw  r  r}  r.   pathlibr   r%   
NvidiaToolr*   r  r4   r$   rI   rN   rT   r]   rd   rg   r   r)   r#   r!   <module>r     s   E E E E E E E E E E 8 8 8 8 8 8 8 8 8 8 8 8       , , , , , , ! ! ! ! ! !     - - - - - - - - - - - -        				   				          # # # # #5#        iS i i i i.     
 
 
 
 
 T4 4 4
& & & & & $)? )? )? )? )? )? )? )?Xw/ w/ w/ w/ w/+ w/ w/ w/ w/ w/r#   