
    PiQ                        d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d	efd
Zd Zd Z ed           G d d                      Z G d de          ZdS )    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     d S )Nc                     dS )N   r   r    )lhs_typerhs_types     t/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/backends/amd/compiler.py<lambda>z"get_min_dot_size.<locals>.<lambda>   s    i     r   r   s    r   get_min_dot_sizer      s     0//r   c                 f    t           j        j        | dk    p	| dk    o|du nt           j        j        S )Ngfx942gfx950T)r
   r	   use_block_pingpong)archuse_async_copys     r   is_pingpong_schedule_enabledr$      s:    -5 HM!1!Ln6L;@9;WXr   c                 R    t           j        j        | dk    nt           j        j        S )Nr   )r
   r	   use_in_thread_transposer"   s    r   is_in_thread_transpose_enabledr(      s#    !&!B!JDHPUPYPqqr   T)frozenc                   t   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	e
ed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZee         ed<   dZee         ed<   dZeed<   dZee         ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zeed!<   d"Zeed#<   d$Zeed%<   d& Z d' Z!dS )(
HIPOptions   	num_warpsr   waves_per_eu   
num_stagesnum_ctasNextern_libsr   cluster_dimsFdebugTsanitize_overflowr"   )fp8e4nvfp8e5fp8e5b16fp8e4b8supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r<   allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridr   matrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_name instrumentation_modenoneschedule_hintc                    t          | j        dd                   }|dk    rdnd}t                              | d|           | j        dk    r| j        | j        dz
  z  dk    s
J d	            | j        d
k    rD| j        dk    r9t          j        d| j         d           t                              | dd           t          t                    j
        dz  }| j        i nt          | j                  }dD ]}t          || dz            ||<   t                              | dt          |                                                     d S )N   
       @   	warp_sizer   r   znum_warps must be a power of 2r    zckpack is deprecated starting from gfx950 and will be removed in later releases. So for now kpack = z7 will be overwritten to 1 to make transitioning easier.rB   lib)ocmlocklz.bcr2   )intr"   object__setattr__r-   rB   warningswarnr   __file__parentr2   dictstrtupleitems)self	gfx_majorrQ   default_libdirr2   rR   s         r   __post_init__zHIPOptions.__post_init__G   su   	!B$((	#r//BBr	4i888~!!t~!9K'LQR&R&R&R/ 'S&R&R I!!
aM zvz  wA  z  z  z   tWa000h.6 ,4bb$t?O:P:P# 	A 	AC">sKKK#?@@K4k6G6G6I6I0J0JKKKKKr   c                     d                     d | j                                        D                       }t          j        |                    d                                                    S )N_c                 "    g | ]\  }}| d | S )-r   ).0namevals      r   
<listcomp>z#HIPOptions.hash.<locals>.<listcomp>[   s&    OOOID#4#OOOr   zutf-8)join__dict__r_   hashlibsha256encode	hexdigest)r`   keys     r   hashzHIPOptions.hashZ   sX    hhOO9L9L9N9NOOOPP~cjj1122<<>>>r   )"__name__
__module____qualname__r-   rU   __annotations__r.   r0   r1   r2   r\   r3   r^   r4   boolr5   r"   r]   r:   r   r;   r=   r>   r?   r@   rA   rB   rC   rD   rF   rH   rJ   rc   rs   r   r   r   r+   r+      s        IsL#JHcK#L%###E4"t"""D#
 (S%*RRR46%uSz666'----/9 %*999!d!!!$)T))) !#!!!E3NNN$$$$)*!3***L# "#"""  M3L L L&? ? ? ? ?r   r+   c                       e Zd ZdZedefd            Zdeddf fdZdefdZ	de
fdZd Zd	 Zdeeef         fd
Zd Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zd Z ej                    d             Z xZS )
HIPBackendNr   c                     | j         dk    S )NrE   )backendr   s    r   supports_targetzHIPBackend.supports_targetb   s    ~&&r   returnc                     t                                          |           t          |j        t                    sJ d| _        d S )Nhsaco)super__init__
isinstancer"   r]   
binary_ext)r`   r   	__class__s     r   r   zHIPBackend.__init__f   s>       &+s+++++!r   c                     d|j          S )Nhip:r'   r`   optionss     r   get_target_namezHIPBackend.get_target_namek   s    $gl$$$r   c                 *   dt           j        j        p| j        j        i}                    dd          dk    rt          d          | j        j        dk    rNt          t          j	                  }|
                    dh           t          t          |                    |d<   dvr)t          t          t          j                            |d<   | j        j        d	k    rOt          t          j                  }|
                    d
dh           t          t          |                    |d<   dvrt           j        j        |d<   |
                    fdt          j                                        D                        t          di |S )Nr"   r1   r   z'num_ctas > 1 not supported for AMD GPUsr   tf32r>   r:   r    r8   r9   r;   r?   c                 :    i | ]}|v |         ||         S Nr   )rh   koptss     r   
<dictcomp>z,HIPBackend.parse_options.<locals>.<dictcomp>   s:     ; ; ;AT		d1g&9 Q&9&9&9r   r   )r
   runtimeoverride_archr   r"   get
ValueErrorsetr+   r>   updater^   sortedr:   r;   languagedefault_fp_fusion__dataclass_fields__keys)r`   r   argsr>   r;   s    `   r   parse_optionszHIPBackend.parse_optionsn   s   3Gt{7GH88J""Q&&FGGG ;x''+.z/V+W+W((//99938@\9]9]3^3^D/0!--+0
8W1X1X+Y+YD'(;x''03J4`0a0a--44j)5LMMM8=fEf>g>g8h8hD45T))',~'GD#$ ; ; ; ;)H)M)M)O)O ; ; ; 	< 	< 	<!!D!!!r   c                 r    |j         |j        |j        |j        d         |j        d         |j        d         fS )Nr   r   r/   )r-   r1   sharedr3   )r`   metadatas     r   pack_metadatazHIPBackend.pack_metadata   s>    O!!$!!$!!$
 	
r   c                 .    dt          | j                  iS )Nmin_dot_size)r   r   r   s     r   get_codegen_implementationz%HIPBackend.get_codegen_implementation   s     0 = =>>r   c                     ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )r`   r   s     r   get_module_mapzHIPBackend.get_module_map   s    77777719==r   c                     t          j        |           t          j        r!t          j                            |           d S d S r   )r	   load_dialectsrz   instrumentation)r`   ctxs     r   r   zHIPBackend.load_dialects   sE    #% 	:&44S99999	: 	:r   c                     dd l }d}t          | d          r|                                 |k    S t          | |j                  r:t          | d          r*|                                                                 |k    S dS )Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbzHIPBackend.is_within_2gb   s    
3$$ 	1==??j00c5<(( 	>WS:K-L-L 	>&&((--//:==ur   c                 F    t          j        |           }d| v r|ddggz  }|S )NSztt.pointer_rangerO   )r   
parse_attr)descrets     r   r   zHIPBackend.parse_attr   s3    $T**$;;',--C
r   c                     t          j        | |fi |}t          j        j        r%|dk    rt
                              |           r|dz  }|S )Ntensorr   )r   get_arg_specializationr
   r	   use_buffer_opsrz   r   )r   tykwargsr   s       r   r   z!HIPBackend.get_arg_specialization   sT    0bCCFCC 9# 	h:;S;STW;X;X3JC
r   c                    t          j        | j                  }|                                 t          j                            |           t          j                            |           t          j        	                    |           t          j        
                    |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           |                    |            | S r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   	make_ttirzHIPBackend.make_ttir   s   _S[))
!!"%%%..r222<<R@@@''+++###))"---b!!!##B'''$$R(((##B'''
s
r   c                 T   t          j        | j                  }|                                 t          j                            |d|j         |j        |j	        |j
                   |                    |            t          j        | j                  }|                                 t          j                            |           t          j                            |           t          j                            |           t           j        j                            ||j        |j        |j                   t          j                            |           t           j        j                            |           t          j                            |d           t           j        j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t6          j        j        }t6          j        j        }t6          j        j        }t?          |j        |          }t           j        j                             ||j!        ||||           |r*t           j        j        "                    ||j                   t          j                            |           |j#        $                                dk    r*t           j        j        %                    ||j#                   t          j                            |d           t          j                            |           t          j        &                    |           tO          |j                  rCt           j        j        (                    |           t          j                            |           t           j        j        )                    |           |r5|j!        dk    r*t           j        j        *                    ||j!                   t6          j        j+        r}t           j        j        ,                    |           t          j                            |           t           j        j        -                    ||j        t6          j        j.                   t           j        j        /                    |           t          j                            |           t          j        0                    |           t          j        1                    |           |r*t           j        j        2                    ||j                   |                    |            | S )Nr   TrI   r   )3r   r   r   r   r   r   add_convert_to_ttgpuirr"   r-   rQ   r1   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulrA   rB   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r
   global_prefetchlocal_prefetchr#   r$   add_stream_pipeliner0   add_coalesce_async_copyrJ   lowerinsert_instruction_sched_hintsadd_reduce_data_duplicationr(   add_in_thread_transposeadd_reorder_instructionsadd_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsuse_buffer_atomicsadd_fold_true_cmpir   r   add_update_async_wait_count)r   r   r   r   r   r   r#   r!   s           r   
make_ttgirzHIPBackend.make_ttgir   sM   _S[))
**2/Dgl/D/DgFWY`Yj+2+;	= 	= 	=
s_S[))
##B'''44R88833B777
00W\7C_ahanooo44R888
0044400T:::
77;;;,,R000''+++##B'''''+++)3119',WW
..r73EXfhv/A	C 	C 	C 	IJ66r7<HHH''+++ &&((F22J==b'BWXXX00T:::44R888222666)',77 	=J66r:::N88<<<
33B777 	J'"4q"8"8J11"g6HIII9# 	iJ88<<<M++B///J88W\59Kghhh
--b111''+++b!!!$$R((( 	MJ::2w|LLL
s
r   c                    | }t          j        |j                  }|                                 t          j                            |           t          j                            |           t          j        	                    |           t          j
                            |           t          j                            |           t          j                            |           |                    |           |S r   )r   r   r   r   r   gluonr   add_resolve_auto_encodingsr   add_sccpr   add_loop_aware_cser   r    add_combine_tensor_select_and_ifr   )srcr   r   r   r   s        r   gluon_to_ttgirzHIPBackend.gluon_to_ttgir  s    _S[))
  $$$//333r"""&&r***&&r***77;;;
s
r   c                 $   | }t          j        |j                  }|                                 d}t          j        j                            ||j        |           t
          j	        
                    |           t
          j	                            |           t          j        j                            |           t          j        r&t          j                            d||j                   d}t          j        j                            ||j        |           t
          j                            |           t
          j                            |           t
          j	                            |           t
          j	                            |           t
          j                            |           t
          j                            |           t
          j                            |           |j                                        dk    r0t          j        j                            ||j        |j                   t          j        r&t          j                            d||j                   t6          j        j        st
          j                            |           t          j        j                             ||           |!                    |           tE          j#                     tE          j                    }tE          j$        ||          t	          j%                   d}t6          j        j&        rd}tE          j'        t          j(        |j        |           t	          j)        |j                   t	          j*        d           t	          j+        d	d
           t	          j+        dd           t	          j+        dd
           t	          j+        d|j,        dk               d -                                D             }	|	d         .                    t          j/                   |	d         0                    dd|j1        |j,        z              |	d         0                    d|j2                    |j3        rdnd}
|	d         0                    d|
           t6          j        j&        r5|	d         4                    d           |	d         5                                 t	          j6        |	d                    t6          j        j&        rgto          tp                    j9        dz  }tu          |dz            tu          |dz            tu          |dz            g}tE          j;        |           nB|j<        r;fd|j<        D             }t{          |          dk    rtE          j;        |           tE          j>        tD          j?        |j        dg |j@                   t	          jA        |j                  rQ|	d         B                    d           |	d         B                    d           |	d         B                    d           t6          j        jC        rt	          jD        |	d                    | E                    d          |d<   | E                    d           pd|d!<   | E                    d"          pd#|d$<   t	          jF                   t	          jG                   tu                    S )%Nr   ttgpuir_to_llvmirTrI   llvmir_to_llvmrG   +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rP   c                 :    g | ]}|                                 |S r   )is_declaration)rh   fns     r   rk   z(HIPBackend.make_llir.<locals>.<listcomp>]  s)    PPPbB<M<M<O<OPrPPPr   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr<   zdenormal-fp-math-f32rR   z
asanrtl.bczocml.bczockl.bcc                 D    g | ]\  }}t          j        |          |S r   )r	   need_extern_lib)rh   ri   pathllvm_mods      r   rk   z(HIPBackend.make_llir.<locals>.<listcomp>}  s1    iiiltTSEXYacgEhEhiTiiir   zamdgpu-no-workgroup-id-xzamdgpu-no-workgroup-id-yzamdgpu-no-workgroup-id-zz
ttg.sharedr   zttg.profile_scratch_memory_sizeprofile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)Hr   r   r   r   r	   r   r   add_optimize_lds_usager"   convertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryrz   r   patchadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rJ   r   lower_instruction_sched_hintsr0   r
   compilationdisable_line_infollvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrQ   get_functionsset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr-   r.   rC   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rZ   r[   r]   link_extern_libsr2   lenoptimize_moduleOPTIMIZE_O3r?   has_architected_sgprsremove_fn_attrscalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r   r   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   target_featuresfnsdenormal_moderb   pathsr  s                @r   	make_llirzHIPBackend.make_llir  s+   _S[))
 
11"glOTTT$$R(((**2...
55b999% 	S&,,-@"ckRRR 	
((W\9EEE''+++b!!!''+++**2...''+++b!!!$$R((( &&((F22J<<RwOabbb % 	P&,,-=r3;OOO 2 	+M&&r***
55b)DDD
s 	,..>#w// ***( 	'&Ox):GL/ZZZ 	Hgl333Hc***%h0H%PPP%h0QSWXXX%h0H%PPP%h0H'J[_aJabbb QPH2244PPPA >???A8:dw?PQXQb?b:d:deee 	A0W5I2KLLL+2+EQ6A1=AAA( 	&F((222F##%%%
 	 Q(((( 	7!(^^2U:NN\122NY.//NY.//E
 !(E2222  	7iiiig.AiiiE5zzA~~%h666Xt'7r2wOghhh $W\22 	>F!!"<===F!!"<===F!!"<===9* 	<3CF;;; !--l;;+.+;+;<]+^+^+cbc'(,/,<,<=c,d,d,ihi()$X... 	 ***8}}r   c           	         t          j        d|           }t          |          dk    sJ |d         |d<   g }|j        dk    r|                    d           d|j        v rdnd	}t          j        | t          j	        |j        |||j
        d
          }t          j        j        rt          d           t          |           |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   ri   	attentionzsink-insts-to-avoid-spillsgfx11z-real-true16rG   Fz!// -----// AMDGCN Dump //----- //)refindallr+  rJ   appendr"   r   translate_to_asmr	   r  r?   r
   dump_amdgcnprint)r   r   r   namesflagsfeaturesamdgcns          r   make_amdgcnzHIPBackend.make_amdgcn  s    
 
QSVWW5zzQ 8
  K//LL5666%,%<%<>>"&sC,=w|XW\^e^v',. .9  	5666&MMMr   c                 V   d}t           j        j        rd}t          j        | |j        |          }t          j                    5 }t          j                    5 }t          |j	        d          5 }|
                    |           d d d            n# 1 swxY w Y   t          j        |j	        |j	                   d d d            n# 1 swxY w Y   t          |j	        d          5 }|                                }	d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |	S )NrG   r   wbrb)r
   r  r  r	   assemble_amdgcnr"   tempfileNamedTemporaryFileopenri   write
link_hsacoread)
r   r   r   r7  r   tmp_outtmp_infd_infd_outr   s
             r   
make_hsacozHIPBackend.make_hsaco  s   ( 	'&O#CGG(** 	$g,.. :&&+t,, 'KK&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' 'v{GL999: : : : : : : : : : : : : : : glD)) $Vkkmm$ $ $ $ $ $ $ $ $ $ $ $ $ $ $	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 
s~   DC.BCBCB"C:DC
	
DC
	D&D;DD	DD	DD"%D"c                      |t           j        k    r fd|d<    fd|d<   n|t           j        k    r	 fd|d<    fd|d<    fd|d	<    fd
|d<   d S )Nc                 2                         | |          S r   )r   r   r   r   r`   s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    4>>#xQX3Y3Y r   r   c                 2                         | |          S r   )r   r[  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    DOOCSZ4[4[ r   ttgirc                 2                         | |          S r   )r   r[  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    D4G4GXW^4_4_ r   c                 2                         | |          S r   )r;  r[  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    t~~c8W/U/U r   llirc                 2                         | |          S r   )rI  r[  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    1A1A#xQX1Y1Y r   rH  c                 2                         | |          S r   )rX  r[  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    Xw0W0W r   r   )r   TRITONGLUON)r`   stagesr   r   s   ` ` r   
add_stageszHIPBackend.add_stages  s    x&&YYYYYF6N[[[[[F7OO''_____F7OUUUUUvYYYYYxWWWWWwr   c                     | j          S r   r   )r`   s    r   rs   zHIPBackend.hash  s    +r   ) rt   ru   rv   r   staticmethodr   r}   r   r]   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r;  rI  rX  rf  	functools	lru_cachers   __classcell__)r   s   @r   rz   rz   _   s-       O'	 ' ' ' \'"y "T " " " " " "
%# % % % %"S " " " "4
 
 
? ? ?>S*_ 5 > > > >
: : :
   \   \   \   \  8 8 \8t   \ A A \AF   \.   \X X X Y             r   rz   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   dataclassesr   typingr   r   r   typesr   rn   rN  r?  ri  rX   pathlibr   r   r$   r(   r+   rz   r   r   r   <module>rs     s   E E E E E E E E E E 5 5 5 5 5 5 5 5 5 5 5 5       ! ! ! ! ! ! # # # # # # # # # #         				           0Y 0 0 0 0X X X
r r r $=? =? =? =? =? =? =? =?@n  n  n  n  n  n  n  n  n  n r   