
    Pi                     f    d dl mZ d dlmZmZmZmZmZ g dZedd            Z	ed	d            Z
dS )
    )builtin)async_copy_global_to_sharedasync_copy_shared_to_global
store_waittensor_descriptortensor_descriptor_type)async_gatherasync_scatterr   r   r   r   r   TNc                     |                     |          }|                     |          }|j                            | j        |j        |j        |j        |j        |j                   dS )a,  
    Asynchronously gather elements from global memory to shared memory using TMA.

    Args:
        tensor_desc (tensor_descriptor): The tensor descriptor.
        x_offsets (tensor): 1D tensor of X offsets.
        y_offset (int): Scalar Y offset.
        barrier (shared_memory_descriptor): Barrier that will be signaled when the operation is complete.
        result (tensor_memory_descriptor): Result shared memory, must have NVMMASharedLayout.
        pred (bool): Scalar predicate. Operation is skipped if predicate is False. Defaults to True.
    N)	to_tensorbuildercreate_async_tma_gatherhandle)tensor_desc	x_offsetsy_offsetbarrierresultpred	_semantics          /var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/experimental/gluon/language/nvidia/blackwell/tma.pyr	   r	      ss     t$$D""8,,H--k.@)BRT\Tceles.4mT[J J J J J    c                     |                     |          }|j                            | j        |j        |j        |j                   dS )aW  
    Asynchronously scatter elements from shared memory to global memory using TMA.

    Args:
        tensor_desc (tensor_descriptor): The tensor descriptor.
        x_offsets (tensor): 1D tensor of X offsets.
        y_offset (int): Scalar Y offset.
        src (tensor_memory_descriptor): The source data, must be in NVMMASharedLayout.
    N)r   r   create_async_tma_scatterr   )r   r   r   srcr   s        r   r
   r
   (   sH     ""8,,H..{/A9CSU]Udfifpqqqqqr   )TN)N)(triton.experimental.gluon.language._corer   4triton.experimental.gluon.language.nvidia.hopper.tmar   r   r   r   r   __all__r	   r
    r   r   <module>r       s    < < < < < <                	J J J 	J$ 	r r r 	r r rr   