
    Piz
                         d dl mZ d dlmZmZ g dZ G d de          Zedd            Zedd            Zedd            Z	ed	ddd            Z
dS )    )SwizzledSharedLayout)builtin_unwrap_if_constexpr)arriveinit
invalidateMBarrierLayoutwaitc                   .     e Zd ZdZddedef fdZ xZS )r	   z
    Layout for mbarrier synchronization in Ampere and later architectures.

    Args:
        ctas_per_cga (int): CTAs per CGA grouping. Defaults to 1.
        cta_split_num (int): CTA split factor. Defaults to 1.
       ctas_per_cgacta_split_numc           	      `    t                                          ddddg|g|gdg           d S )Nr   r   )vec	per_phase	max_phaseorderr   r   	cta_order)super__init__)selfr   r   	__class__s      /var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/experimental/gluon/language/nvidia/ampere/mbarrier.pyr   zMBarrierLayout.__init__   sK    #&(/c 	 	
 	
 	
 	
 	
    )r   r   )__name__
__module____qualname____doc__intr   __classcell__)r   s   @r   r	   r	      sZ         	
 	
S 	
S 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
r   r	   Nc                 d    t          |          }|j                            | j        |           dS )z
    Initialize an mbarrier with a specified count.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to initialize.
        count (int): The initial count for the barrier.
    N)r   buildercreate_mbarrier_inithandle)mbarriercount	_semantics      r   r   r      s2     !''E**8?EBBBBBr   c                 D    |j                             | j                   dS )z
    Invalidate an mbarrier, resetting its state.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to invalidate.
    N)r"   create_mbarrier_invalr$   )r%   r'   s     r   r   r   )   s#     ++HO<<<<<r   T c                     |                     |          }|                     |          }d |D             }|j                            | j        |j        |j        |           dS )a  
    Wait until the mbarrier object completes its current phase.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to wait on.
        phase (int): The phase index to wait for.
        pred (bool): Predicate. Operation is skipped if predicate is False. Defaults to True.
        deps (Sequence[shared_memory_descriptor]): Dependent allocations barrier is waiting on. Used to track liveness of dependent allocations. Defaults to ().
    c                     g | ]	}|j         
S r*   )r$   ).0xs     r   
<listcomp>zwait.<locals>.<listcomp>A   s    ###AH###r   N)	to_tensorr"   create_mbarrier_waitr$   )r%   phasepreddepsr'   s        r   r
   r
   4   sg     &&Et$$D##d###D**8?EL$+W[\\\\\r   )r3   r'   c                    d}|                     |          }|j                            | j        ||j                   dS )a  
    Arrive on an mbarrier, signaling that a thread has reached the barrier.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to arrive on.
        pred (bool): Predicate. Operation is skipped if predicate is False. Defaults to True.
    r   N)r0   r"   create_mbarrier_arriver$   )r%   r3   r'   r&   s       r   r   r   E   sA     Et$$D,,X_eT[QQQQQr   )N)Tr*   N)+triton.experimental.gluon.language._layoutsr   (triton.experimental.gluon.language._corer   r   __all__r	   r   r   r
   r   r*   r   r   <module>r:      s   L L L L L L R R R R R R R R
D
D
D
 
 
 
 
) 
 
 
* 		C 	C 	C 		C 	= = = 	= 	] ] ] 	]  	!T 
R 
R 
R 
R 	
R 
R 
Rr   