
    Pi_                         d dl Z d dlmZ d dlmZ ddZe j        d             Ze j        ddej	        fd            Z
e j        ddej	        fd            ZdS )	    N)TensorDescriptorc                     t          |          }t           j                  }t          |          }|dk     r||z  }d|cxk    r	|dz
  k     sn J d            |dk    s
J d            t          |          |k    s
J d            d}d}||         |k    s
J d	            |||<                        |          }d
|z
  |g fdt	          |          D             z   }||g|z   }	ddg|z   }
t           |	||
          S )a  
    Given a 2- or 3-dimensional tensor T, this creates a 'ragged descriptor'
    which behaves like a concatenation (along the first axis) of subarrays
    of potentially unequal size.

    The load_ragged and store_ragged device functions can be used to read
    and write from subarrays T[batch_offset : batch_offset + batch_size]
    with hardware bounds-checking preventing any sort of leakage outside
    the subarray.
    r      zlast dimension cannot be ragged   z<read-write ragged descriptors must have at most 3 dimensionsz1block shape must have same length as tensor shapei     @z#number of rows may not exceed 2**30l        c                 :    g | ]}                     |          S  )stride).0iTs     o/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/triton/tools/ragged_tma.py
<listcomp>z,create_ragged_descriptor.<locals>.<listcomp>)   s#    :\:\:\1188A;;:\:\:\    )listshapelenr
   ranger   )r   block_shape
ragged_dimtensor_shaperankmax_intbillionragged_stride
tma_stride	tma_shape	box_shapes   `          r   create_ragged_descriptorr      sQ    {##K==L|DA~~d

%%%%TAX%%%%%'H%%%1999T999{t###%X###GG
#w...0U...&LHHZ((M -'7:\:\:\:\PUVZP[P[:\:\:\\J7#l2IQ+%IAy*i@@@r   c                 *    d}||z
  |z   }| |z   }|||fS )z;
    Helper function for load_ragged and store_ragged.
    r   r	   )batch_offset
batch_sizerowr   xys         r   to_ragged_indicesr&   0   s.     G*s"Az!AAq=r   r   c                 ^   t          j        t          | j                  t          |          dz   k    d           t	          ||||                   \  }}}|                     ||g|d|         z   |gz   ||dz   d         z             }t          j        ||j        dd                   }|S )z
    Read from a subarray T[batch_offset : batch_offset + batch_size] with
    hardware bounds-checking, where reading outside the subarray gives zeros.

    Coords should be an appropriately-sized list of integers, just like in
    TMA.load().
       z*TMA must be a read-write ragged descriptorNr   )tlstatic_assertr   r   r&   loadreshape)	TMAr!   r"   coordsr   c0c1c2datas	            r   load_raggedr3   =   s     S^^s6{{Q68deee"<VJ=OPPJBB88RHvkzk22bT9F:PQ>??<SSTTD:dDJqrrN++DKr   c                     t          ||||                   \  }}}t          j        |ddg|j        z             }|                     ||g|d|         z   |gz   ||dz   d         z   |           dS )a  
    Write to a subarray T[batch_offset : batch_offset + batch_size] with
    hardware bounds-checking, where writes outside the subarray are masked
    correctly.

    Coords should be an appropriately-sized list of integers, just like in
    TMA.store().
    r   N)r&   r)   r,   r   store)	r-   r!   r"   r.   r2   r   r/   r0   r1   s	            r   store_raggedr6   O   s     #<VJ=OPPJBB:dQFTZ/00DIIr2h,,t3fZ!^__6MMtTTTTTr   )r   )tritontriton.languagelanguager)   triton.tools.tensor_descriptorr   r   jitr&   	constexprr3   r6   r	   r   r   <module>r=      s           ; ; ; ; ; ;
%A %A %A %AP 	 	 	  2<    " U U", U U U U U Ur   