
    wi@<              
          d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZmZmZmZ ddlmZmZmZ  ej        e          Zh dZh d	Ze	 G d
 d                      Zdeej        ef         deeef         fdZ deeej        f         deeeeeee!f         f                  ddfdZ"deeej        f         deeej        f         ddfdZ#dej$        dedeeej        e!f         ddfdZ%deeee!f         de!fdZ&dedefdZ'dedee         ddfdZ(dej$        dej)        de*fdZ+dS )     N)contextmanager)	dataclassfield)Path)AnyDict	GeneratorIterableTupleUnion   )DDUFCorruptedFileErrorDDUFExportErrorDDUFInvalidEntryNameError>   .txt.json.model.safetensors>   config.jsonscheduler_config.jsontokenizer_config.jsonpreprocessor_config.jsonc                       e Zd ZU dZeed<   eed<   eed<    ed          Ze	ed<   e
deed	d	f         fd
            ZddedefdZd	S )	DDUFEntrya  Object representing a file entry in a DDUF file.

    See [`read_dduf_file`] for how to read a DDUF file.

    Attributes:
        filename (str):
            The name of the file in the DDUF archive.
        offset (int):
            The offset of the file in the DDUF archive.
        length (int):
            The length of the file in the DDUF archive.
        dduf_path (str):
            The path to the DDUF archive (for internal use).
    filenamelengthoffsetF)repr	dduf_pathreturnNc              #   B  K   | j                             d          5 }t          j        |                                dt          j                  5 }|| j        | j        | j        z            V  ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )a-  Open the file as a memory-mapped file.

        Useful to load safetensors directly from the file.

        Example:
            ```py
            >>> import safetensors.torch
            >>> with entry.as_mmap() as mm:
            ...     tensors = safetensors.torch.load(mm)
            ```
        rbr   )r   accessN)r   openmmapfilenoACCESS_READr   r   )selffmms      {/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/huggingface_hub/serialization/_dduf.pyas_mmapzDDUFEntry.as_mmap9   s7      ^  && 	B!188::a8HIII BRt{T['@@AAAAB B B B B B B B B B B B B B B	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	Bs5   4BA<0B<B 	 BB 	BBButf-8encodingc                     | j                             d          5 }|                    | j                   |                    | j                                      |          cddd           S # 1 swxY w Y   dS )zRead the file as text.

        Useful for '.txt' and '.json' entries.

        Example:
            ```py
            >>> import json
            >>> index = json.loads(entry.read_text())
            ```
        r"   )r.   N)r   r$   seekr   readr   decode)r(   r.   r)   s      r+   	read_textzDDUFEntry.read_textJ   s     ^  && 	A!FF4;66$+&&--x-@@	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	As   AA00A47A4)r-   )__name__
__module____qualname____doc__str__annotations__intr   r   r   r   r	   bytesr,   r3        r+   r   r   "   s           MMMKKKKKKe'''It'''B5$#45 B B B ^B A A# AC A A A A A Ar=   r   r   r    c                    i }t          |           } t                              d|             t          j        t          |           d          5 }|                                D ]}t                              d|j                    |j	        t          j
        k    rt          d          	 t          |j                   n*# t          $ r}t          d|j                   |d}~ww xY wt          ||          }t          |j        ||j        |           ||j        <   	 ddd           n# 1 swxY w Y   d|vrt          d	          t#          j        |d                                                   }t)          ||                                           t                              d
|  dt-          |           d           |S )a  
    Read a DDUF file and return a dictionary of entries.

    Only the metadata is read, the data is not loaded in memory.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to read.

    Returns:
        `Dict[str, DDUFEntry]`:
            A dictionary of [`DDUFEntry`] indexed by filename.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).

    Example:
        ```python
        >>> import json
        >>> import safetensors.torch
        >>> from huggingface_hub import read_dduf_file

        # Read DDUF metadata
        >>> dduf_entries = read_dduf_file("FLUX.1-dev.dduf")

        # Returns a mapping filename <> DDUFEntry
        >>> dduf_entries["model_index.json"]
        DDUFEntry(filename='model_index.json', offset=66, length=587)

        # Load model index as JSON
        >>> json.loads(dduf_entries["model_index.json"].read_text())
        {'_class_name': 'FluxPipeline', '_diffusers_version': '0.32.0.dev0', '_name_or_path': 'black-forest-labs/FLUX.1-dev', ...

        # Load VAE weights using safetensors
        >>> with dduf_entries["vae/diffusion_pytorch_model.safetensors"].as_mmap() as mm:
        ...     state_dict = safetensors.torch.load(mm)
        ```
    zReading DDUF file rzReading entry z)Data must not be compressed in DDUF file.z!Invalid entry name in DDUF file: N)r   r   r   r   model_index.json7Missing required 'model_index.json' entry in DDUF file.zDone reading DDUF file z. Found z entries)r   loggerinfozipfileZipFiler8   infolistdebugr   compress_type
ZIP_STOREDr   _validate_dduf_entry_namer   _get_data_offsetr   	file_sizejsonloadsr3   _validate_dduf_structurekeyslen)r   entrieszfrC   er   indexs          r+   read_dduf_filerV   Z   s   N GYI
KK0Y00111	Y	-	- KKMM 	 	DLL9$-99:::!W%777,-XYYYi)$-8888, i i i,-`QUQ^-`-`aaghhi &b$//F%.vdnXa& & &GDM""	              $ (($%^___Jw12<<>>??EUGLLNN333
KKS)SSS\\SSSTTTNs7   AD1/CD1
C+C&&C++9D11D58D5rR   c                    t                               d|  d           t                      }d}t          j        t          |           dt          j                  5 }|D ]\  }}||v rt          d|           |                    |           |dk    r\	 t          j
        t          |                                                    }n'# t          j        $ r}t          d          |d}~ww xY w	 t          |          }n%# t          $ r}t          d|           |d}~ww xY wt                               d	| d
           t#          |||           	 ddd           n# 1 swxY w Y   |t          d          	 t%          ||           n"# t&          $ r}t          d          |d}~ww xY wt                               d|             dS )a  Write a DDUF file from an iterable of entries.

    This is a lower-level helper than [`export_folder_as_dduf`] that allows more flexibility when serializing data.
    In particular, you don't need to save the data on disk before exporting it in the DDUF file.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        entries (`Iterable[Tuple[str, Union[str, Path, bytes]]]`):
            An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
            The filename should be the path to the file in the DDUF archive.
            The content can be a string or a pathlib.Path representing a path to a file on the local disk or directly the content as bytes.

    Raises:
        - [`DDUFExportError`]: If anything goes wrong during the export (e.g. invalid entry name, missing 'model_index.json', etc.).

    Example:
        ```python
        # Export specific files from the local disk.
        >>> from huggingface_hub import export_entries_as_dduf
        >>> export_entries_as_dduf(
        ...     dduf_path="stable-diffusion-v1-4-FP16.dduf",
        ...     entries=[ # List entries to add to the DDUF file (here, only FP16 weights)
        ...         ("model_index.json", "path/to/model_index.json"),
        ...         ("vae/config.json", "path/to/vae/config.json"),
        ...         ("vae/diffusion_pytorch_model.fp16.safetensors", "path/to/vae/diffusion_pytorch_model.fp16.safetensors"),
        ...         ("text_encoder/config.json", "path/to/text_encoder/config.json"),
        ...         ("text_encoder/model.fp16.safetensors", "path/to/text_encoder/model.fp16.safetensors"),
        ...         # ... add more entries here
        ...     ]
        ... )
        ```

        ```python
        # Export state_dicts one by one from a loaded pipeline
        >>> from diffusers import DiffusionPipeline
        >>> from typing import Generator, Tuple
        >>> import safetensors.torch
        >>> from huggingface_hub import export_entries_as_dduf
        >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
        ... # ... do some work with the pipeline

        >>> def as_entries(pipe: DiffusionPipeline) -> Generator[Tuple[str, bytes], None, None]:
        ...     # Build an generator that yields the entries to add to the DDUF file.
        ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
        ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
        ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
        ...     yield "vae/diffusion_pytorch_model.safetensors", safetensors.torch.save(pipe.vae.state_dict())
        ...     yield "text_encoder/config.json", pipe.text_encoder.config.to_json_string().encode()
        ...     yield "text_encoder/model.safetensors", safetensors.torch.save(pipe.text_encoder.state_dict())
        ...     # ... add more entries here

        >>> export_entries_as_dduf(dduf_path="stable-diffusion-v1-4.dduf", entries=as_entries(pipe))
        ```
    zExporting DDUF file ''NwzCan't add duplicate entry: r@   z#Failed to parse 'model_index.json'.zInvalid entry name: zAdding entry 'z' to DDUF filerA   zInvalid DDUF file structure.zDone writing DDUF file )rB   rC   setrD   rE   r8   rI   r   addrM   rN   _load_contentr2   JSONDecodeErrorrJ   r   rG   _dump_content_in_archiverO   r   )r   rR   	filenamesrU   archiver   contentrT   s           r+   export_entries_as_ddufrb      sy   t KK4	444555IE	Yg.@	A	A AW!( 	A 	AHg9$$%&NH&N&NOOOMM(###---X J}W'='='D'D'F'FGGEE+ X X X)*OPPVWWXP4X>>, P P P%&GX&G&GHHaOPLLB(BBBCCC$Wh@@@@!	AA A A A A A A A A A A A A A A( }WXXXE 	2222! E E E<==1DE KK5)5566666sr   9E$3C
	E$
C.C))C..E$2DE$
D$DD$$3E$$E(+E(F 
F1F,,F1folder_pathc                     t                    dt          t          t          t           f                  ffd}t	          |  |                       dS )a  
    Export a folder as a DDUF file.

    AUses [`export_entries_as_dduf`] under the hood.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        folder_path (`str` or `os.PathLike`):
            The path to the folder containing the diffusion model.

    Example:
        ```python
        >>> from huggingface_hub import export_folder_as_dduf
        >>> export_folder_as_dduf(dduf_path="FLUX.1-dev.dduf", folder_path="path/to/FLUX.1-dev")
        ```
    r    c               3     K   t                                        d          D ]} |                                 s| j        t          vrt
                              d|  d           D|                               }t          |j	                  dk    rt
                              d|  d           |
                                | fV  d S )Nz**/*zSkipping file 'z' (file type not allowed)   z"' (nested directories not allowed))r   globis_filesuffixDDUF_ALLOWED_ENTRIESrB   rG   relative_torQ   partsas_posix)pathpath_in_archiverc   s     r+   _iterate_over_folderz3export_folder_as_dduf.<locals>._iterate_over_folder  s      %%**622 
	3 
	3D<<>> {"666NtNNNOOO"..{;;O?())Q..WtWWWXXX!**,,d22222
	3 
	3r=   N)r   r
   r   r8   rb   )r   rc   rp   s    ` r+   export_folder_as_ddufrq      sg    $ {##K3(5d+;"< 3 3 3 3 3 3 9&:&:&<&<=====r=   r`   r   ra   c                    |                      |dd          5 }t          |t          t          f          rSt          |          }|                     d          5 }t	          j        ||d           d d d            n# 1 swxY w Y   n>t          |t                    r|                    |           nt          d| d          d d d            d S # 1 swxY w Y   d S )NrY   T)force_zip64r"   i   zInvalid content type for z. Must be str, Path or bytes.)	r$   
isinstancer8   r   shutilcopyfileobjr;   writer   )r`   r   ra   
archive_fhcontent_path
content_fhs         r+   r^   r^     s   	h	6	6 g*gT{++ 	g==L""4(( LJ":z?KKKL L L L L L L L L L L L L L L'' 	gW%%%%!"eh"e"e"efffg g g g g g g g g g g g g g g g g gs7   ACA=1C=B	CB	ACCCc                     t          | t          t          f          r!t          |                                           S t          | t                    r| S t          dt          |            d          )zoLoad the content of an entry as bytes.

    Used only for small checks (not to dump content into archive).
    z6Invalid content type. Must be str, Path or bytes. Got .)rt   r8   r   
read_bytesr;   r   type)ra   s    r+   r\   r\   *  so    
 'C;'' iG}}'')))	GU	#	# igW[\cWdWdggghhhr=   
entry_namec                 $   d|                      d          d         z   t          vrt          d|            d| v rt          d|  d          |                     d          } |                     d          dk    rt          d|  d          | S )	Nr|   zFile type not allowed: \z0Entry names must use UNIX separators ('/'). Got /   z-DDUF only supports 1 level of directory. Got )splitrj   r   stripcount)r   s    r+   rJ   rJ   7  s    
Zc""2&&.BBB'(N*(N(NOOOz'(h[e(h(h(hiii!!#&&Jq  '(eXb(e(e(efffr=   rU   entry_namesc                 <   t          | t                    s t          dt          |            d          d D             }|D ]U| vrt          d d          t	          fdt
          D                       st          d dt
           d          Vd	S )
a  
    Consistency checks on the DDUF file structure.

    Rules:
    - The 'model_index.json' entry is required and must contain a dictionary.
    - Each folder name must correspond to an entry in 'model_index.json'.
    - Each folder must contain at least a config file ('config.json', 'tokenizer_config.json', 'preprocessor_config.json', 'scheduler_config.json').

    Args:
        index (Any):
            The content of the 'model_index.json' entry.
        entry_names (Iterable[str]):
            The list of entry names in the DDUF file.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
    z>Invalid 'model_index.json' content. Must be a dictionary. Got r|   c                 L    h | ]!}d |v |                     d           d         "S )r   r   )r   ).0entrys     r+   	<setcomp>z+_validate_dduf_structure.<locals>.<setcomp>W  s.    QQQEC5LLEKK$$Q'LLLr=   zMissing required entry 'z' in 'model_index.json'.c              3   *   K   | ]} d | v V  dS )r   Nr<   )r   required_entryr   folders     r+   	<genexpr>z+_validate_dduf_structure.<locals>.<genexpr>[  s6      rr>f//~//;>rrrrrrr=   z!Missing required file in folder 'z!'. Must contains at least one of N)rt   dictr   r~   anyDDUF_FOLDER_REQUIRED_ENTRIES)rU   r   dduf_foldersr   s    ` @r+   rO   rO   B  s    $ eT"" v$%tfjkpfqfq%t%t%tuuuQQ[QQQL  ()dF)d)d)deeerrrrrUqrrrrr 	(|F||]y|||  	 r=   rS   rC   c                    | j         t          d          |j        }| j                             |           | j                             d          }t          |          dk     rt          d          t                              |dd         d          }t                              |dd         d          }|dz   |z   |z   }|S )a1  
    Calculate the data offset for a file in a ZIP archive.

    Args:
        zf (`zipfile.ZipFile`):
            The opened ZIP file. Must be opened in read mode.
        info (`zipfile.ZipInfo`):
            The file info.

    Returns:
        int: The offset of the file data in the ZIP archive.
    Nz+ZipFile object must be opened in read mode.   zIncomplete local file header.      little)fpr   header_offsetr0   r1   rQ   r:   
from_bytes)rS   rC   r   local_file_headerfilename_lenextra_field_lendata_offsets          r+   rK   rK   a  s     
u}$%RSSS &M EJJ}

2
""$%DEEE >>"3BrE":HEELnn%6r"u%=xHHO  "$|3oEKr=   ),rM   loggingr%   osru   rD   
contextlibr   dataclassesr   r   pathlibr   typingr   r   r	   r
   r   r   errorsr   r   r   	getLoggerr4   rB   rj   r   r   PathLiker8   rV   r;   rb   rq   rE   r^   r\   rJ   rO   ZipInfor:   rK   r<   r=   r+   <module>r      s"      				   % % % % % % ( ( ( ( ( ( ( (       ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? W W W W W W W W W W 
	8	$	$          4A 4A 4A 4A 4A 4A 4A 4AnBeBK$45 B$sI~:N B B B BJX7S"+%&X719%U3PTV[K[E\@\:]1^X7	X7 X7 X7 X7v!>U3+;%< !>5QTVXVaQaKb !>gk !> !> !> !>H	ggo 	g 	guUXZ\ZeglUlOm 	grv 	g 	g 	g 	g
i5dE!12 
iu 
i 
i 
i 
i# #    C hsm     >" " "C " " " " " "r=   