
    BiL                         d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZ ddl	m
Z
 d Zd Zdd	Zd
 Zd Zd dZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z e!dk    r e              dS dS )!    N)DictListSet   )pymupdfc                 4    d| z                       dd          S )Nz %s K   -)center)xs    h/var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/pymupdf/__main__.pymycenterr      s    QJr3'''    c                    |d         }|d         }|dk    r|                      |          S d }t          j        | |          }t          j        | |          }	 |j        |j        k    r%|j        |j        cxk    rdk    rn n|j        dk    s:t          j        d||fz             t          j        |           d} ||          S t          j        |          }|                    |j                   dx}} ||          S )zReturn image for a given XREF.r   r   c                 h    | j         j        dk    r| S t          j        t          j        |           }|S )N   )
colorspacenr   PixmapcsRGB)pixtpixs     r   getimagezrecoverpix.<locals>.getimage   s/    >q  J~gmS11r   z&Warning: unsupported /SMask %i for %i:N)	extract_imager   r   irectalphar   message	set_alphasamples)docitemr   sr   pix1pix2r   s           r   
recoverpixr%      s$   QAQAAvv  ###   >#q!!D>#q!!D
 J$*$$tz)F)F)F)FQ)F)F)F)F)F46UV;;@Aq6IJJJx~~
.

CMM$,D4 8C==r   FTc                 \   t          j        |           }|j        s|du rt          j        d           d}|j        s|S |rO|                    |          }|st          j        d           |du rt          j        |dk    rddz  nd           nt          j        d	|j        z             |S )
z!Open and authenticate a document.Tz$this command supports PDF files onlyzauthentication unsuccessful   zauthenticated as %sowneruserz'%s' requires a password)	r   openis_pdfsysexit
needs_passauthenticater   name)filenamepasswordshowpdfr    rcs         r   	open_filer7   9   s    
,x
 
 C: 9#++7888	B> 
 8h'' 	4H23334<<OrAvv1G;;6RRR+ch6777Jr   c                     t          d |                                 D                       dz   }|                                 D ]3\  }}|                    |          d|}t	          j        |           4dS )zPrint a Python dictionary.c                 ,    g | ]}t          |          S  )len).0ks     r   
<listcomp>zprint_dict.<locals>.<listcomp>N   s    )))SVV)))r   r   : N)maxkeysitemsrjustr   r   )r!   lr=   vmsgs        r   
print_dictrG   L   s}    ))TYY[[)))**Q.A

  1''!****aa( r   c                    t          j        d|z             |                     |          }t          j        |           |                     |          r|                                }	 |                    d          dz   }||         }|                    d          rd}n# t          $ r d}Y nw xY wt          j        d|z             t          j        d           t          j        d           d	S )
zPrint an object given by XREF number.

    Simulate the PDF source in "pretty" format.
    For a stream also print its size.
    z%i 0 objz/Lengthr   z0 Runknownzstream
...%s bytes	endstreamendobjN)r   r   xref_objectxref_is_streamsplitindexendswith	Exception)r    xrefxref_strtempidxsizes         r   
print_xrefrW   T   s     OJ%&&&t$$HOH
$ 
%~~	**Y''!+C9D}}U## !  	 	 	DDD	-4555$$$OHs   +7B# #B21B2pagec           	         t          |dz
            }|                     d|                              dd          } |                     d          }g }t          |          D ]\  }}|dz   }|                                r\t          |          }	d|	cxk    r|k     r&n n#|                    t          |                     nt          j        d||fz             {	 |                    d          \  }
}t          |
          }
t          |          }n)# t          $ r t          j        d||fz             Y nw xY wd|
cxk    r|k     rn nd|cxk    r|k     sn t          j        d||fz             |
|k    r|                    |
           1|
|k     r%|t          t          |
|dz                       z  }\|t          t          |
|dz
  d	                    z  }|S )
aK  Transform a page / xref specification into a list of integers.

    Args
    ----
        rlist: (str) the specification
        limit: maximum number, i.e. number of pages, number of objects
        what: a string to be used in error messages
    Returns
    -------
        A list of integers representing the specification.
    r   N  ,zbad %s specification at item %ir
   z%bad %s range specification at item %ir'   )strreplacerN   	enumerate	isdecimalintappendr-   r.   rQ   listrange)rlistlimitwhatrZ   	rlist_arrout_listseqr!   r   ii1i2s               r   get_listro   k   s    	EAIAMM#q!!))#r22EC  IHy)) 4 4	T!G>> 	D		AA~~~~~~~~~D		****:dAYFGGG	JZZ__FBRBRBB 	J 	J 	JH<ayHIIIII	J R%AOOOOeOOOOH<ayHIII88OOB77U2rAv..///HHU2rAvr22333HHOs   6D#D<;D<c                    t          | j        | j        d          }t          j                            | j                  dz  }d}|dk    r|dz  }d}t          |d          }|j        }t          j	        d| j        |j
        |                                dz
  |||d         |d	         fz             |j        }|d
k    r5|                                }t          j	        d||dk    rdndfz             |                                }|d
k    rt          j	        d|z             t          j	                     | j        rXt          j	        t!          d                     |                                }t%          ||           t          j	                     | j        rHt          j	        t!          d                     t'          |j                   t          j	                     | j        rrt          j	        t!          d                     t+          | j        |                                d          }|D ]%}t%          ||           t          j	                     &| j        rt          j	        t!          d                     t+          | j        |j
        dz             }	|	D ]V}
|
dz
  }|                    |          }t          j	        d|
z             t%          ||           t          j	                     W| j        rZt          j	        t!          d                     t          j	        |                                           t          j	                     |                                 d S )NTi   KBi  MBr   z7'%s', pages: %i, objects: %i, %g %s, %s, encryption: %sformat
encryptionr   z5document contains %i root form fields and is %ssigned   znot r\   z#document contains %i embedded fileszPDF catalogzPDF metadatazobject informationrR   )rh   zpage informationzPage %i:zPDF trailer)r7   inputr3   ospathgetsizeroundmetadatar   r   
page_countxref_lengthis_form_pdfget_sigflagsembfile_countcatalogr   pdf_catalogrW   rG   xrefsro   pages	page_xreftrailerpdf_trailerclose)argsr    rV   flagmetar   r"   rR   xreflpagelpnos              r   r4   r4      s=   
DJt
4
4C7??4:&&-DDd{{q>>D<DOAJNOO!N
		
   	A1uuCAFF&&+,	
 	
 	
 	A1uu=ABBBO| //000  3} 001113<   z !566777S__%6%6VDDD 	 	DsD!!!Oz !344555S^a%788 	 	CaA==##DOJ,---sD!!!O| //000))***IIKKKKKr   c                    t          | j        | j        d          }| j        }d                    |          }| j        sT|                    | j        | j        | j	        | j
        | j        | j        | j        || j        | j        | j                   d S t#          | j        |j        dz             }t'          j                    }|D ]}|dz
  }|                    |||            |                    | j        | j        | j	        | j
        | j        | j        | j        || j        | j        | j                   |                                 |                                 d S )NTr5   keepnonezrc4-40zrc4-128zaes-128zaes-256)
garbagedeflateprettycleanasciilinearrt   owner_pwuser_pwpermissionsr   	from_pageto_page)r7   rv   r3   rt   rO   r   saveoutputr   compressr   sanitizer   r   r)   r*   
permissionro   r|   r   r+   
insert_pdfr   )r   r    rt   encryptr   outdocr   r   s           r   r   r      sh   
DJ4
8
8
8CJIOO G : KLM;-*;ZI 	 	
 	
 	
 	 TZ!!344E\^^F 7 7!G#A6666
KK{mj{	O     IIKKK
LLNNN
Fr   c                 ~   | j         }t          j                    }|D ]}|                    d          }t	          |          dk    r|d         nd}t          |d         |d          }d                    |dd                   }|r4t          d                    |dd                   |j        dz             }nt          d|j        dz             }|D ] }	|
                    ||	dz
  |	dz
             !|                                 |                    | j        d	d
           |                                 dS )z&Join pages from several PDF documents.r]   r   Nr   Tr   r(   r   r   )r   r   )rv   r   r+   rN   r;   r7   joinro   r|   re   r   r   r   r   )
r   doc_listr    src_itemsrc_listr3   srcr   	page_listrl   s
             r   doc_joinr     s:   zH
,..C  >>#&&"%h--!"3"38A;;X4888!""&& 	5 (122,!7!7!9KLLIIa!!344I 	@ 	@ANN3!a%QN????		HHT[!TH222IIKKKKKr   c           	      X   t          | j        | j        d          }|                                s+| j        r| j        | j        k    rt          j        d           t          | j        | j                  }| j	        rt          | j	                  nt                      }t          |                                          }|r||k    st          j        d           n|}|st          j        d           |t          |                                          z  }|r$t          j        dt          |          z             |D ]x}|                    |          }|                    |          }|                    |||d         |d         |d	         
           t!          j        d|d|j	        d           y|                                 | j        r-| j        | j        k    r|                    | j        d           n|                                 |                                 dS )z!Copy embedded files between PDFs.Tr   cannot save PDF incrementallyz%not all names are contained in sourceznothing to copyz2following names already exist in receiving PDF: %sr2   	ufilenamedescr2   r   r   zcopied entry 'z' from ''ru   r   N)r7   rv   r3   can_save_incrementallyr   r-   r.   source	pwdsourcer1   setembfile_namesr^   embfile_infoembfile_getembfile_addr   r   r   r   saveIncr)	r   r    r   names	src_names	intersectr!   infobuffs	            r   embedded_copyr     s   
DJ4
8
8
8C%%'' 2K2;$*440111
DK
0
0C"i2C	NNNSUUEC%%''((I 	!!H<=== $"###C--//000I XEIVWWW 
J 
J%%t$$*%;'f 	 	
 	
 	
 	sxxxHIIIIIIKKK{ t{dj00a((((IIKKKKKr   c                 :   t          | j        | j        d          }|                                s+| j        r| j        | j        k    rt          j        d           	 |                    | j                   nG# t          t          j        j        f$ r)}t          j        d| j        d|            Y d}~nd}~ww xY w| j        r| j        | j        k    r|                                 n|                    | j        d           |                                 dS )	zDelete an embedded file entry.Tr   r   no such embedded file r?   Nr   r   )r7   rv   r3   r   r   r-   r.   embfile_delr1   
ValueErrorr   mupdfFzErrorBaser   r   r   )r   r    es      r   embedded_delr   E  s   
DJ4
8
8
8C%%'' 2K2;$*440111>	""""12 > > ><$)<<<<========>; )$+33a(((IIKKKKKs   A8 8B<B77B<c                 F   t          | j        | j        d          }	 |                    | j                  }|                    | j                  }nG# t          t          j        j	        f$ r)}t          j        d| j        d|            Y d}~nd}~ww xY w| j        r| j        n|d         }t          |d          5 }|                    |           ddd           n# 1 swxY w Y   t          j        d| j        d	|d
           |                                 dS )z&Retrieve contents of an embedded file.Tr   r   r?   Nr2   wbzsaved entry 'z' as 'r   )r7   rv   r3   r   r1   r   r   r   r   r   r-   r.   r   r+   writer   r   )r   r    streamdr   r2   r   s          r   embedded_getr   X  s`   
DJ4
8
8
8C>++TY''12 > > ><$)<<<<========>"k<t{{q}H	h		 V              OO$)))XXXFGGGIIKKKKKs)   4A B.BB C""C&)C&c                    t          | j        | j        d          }|                                s+| j        | j        | j        k    rt          j        d           	 |                    | j                   t          j        d| j        z             n# t          $ r Y nw xY wt          j                            | j                  r$t          j                            | j                  st          j        d| j        z             t          | j        d          5 }|                                }ddd           n# 1 swxY w Y   | j        }|}| j        s|}n| j        }|                    | j        ||||           | j        r| j        | j        k    r|                                 n|                    | j        d	
           |                                 dS )zInsert a new embedded file.Tr   Nr   zentry '%s' already existszno such file '%s'rbr   ru   r   )r7   rv   r3   r   r   r-   r.   r   r1   rQ   rw   rx   existsisfiler+   readr   r   r   r   r   r   r    fr   r2   r   r   s          r   embedded_addr   g  s   
DJ4
8
8
8C%%'' 2t{dj880111	""",ty89999    7>>$)$$ 2BGNN49,E,E 2$ty0111	di		 !              yHI9 yOO	6H	     ; )$+33a(((IIKKKKKs$   6B 
B! B!D>>EEc                    t          | j        | j        d          }|                                s+| j        | j        | j        k    rt          j        d           	 |                    | j                   n,# t          $ r t          j        d| j        z             Y nw xY w| j
        t          j
                            | j
                  rft          j
                            | j
                  rBt          | j
        d          5 }|                                }ddd           n# 1 swxY w Y   nd}| j        r| j        }nd}| j        r| j        }n| j        r| j        }nd}| j        r| j        }nd}|                    | j        ||||           | j        | j        | j        k    r|                                 n|                    | j        d	           |                                 dS )
z0Update contents or metadata of an embedded file.Tr   Nr   no such embedded file '%s'r   r   ru   r   )r7   rv   r3   r   r   r-   r.   r   r1   rQ   rx   rw   r   r   r+   r   r2   r   r   embfile_updr   r   r   r   s          r   embedded_updr     s4   
DJ4
8
8
8C%%'' 2t{dj880111;#### ; ; ;-	9:::::; 		GNN49%% 	GNN49%% 	 $)T"" 	aVVXXF	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 } =~ N			 M			y yOO	6H	     {dkTZ77a(((IIKKKKKs$   A8 8&B! B!D))D-0D-c                    t          | j        | j        d          }|                                }| j        | j        |vrt          j        d| j        z             nt          j                     t          j        dt          |          t          |          dk    rdndfz             t          j                     t          |                    | j                             t          j                     dS |st          j        d	|j        z             dS t          |          dk    rd
|j        t          |          fz  }n
d|j        z  }t          j        |           t          j                     |D ]h}| j        st          j        |           |                    |          }t          |                    |                     t          j                     i|                                 dS )zList embedded files.Tr   Nr   z!printing 1 of %i embedded file%s:r   r"   r\   z'%s' contains no embedded filesz-'%s' contains the following %i embedded filesz)'%s' contains the following embedded file)r7   rv   r3   r   r1   r-   r.   r   r   r;   rG   r   detailr   )r   r    r   rF   r1   _s         r   embedded_listr     s   
DJ4
8
8
8CEy9E!!H1DI=>>>>OO3u::c%jj1nnss"=>   Os''	22333OF 9CHDEEE
5zzA~~=3u::@VV9CHDOCO  { 	OD!!!T""3##D))***IIKKKKKr   c           
         | j         s| j        st          j        d           t	          | j        | j        d          }| j        rt          | j        |j	        dz             }nt          d|j	        dz             }| j        s*t          j                            t          j                  }n\| j        }t          j                            |          rt          j                            |          st          j        d|z             t%                      }t%                      }|D ]1}| j         r|                    |dz
            }|D ]}|d         }	|	|vr|                    |	           |                    |	          \  }
}}}|dk    s|sFt          j                            ||
                    dd	           d	|	 d
|           }t1          |d          5 }|                    |           ddd           n# 1 swxY w Y   d}| j        rC|                    |dz
            }|D ]'}|d         }	|	|vr|                    |	           t7          ||          }t9          |          t:          u rs|d         }|d         }t          j                            |d|	|fz            }t1          |d          5 }|                    |           ddd           n# 1 swxY w Y   t          j                            |d|	z            }|j        j        dk     r|ntA          j!        t@          j"        |          }|#                    |           )3| j         r&tA          j$        dtK          |          |fz             | j        r&tA          j$        dtK          |          |fz             |&                                 dS )z)Extract images and / or fonts from a PDF.z"neither fonts nor images requestedTr   r   z"output directory %s does not existr   zn/ar[   r
   .r   Nextimagez	img-%i.%sz
img-%i.pngr   zsaved %i fonts to '%s'zsaved %i images to '%s')'fontsimagesr-   r.   r7   rv   r3   r   ro   r|   re   r   rw   rx   abspathcurdirr   isdirr   get_page_fontsaddextract_fontr   r_   r+   r   get_page_imagesr%   typedictr   r   r   r   r   r   r   r;   r   )r   r    r   out_dir
font_xrefsimage_xrefsr   itemlistr!   rR   fontnamer   r   bufferoutnameoutfiler   imgdatar$   s                      r   extract_objectsr     s*   : 7dk 75666
DJ4
8
8
8Cz -S^a%788a!+,,; E'//"),,+w'' 	EBGMM',B,B 	EH9GCDDDJ%%K $+ $+: 	"))#'22H  " "Awz))NN4(((/2/?/?/E/E,Hc1fe||6|  gllH$4$4S#$>$>!M!M!M!M!M!M G gt,, .f---. . . . . . . . . . . . . . .!F; 	+**3733H  + +Aw{**OOD)))$S$//CCyyD((!%j"%g,"$',,wtSk8Q"R"R!'400 3G#MM'2223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 #%',,wt8K"L"L  #~/!33  C!(s!C!C 
 		'***z O0C
OOW3MMNNN{ Q1S5E5Ew4OOPPPIIKKKKKs$   G??HH?K!!K%(K%c                    |rdnt          dg          }|                     d|          }|s|s|                    |           d S |                    |                    dd                     |                    |           d S )N   
   textflagsutf8surrogatepasserrors)bytesget_textr   encode)	rX   textoutGRIDfontsize
noformfeed
skip_emptyr  eopr   s	            r   page_simpler  !  s    
.%%5";;C==u=--D  	MM#MM$++f_+==>>>MM#
Fr   c                 \   |rdnt          dg          }|                     d|          }|g k    r|s|                    |           d S |                    d            |D ]2}	|                    |	d                             dd	
                     3|                    |           d S )Nr   r   blocksr  c                 "    | d         | d         fS )Nru   r   r:   )bs    r   <lambda>z page_blocksort.<locals>.<lambda>4  s    qtQqTl r   keyr   r  r  r  )r  r  r   sortr	  )
rX   r
  r  r  r  r  r  r  r  r  s
             r   page_blocksortr  -  s    
.%%5";;C]]85]11F|| 	MM#
KK**K+++ C Cadkk&kAABBBBMM#
Fr   c                   ! |rdnt          dg          }dt          t                   dt          dt          fd}dt          t                   dt          fd}	d	t          t                   d
t
          j        f!fd}
dt          dt          fd!d }|                     d|          d	         } |
||           \  }}}}}|g k    r|s|	                    |           d S  |	||          }|
                    d            i }|D ]E}|\  }}}} |||          }|                    |g           }|                    |           |||<   Ft          |                                          }|
                                 ||z
  }i }|D ]l}||         }t          |          }|dk     rd||<   %d |D             }|
                                 t!          j        |          }||k     r|}|d         ||<   m||d         |d         z
  z  |t          |          z  z  dz  }|d         }|	                    d           |D ]t}||k     r |	                    d           ||z  }||k       |||||         ||                   } |	                    | dz                       dd                     ||z   }u|	                    |           d S )Nr   r   valuesvaluereturnc                 n    t          j        | |          }|r| |dz
           S t          d|| fz            )zFind the right row coordinate.

        Args:
            values: (list) y-coordinates of rows.
            value: (int) lookup for this value (y-origin of char).
        Returns:
            y-ccordinate of appropriate line for value.
        r   zLine for %g not found in %s)bisectbisect_rightRuntimeError)r  r  rl   s      r   find_line_indexz$page_layout.<locals>.find_line_index?  sD     .. 	!!a%= 8E6?JKKKr   rowsc                     t          |           } |                                  | d         g}| dd          D ]&}||d         |z   k    r|                    |           '|S )Nr   r   r'   )rd   r  rc   )r#  r  nrowshs       r   curate_rowsz page_layout.<locals>.curate_rowsN  se    Dzz		a	abb 	  	 AE"I$$$Qr   r  rX   c           
      \   t                      }|j        j        }|j        j        }|}|}d}g }| D ]s}	|	d         D ]f}
|
d         dk    r|
d         \  }}}}|dk     s||j        j        k    r4||z
  }||k    r|}|
d         D ]}|d         k    r|d         D ]}|d         \  }}}}||z
  }|d	         \  }}t	          t          |                    }|                    |           |d
         }||k    r|dk    r|}||k     r|}|dk    rs|g k    rm|d         \  }}}}||k    rZ|t          d          k    r ||z             }n.|dk    rt          d          }n|dk    rt          d          }n|}||||f|d<   |                    ||||f           hu|||||fS )Nr   linesdir)r   r   bboxspansrV   charsorigincr[   r'      rl     rD     )	r   rectwidthheightrb   rz   r   chrrc   )r  rX   r#  
page_widthpage_height	rowheightleftrightr-  blocklinex0y0x1y1r5  spanr/  r   cwidthoxoychold_chold_oxold_oy
old_cwidthligr  joinligatures                               r   process_blocksz#page_layout.<locals>.process_blocksW  s1   uuY_
i&	 )	; )	;Eg (; (;;&((!%fBB66R$)"222bv%% &I M ; ;DF|x// !'] ; ;'(yAr1!#b!"8B r^^sV"99s#%D 2::$&E!Q;;5B;;AFr>FFFJ%||#)S[[#8#8*6,v{*C*CCC%'3YY*-f++CC%'3YY*-f++CC*0C-0&&*,Mb	 (b"b&%9::::5;;(;R dD%22r   rK  c                 ,   | dk    rt          d          S | dk    rt          d          S | dk    rt          d          S | dk    rt          d          S | d	k    rt          d
          S | dk    rt          d          S | dk    rt          d          S | S )zReturn ligature character for a given pair / triple of characters.

        Args:
            lig: (str) 2/3 characters, e.g. "ff"
        Returns:
            Ligature, e.g. "ff" -> chr(0xFB00)
        ffr0  fii  fli  ffir1  fflr2  fti  sti  )r6  )rK  s    r   rL  z!page_layout.<locals>.joinligature  s     $;;v;;D[[v;;D[[v;;E\\v;;E\\v;;D[[v;;D[[v;;
r   c                    d}d}d}d}|t           j        k    rt          d|z            |D ]}|\  }	}
}}|
| z
  }
|
|z   }||	k    r|
|z
  |dz  k    r&|	dk    r||
z
  |z  dk    r9|	}|
||z   k     r
||	z  }|}|
}N|	dk    rUt          |
|z            t	          |          z
  }|
|k    r|dk    r|d|z  z  }||	z  }|}|
}|                                S )a  Produce the text of one output line.

        Args:
            left: (float) left most coordinate used on page
            slot: (float) avg width of one character in any font in use.
            minslot: (float) min width for the characters in this line.
            chars: (list[tuple]) characters of this line.
        Returns:
            text: (str) text string for this line
        r\   r   z%program error: minslot too small = %gg?r[   g?r   )r   EPSILONr!  rb   r;   rstrip)r:  slotminslotlcharsr   old_charold_x1rH  r/  charrD  r   rC  r@  deltas                  r   make_textlinez"page_layout.<locals>.make_textline  s@    go%%FPQQQ  	  	A"#D"adBfB 4BK6C<$?$? s{{v5;;HFW$$$
 s{{T	NNSYY.EF{{uqyye#DLDFFF{{}}r   rawdictr  c                     | d         S )Nr   r:   )r/  s    r   r  zpage_layout.<locals>.<lambda>  s
    QqT r   r  r(   r   c                     g | ]
}|d          S )ru   r:   )r<   r/  s     r   r>   zpage_layout.<locals>.<listcomp>  s    '''1!A$'''r   r   r'   g333333?
r  r  r  )r  r   rb   r   r   r   Pager^   r  r   r  getrc   rd   rA   r;   
statisticsmedianr	  )"rX   r
  r  r  r  r  r  r  r"  r'  rM  r`  r  r-  r#  r:  r;  r9  r)  r/  r   rE  yr[  rA   rY  minslotsr=   ccountwidths	this_slotrowposr   rL  s"      `                             @r   page_layoutro  ;  sy   
.%%5";;CLS	 L# L# L L L L#c( T    23tDz 23 23 23 23 23 23 23 23h# #    43 3 3l ]]9E]228<F*8.*F*F'E4ui{{ 	MM#;tT""D 
JJ>>J""" E  1b!OD"%%1b!!aa 

DIIKKK 4<DH    qVA::HQK'''''%f--	tDQi T"XQ/0ID		4IJSPI!WFMM%  qjjMM%   iF qjj }T4!eAh??td{**6/*JJKKKYMM#r   c                    t          | j        | j        d          }t          | j        |j        dz             }| j        }|,t          j        	                    |j
                  \  }}|dz   }t          |d          5 }t          j        t          j        z  }| j        r|t          j        z  }| j        r|t          j        z  }| j        r|t          j        z  }t&          t(          t*          d}|D ]>}	||	dz
           }
 || j                 |
|| j        | j        | j        | j        |           ?	 d d d            d S # 1 swxY w Y   d S )NFr   r   z.txtr   simpler  layoutr  )r7   rv   r3   ro   r   r|   r   rw   rx   splitextr1   r+   r   TEXT_PRESERVE_LIGATURESTEXT_PRESERVE_WHITESPACEconvert_whitenoligaturesextra_spacesTEXT_INHIBIT_SPACESr  r  ro  modegridr  r  r  )r   r    r   r   r2   r   r
  r  funcr   rX   s              r   gettextr~    s   
DJ5
9
9
9CTZ!!344E[F~g&&sx00!F"	fd		 w/'2RR 	6W55E 	5W44E 	1W00E!$!
 

  
	 
	CsQw<DDO	    
	                 s   ?B2D??EEc                 V    t          j        d           t          j        d           d S )NzThis is from PyMuPDF message().zThis is from PyMuPDF log().)r   r   log)r   s    r   	_internalr  =  s*    O5666K-.....r   c                  n   t          j        dt          d                    } |                     dd          }|                    dt          d          	          }|                    d
t          d           |                    dd           |                    ddd           |                    ddd           |                    ddd           |                    dt          d           |                    dt          d           |                    t                     |                    dt          d          	          }|                    d
t          d           |                    dt          d            |                    dd           |                    d!d"d#d$%           |                    d&t          d'           |                    d(t          d)           |                    d*t          d+t          d,          d-.           |                    d/dd0d12           |                    d3dd0d42           |                    d5dd0d62           |                    d7t          d8d9:           |                    d;dd0d<2           |                    d=dd0d>2           |                    dd?           |                    t                     |                    d@t          dA          dBC          }|                    d
dDdEF           |                    dGdHdIJ           |                    t                     |                    dKt          dL          	          }|                    d
t          d           |                    dMddN           |                    dOddP           |                    dGdQ           |                    dd           |                    dt          dR           |                    t                     |                    dSt          dT          	          }|                    d
d           |                    dUdV           |                    dWddX           |                    dd           |                    t                     |                    dYt          dZ          	          }|                    d
d           |                    dd           |                    dGd[           |                    dUdHd\J           |                    d]dHd^J           |                    d_d`           |                    t                     |                    dat          db          	          }|                    d
d           |                    dd           |                    dGd[           |                    dUdHdcJ           |                    t                      |                    ddt          de          dfC          }|                    d
d           |                    dUdHdgJ           |                    dd           |                    dGdh           |                    d]di           |                    djdk           |                    dldm           |                    d_dn           |                    t"                     |                    dot          dp          	          }	|	                    d
t          d           |	                    dUdHdgJ           |	                    dd           |	                    dGdq           |	                    t$                     |                    drt          ds          	          }
|
                    d
t          dt           |
                    ddu           |
                    dGdv           |
                    dwdHdxJ           |
                    dydz           |
                    dUdDd{F           |
                    t&                     |                    d|t          d}          	          }|                    d
t          d~           |                    dd           |                    dt          ddd.           |                    dt          dd           |                    dddd0           |                    dddd0           |                    dddd0           |                    dddd0           |                    dddd0           |                    dGd           |                    dt(          dd           |                    dt(          dd           |                    t*                     |                    dt          d          	          }|                    t,                     |                                 }t1          |d          s|                                  dS |                    |           dS )zDefine command configurations.r   zBasic PyMuPDF Functions)progdescriptionSubcommandsz/Enter 'command -h' for subcommand specific help)titlehelpr4   zdisplay PDF information)r  rv   zPDF filename)r   r  z	-passwordr3   )r  z-catalog
store_truezshow PDF catalog)actionr  z-trailerzshow PDF trailerz	-metadatazshow PDF metadataz-xrefsz&show selected objects, format: 1,5-7,Nz-pagesz'show selected pages, format: 1,5-7,50-N)r}  r   z.optimize PDF, or create sub-PDF if pages givenr   zoutput PDF filenamez-encryptionzencryption methodr   r   )r  choicesdefaultz-ownerzowner passwordz-userzuser passwordz-garbagezgarbage collection level   r   )r   r  r  r  z	-compressFzcompress (deflate) output)r  r  r  z-asciizASCII encode binary dataz-linearzformat for fast web displayz-permissionr'   zinteger with permission levels)r   r  r  z	-sanitizezsanitize / clean contentsz-prettyzprettify PDF structurez/output selected pages pages, format: 1,5-7,50-Nr   zjoin PDF documentsz3specify each input as 'filename[,password[,pages]]')r  epilog*zinput filenames)nargsr  z-outputTzoutput filename)requiredr  extractz extract images and fonts to diskz-imageszextract imagesz-fontszextract fontsz-folder to receive output, defaults to currentz-consider these pages only, format: 1,5-7,50-Nz
embed-infozlist embedded filesz-namezif given, report only this onez-detailzdetail informationz	embed-addzadd embedded filez-output PDF filename, incremental save if nonezname of new entryz-pathzpath to data for new entryz-desczdescription of new entryz	embed-delzdelete embedded filezname of entry to deletez	embed-updzupdate embedded filez*except '-name' all parameters are optionalzname of entryz-Output PDF filename, incremental save if nonezpath to new data for entryz	-filenameznew filename to store in entryz
-ufilenamez&new unicode filename to store in entryz!new description to store in entryzembed-extractzextract embedded file to diskz'output filename, default is stored namez
embed-copyz copy embedded files between PDFszPDF to receive embedded fileszpassword of inputz2output PDF, incremental save to 'input' if omittedz-sourcezcopy embedded files from herez
-pwdsourcezpassword of 'source' PDFzrestrict copy to these entriesr~  z(extract text in various formatting modeszinput document filenamezpassword for input documentz-modez-mode: simple, block sort, or layout (default)rq  rs  z select pages, format: 1,5-7,50-Nz1-N)r   r  r  z-noligaturesz*expand ligature characters (default False))r  r  r  z-convert-whitez6convert whitespace characters to white (default False)z-extra-spacesz%fill gaps with spaces (default False)z-noformfeedz-write linefeeds, no formfeeds (default False)z-skip-emptyz+suppress pages with no text (default False)z3store text in this file (default inputfilename.txt)z-gridz+merge lines if closer than this (default 2)r(   z	-fontsizez4only include text with a larger fontsize (default 3)ru   internalzinternal testingr}  N)argparseArgumentParserr   add_subparsers
add_parseradd_argumentr^   set_defaultsr4   rb   re   r   r   r   r   r   r   r   r   r   floatr~  r  
parse_argshasattr
print_helpr}  )parsersubpsps_showps_cleanps_join
ps_extractps_embed_addps_embed_delps_embed_updps_embed_extractps_embed_copy
ps_gettextps_internalr   s                 r   mainr  A  s   $677  F !!"S "  E v8<U3V3VWWGs@@@:666L?QRRRL?QRRR\@STTTs!I     s!J     d###
 X&VWW    H '.AAA(3HIII+J777 K	     (3CDDD'/BBB'a     (	     u;U     *	     C2R     (	     ,<T     H     u%%%
 122D   G
 2CDDDT8IJJJh'''
 !!x(JKK "  J G#NCCCIlAQRRRH\PPPG     Kj999s!P     111
 (+@"A"A   G ~666'GHHH<>RSSS:666m,,,
 ##*=!>!> $  L gN;;;k
;;;G     g;NOOOg;WXXXg,FGGG<000
 ##*@!A!A $  L gN;;;k
;;;G     g;TUUU<000
 ##344; $  L
 gN;;;g?KKKk
;;;G     g,HIIIk0PQQQC     g,OPPP<000
 ''X.M%N%N (   !!'.!III!!'D!OOO!!+J!???!!A "    !!|!444
 $$(+M"N"N %  M wS7VWWW{1DEEEL     D'F     |2LMMMs!A     M222
 !!x(RSS "  J G#4MNNNK.KLLL<.     /	     9	     E	     4	     <	     :	     B     :	     C	     )))
 ""); < < #  K ),,,
 D4   		$r   __main__)FT)rX   )"r  r  rw   r-   rg  typingr   r   r   r\   r   r   r%   r7   rG   rW   ro   r4   r   r   r   r   r   r   r   r   r   r  r  ro  r~  r  r  __name__r:   r   r   <module>r     s     				 



     " " " " " " " " " "      ( ( (! ! !H   &    ., , , ,^< < <~, , ,^  *% % %P  &    D/ / /d! ! !H? ? ?D	 	 	  ^ ^ ^B  B/ / /o o od	 zDFFFFF r   