
    Kiaf                        d dl mZmZmZ d dlZd dlZd dlZd dlZd dlZej	        j
        dk    reZeZd dlmZ d dlmZ neZd dlmZ  ej        e          Zd Zd Z	 dZ	 dZ	  G d	 d
          Zd Z G d de          ZdS )    )print_functionabsolute_importunicode_literalsN   )MappingProxyType)getfullargspec)
getargspecc                  ,    ddl m}  t          |           S )aL  
    Return a dictionary that contains the default collection of known LaTeX
    escape sequences for unicode characters.

    The keys of the dictionary are integers that correspond to unicode code
    points (i.e., `ord(char)`).  The values are the corresponding LaTeX
    replacement strings.

    The returned dictionary may not be modified.  To alter the behavior of
    :py:func:`unicode_to_latex()`, you should specify custom rules to a new
    instance of :py:class:`UnicodeToLatexEncoder`.

    .. versionadded:: 2.0

       This function was introduced in `pylatexenc 2.0`.
       )	uni2latex)_uni2latexmapr   _MappingProxyType)
_uni2latexs    /var/www/development/aibuddy-work/election-extract/venv/lib/python3.11/site-packages/pylatexenc/latexencode/_unicode_to_latex_encoder.pyget_builtin_uni2latex_dictr   6   s$    " 766666Z(((    r   c                   $    e Zd ZdZ	 	 ddZd ZdS )UnicodeToLatexConversionRulea*  
    Specify a rule how to convert unicode characters into LaTeX escapes.

    .. py:attribute:: rule_type
    
       One of :py:data:`RULE_DICT`, :py:data:`RULE_REGEX`, or
       :py:data:`RULE_CALLABLE`.

    .. py:attribute:: rule

       A specification of the rule itself.  The `rule` attribute is an object
       that depends on what `rule_type` is set to.  See below.

    .. py:attribute:: replacement_latex_protection

       If non-`None`, then the setting here will override any
       `replacement_latex_protection` set on
       :py:class:`UnicodeToLatexConversionRule` objects.  By default the value
       is `None`, and you can set a replacement_latex_protection globally for
       all rules on the :py:class:`UnicodeToLatexEncoder` object.

       The use of this attribute is mainly in case you have a fancy rule in
       which you already guarantee that whatever you output is valid LaTeX even
       if concatenated with the remainder of the string; in this case you can
       set `replacement_latex_protection='none'` to avoid unnecessary or
       unwanted braces around the generated code.

       .. versionadded:: 2.10

          The `replacement_latex_protection` attribute was introduced in
          `pylatexenc 2.10`.


    Constructor syntax::
    
        UnicodeToLatexConversionRule(RULE_XXX, <...>)
        UnicodeToLatexConversionRule(rule_type=RULE_XXX, rule=<...>)

        UnicodeToLatexConversionRule(..., replacement_latex_protection='none')

    Note that you can get some built-in rules via the
    :py:func:`get_builtin_conversion_rules()` function::

        conversion_rules = get_builtin_conversion_rules('defaults') # all defaults


    Rules types:
    
      - `RULE_DICT`: If `rule_type` is `RULE_DICT`, then `rule` should be a
        dictionary whose keys are integers representing unicode code points
        (e.g., `0x210F`), and whose values are corresponding replacement strings
        (e.g., ``r'\hbar'``).  See :py:func:`get_builtin_uni2latex_dict()` for
        an example.

      - `RULE_REGEX`: If `rule_type` is `RULE_REGEX`, then `rule` should be an
        iterable of tuple pairs `(compiled_regular_expression,
        replacement_string)` where `compiled_regular_expression` was obtained
        with `re.compile(...)` and `replacement_string` is anything that can be
        specified as the second (`repl`) argument of `re.sub(...)`.  This can be
        a replacement string that includes escapes (like ``\1, \2, \g<name>``)
        for captured sub-expressions or a callable that takes a match object as
        argument.

        .. note::
    
           The replacement string is parsed like the second argument to
           `re.sub()` and backslashes have a special meaning because they can
           refer to captured sub-expressions.  For a literal backslash, use two
           backslashes ``\\`` in raw strings, four backslashes in normal
           strings.

        Example::

          regex_conversion_rule = UnicodeToLatexConversionRule(
              rule_type=RULE_REGEX,
              rule=[
                  # protect acronyms of capital letters with braces,
                  # e.g.: ABC -> {ABC}
                  (re.compile(r'[A-Z]{2,}'), r'{\1}'),
                  # Additional rules, e.g., "..." -> "\ldots"
                  (re.compile(r'...'), r'\\ldots'), # note double \\
              ]
          )

      - `RULE_CALLABLE`: If `rule_type` is `RULE_CALLABLE`, then `rule` should
        be a callable that accepts two arguments, the unicode string and the
        position in the string (an integer).  The callable will be called with
        the original unicode string as argument and the position of the
        character that needs to be encoded.  If this rule can encode the given
        character at the given position, it should return a tuple
        `(consumed_length, replacement_string)` where `consumed_length` is the
        number of characters in the unicode string that `replacement_string`
        represents.  If the character(s) at the given position can't be encoded
        by this rule, the callable should return `None` to indicate that further
        rules should be attempted.

        If the callable accepts an additional argument called `u2lobj`, then the
        :py:class:`UnicodeToLatexEncoder` instance is provided to that argument.

        For example, the following callable should achieve the same effect as
        the previous example with regexes::

          def convert_stuff(s, pos):
              m = re.match(r'[A-Z]{2,}', s, pos)
              if m is not None:
                  return (m.end()-m.start(), '{'+m.group()+'}')
              if s.startswith('...', pos): # or  s[pos:pos+3] == '...'
                  return (3, r'\ldots')
              return None


    .. versionadded:: 2.0

       This class was introduced in `pylatexenc 2.0`.
    Nc                 0    || _         || _        || _        d S N)	rule_typerulereplacement_latex_protection)selfr   r   r   s       r   __init__z%UnicodeToLatexConversionRule.__init__   s      #	,H)))r   c                     d                     | j        j        | j        t	          | j                  j        t          | j                            S )Nz>{}(rule_type={!r}, rule=<{}>, replacement_latex_protection={}))format	__class____name__r   typer   reprr   )r   s    r   __repr__z%UnicodeToLatexConversionRule.__repr__   sA    OVVN#T^T$)__5M233
 
 	
r   )NN)r   
__module____qualname____doc__r   r"    r   r   r   r   l   sP        r rf (,.2I I I I
 
 
 
 
r   r   c                     | dk    r#t          t          t                                gS | dk    r"ddlm} t          t          |j                  gS t          d                    |                     )a>  
    Return a built-in set of conversion rules specified by a given name
    `builtin_name`.

    There are two builtin conversion rules, with the following names:

      - `'defaults'`: the default conversion rules, a custom-curated list of
        unicode chars to LaTeX escapes.

      - `'unicode-xml'`: the conversion rules derived from the `unicode.xml` file
        maintained at https://www.w3.org/TR/xml-entity-names/#source by David
        Carlisle.

    The return value is a list of :py:class:`UnicodeToLatexConversionRule`
    objects that can be either directly specified to the `conversion_rules=`
    argument of :py:class:`UnicodeToLatexEncoder`, or included in a larger list
    that can be provided to that argument.
    
    .. versionadded:: 2.0

       This function was introduced in `pylatexenc 2.0`.
    defaults)r   r   zunicode-xmlr   )_uni2latexmap_xmlzUnknown builtin rule set: {})r   	RULE_DICTr    r)   r   
ValueErrorr   )builtin_namer)   s     r   get_builtin_conversion_rulesr.      s    . z!!-	3M3O3OQ Q Q S 	S}$$''''''-	3D3NP P P R 	R
3::<HH
I
IIr   c                        e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z xZS )UnicodeToLatexEncoderu  
    Encode a string with unicode characters into a LaTeX snippet.

    The following general attributes can be specified as keyword arguments to
    the constructor.  Note: These attributes must be specified to the
    constructor and may NOT be subsequently modified.  This is because in the
    constructor we pre-compile some rules and flags to optimize calls to
    :py:meth:`unicode_to_text()`.

    .. py:attribute:: non_ascii_only

       Whether we should convert only non-ascii characters into LaTeX sequences,
       or also all known ascii characters with special LaTeX meaning such as
       '\\\\', '$', '&', etc.

       If `non_ascii_only` is set to `True` (the default is `False`), then
       conversion rules are not applied at positions in the string where an
       ASCII character is encountered.

    .. py:attribute:: conversion_rules

       The conversion rules, specified as a list of
       :py:class:`UnicodeToLatexConversionRule` objects.  For each position in
       the string, the rules will be applied in the given sequence until a
       replacement string is found.

       Instead of a :py:class:`UnicodeToLatexConversionRule` object you may also
       specify a string specifying a built-in rule (e.g., 'defaults'), which
       will be expanded to the corresponding rules according to
       :py:func:`get_builtin_conversion_rules()`.
    
       If you specify your own list of rules using this argument, you will
       probably want to include presumably at the end of your list the element
       'defaults' to include all built-in default conversion rules.  To override
       built-in rules, simply add your custom rules earlier in the list.
       Example::

         conversion_rules = [
             # our custom rules
             UnicodeToLatexConversionRule(RULE_REGEX, [
                 # double \\ needed, see UnicodeToLatexConversionRule
                 ( re.compile(r'...'), r'\\ldots' ),
                 ( re.compile(r'î'), r'\\^i' ),
             ]),
             # plus all the default rules
             'defaults'
         ]
         u = UnicodeToLatexEncoder(conversion_rules=conversion_rules)

    .. py:attribute:: replacement_latex_protection

       How to "protect" LaTeX replacement text that looks like it could be
       interpreted differently if concatenated to arbitrary strings before and
       after.

       Currently in the default scheme only one situation is recognized: if the
       replacement string ends with a latex macro invocation with a non-symbol
       macro name, e.g. ``\textemdash`` or ``\^\i``.  Indeed, if we naively
       replace these texts in an arbitrary string (like ``maître``), we might
       get an invalid macro invocation (like ``ma\^\itre`` which causes un known
       macro name ``\itre``).

       Possible protection schemes are:

         - 'braces' (the default):  Any suspicious replacement text (that
           might look fragile) is placed in curly braces ``{...}``.

         - 'braces-all':  All replacement latex escapes are surrounded in
           protective curly braces ``{...}``, regardless of whether or not they
           might be deemed "fragile" or "unsafe".

         - 'braces-almost-all':  Almost all replacement latex escapes are
           surrounded in protective curly braces ``{...}``.  This option
           emulates closely the behavior of `brackets=True` of the function
           `utf8tolatex()` in `pylatexenc 1.x`, though I'm not sure it is really
           useful.  [Specifically, all those replacement strings that start with
           a backslash are surrounded by curly braces].

         - 'braces-after-macro':  In the situation where the replacement latex
           code ends with a string-named macro, then a pair of empty braces is
           added at the end of the replacement text to protect the macro.

         - 'none': No protection is applied, even in "unsafe" cases.  This is
           not recommended, as this will likely result in invalid LaTeX
           code. (Note this is the string 'none', not Python's built-in `None`.)

         - any callable object: The callable should take a single argument, the
           replacement latex string associated with a piece of the input (maybe
           a special character) that has been encoded; it should return the
           actual string to append to the output string.

         .. versionadded:: 2.10 

            You can specify a callable object to `replacement_latex_protection`
            since `pylatexenc 2.10`.

    .. py:attribute:: unknown_char_policy

       What to do when a non-ascii character is encountered without any known
       substitution macro.  The attribute `unknown_char_policy` can be set to one of:

         - 'keep': keep the character as is;

         - 'replace': replace the character by a boldface question mark;

         - 'ignore': ignore the character from the input entirely and don't
           output anything for it;

         - 'fail': raise a `ValueError` exception;

         - 'unihex': output the unicode hexadecimal code (U+XXXX) of the
           character in typewriter font;

         - a Python callable --- will be called with argument the character that
           could not be encoded.  (If the callable accepts a second argument
           called 'u2lobj', then the `UnicodeToLatexEncoder` instance is
           provided to that argument.)  The return value of the callable is used
           as LaTeX replacement code.

    .. py:attribute:: unknown_char_warning

       In addition to the `unknown_char_policy`, this attribute indicates
       whether or not (`True` or `False`) one should generate a warning when a
       nonascii character without any known latex representation is
       encountered. (Default: True)

    .. py:attribute:: latex_string_class

       The return type of :py:meth:`unicode_to_latex()`.  Normally this is a
       simple unicode string (`str` on `Python 3` or `unicode` on `Python 2`).

       But you can specify your custom string type via the `latex_string_class`
       argument.  The `latex_string_class` will be invoked with no arguments to
       construct an empty object (so `latex_string_class` can be either an
       object that can be constructed with no arguments or it can be a function
       with no arguments that return a fresh object instance).  The object must
       support the operation "+=", i.e., you should overload the ``__iadd__()``
       method.

       For instance, you can record the chunks that would have been appended
       into a single string as follows::

           class LatexChunkList:
               def __init__(self):
                   self.chunks = []

               def __iadd__(self, s):
                   self.chunks.append(s)
                   return self

           u = UnicodeToLatexEncoder(latex_string_class=LatexChunkList,
                                     replacement_latex_protection='none')
           result = u.unicode_to_latex("é → α")
           # result.chunks == [ r"\'e", ' ', r'\textrightarrow', ' ',
           #                    r'\ensuremath{\alpha}' ]

    .. warning::
      
       None of the above attributes should be modified after constructing the
       object.  The values specified to the class constructor are final and
       cannot be changed.  [Indeed, the class constructor "compiles" these
       attribute values into a data structure that makes
       :py:meth:`unicode_to_text()` slightly more efficient.]

    .. versionadded:: 2.0

       This class was introduced in `pylatexenc 2.0`.
    c                    |                     dd          | _        |                     ddg          | _        |                     dd          | _        |                     dd          | _        |                     d	d
          | _        |                     dt                    | _        |r@t          	                    dd
                    |                                                      t          t          |           j        di | t          j                            d | j        D                       }g | _        |D ]8}|j        t(          k    r9| j                            t-          j        | j        |j        |                     L|j        t4          k    r9| j                            t-          j        | j        |j        |                     |j        t8          k    rn|j        }dt;          |          d         v rt-          j        |j        |           }| j                            t-          j        | j        ||                     t?          d                     |j                            tC          | j        tD                    r#| #                    d| j        d          | _$        ntK          | j                  rL| j        }dt;          |          d         v r!t-          j        | j        |           | _$        n4| j        | _$        n't?          d                     | j                            | j        sd | _&        | j        r| j'        | _(        nd | _(        | )                    | j                  | _*        d S )Nnon_ascii_onlyFconversion_rulesr(   r   bracesunknown_char_policykeepunknown_char_warningTlatex_string_classz&Ignoring unknown keyword arguments: %s,c              3   d   K   | ]+}t          |t                    rt          |          n|gV  ,d S r   )
isinstance
basestringr.   ).0rs     r   	<genexpr>z1UnicodeToLatexEncoder.__init__.<locals>.<genexpr>  s^       B
 B
 1;1j0I0IT)!,,,QRuB
 B
 B
 B
 B
 B
r   u2lobjr   )r@   zInvalid rule type: {}do_unknown_charwhatz.Invalid argument for unknown_char_policy: {!r}c                     d S r   r&   )chs    r   <lambda>z0UnicodeToLatexEncoder.__init__.<locals>.<lambda>  s    D r   c                     dS )NFr&   )sps     r   rF   z0UnicodeToLatexEncoder.__init__.<locals>.<lambda>  s    % r   r&   )+popr2   r3   r   r5   r7   unicoder8   loggerwarningjoinkeyssuperr0   r   	itertoolschainfrom_iterable_compiled_rulesr   r*   append	functoolspartial_apply_rule_dictr   
RULE_REGEX_apply_rule_regexRULE_CALLABLEr   _apply_rule_callable	TypeErrorr   r;   r<   _get_method_fn_do_unknown_charcallable_do_warn_unknown_char_check_do_skip_ascii_maybe_skip_ascii_get_replacement_latex_fn_apply_protection)r   kwargsexpanded_conversion_rulesr   thecallablefnr   s         r   r   zUnicodeToLatexEncoder.__init__  s   $jj)95AA &

+=
| L L,2JJ7UW_,`,`)#)::.CV#L#L $*JJ/Et$L$L!"(**-A7"K"K 	^NNCSXXfkkmmE\E\]]]3#T**3==f=== %.O$A$A B
 B
*B
 B
 B
 %
 %
!  "- 	P 	PD~**$++%d&;TYMM    :--$++%d&<diNN    =00"i~k::1==="+"3DId"K"K"KK$++%d&?dSS      7 > >t~ N NOOO d.
;; 	?$($7$7!(* %8 % %D!!
 d.// 	?)B>"--a000(1(9$:R[_(`(`(`%%(,(@%%L#VD$<==? ? ? ( 	9)8D&  	8%)%>D""%7%7D" "&!?!?-"
 "
r   c                     d|z   dz   |                     dd          z   }t          | |          s#t          d                    ||                    t	          | |          S )N_-zInvalid {}: {})replacehasattrr,   r   getattr)r   basenamerC   selfmethnames        r   r^   z$UnicodeToLatexEncoder._get_method_fn	  se    TzC'$,,sC*@*@@t\** 	B-44T4@@AAAt\***r   c                 T    t          |          r|S |                     d|d          S )Napply_protectionr   rB   )r`   r^   )r   r   s     r   rd   z/UnicodeToLatexEncoder._get_replacement_latex_fn  s?    011 	0//""(/ # 
 
 	
r   c                    t          |          }t          j        d|          } G d d          } |            }|                                 |_        d|_        |j        t          |          k     r|                     ||          r/| j        D ]} |||          r n||j                 }t          |          }|dk    r|dk    s|dv r!|xj        |z  c_        |xj        dz  c_        nH| 
                    |           |xj        |                     |          z  c_        |xj        dz  c_        |j        t          |          k     |j        S )	z
        Convert unicode characters in the string `s` into latex escape sequences,
        according to the rules and options given to the constructor.
        NFCc                       e Zd ZdS )3UnicodeToLatexEncoder.unicode_to_latex.<locals>._NSN)r   r#   r$   r&   r   r   _NSrx   !  s          r   ry   r          z
	r   )rK   unicodedata	normalizer8   latexposlenrc   rT   ordra   r_   )r   rH   ry   rI   compiledrulerE   os          r   unicode_to_latexz&UnicodeToLatexEncoder.unicode_to_latex  sa    AJJ!%++CEE))++ec!ffnn%%a++  $ 4  <1%% E quXGGGGSbHnnGGrMGGEEQJEEE..r222GGt44R888GGEEQJEE) ec!ffnn, wr   c                     t          ||j                           dk     r-|xj        ||j                 z  c_        |xj        dz  c_        dS dS )Nr{   r   TF)r   r   r~   )r   rH   rI   s      r   rb   z*UnicodeToLatexEncoder._check_do_skip_ascii?  sH    qx==3GGqxGGEEQJEE4ur   c                     t          ||j                           }||v r |                     |||         d|           dS d S )Nr   T)r   r   _apply_replacement)r   ruledictr   rH   rI   r   s         r   rX   z&UnicodeToLatexEncoder._apply_rule_dictH  sD    !%MM==##Ax{At<<<4tr   c                 4   |D ]\  }}|                     ||j                  }|rt          |          r ||          }n|                    |          }|                     |||                                |                                z
  |            dS d S NT)matchr   r`   expandr   endstart)	r   ruleregexesr   rH   rI   regexreplmreplstrs	            r   rZ   z'UnicodeToLatexEncoder._apply_rule_regexN  s    & 	 	KE4Aqu%%A}D>> -"d1ggGGhhtnnG''7AEEGGaggii4GNNNtt  tr   c                 j     |||j                   }|d S |\  }}|                     ||||           dS r   )r   r   )r   rulecallabler   rH   rI   resconsumedr   s           r   r\   z*UnicodeToLatexEncoder._apply_rule_callableY  sH    l1ae$$;4444888tr   c                     | j         }|j        |                     |j                  } ||          }|xj        |z  c_        |xj        |z  c_        d S r   )re   r   rd   r~   r   )r   rI   r   numcharsruleobj
protect_fns         r   r   z(UnicodeToLatexEncoder._apply_replacementa  sf     +
 /;774 J z$	4	r   c                     |S r   r&   r   r   s     r   _apply_protection_nonez,UnicodeToLatexEncoder._apply_protection_nonep  s    r   c                     |                     d          }|dk    r'||dz   d                                          rd|z   dz   S |S )N\r   r   {}rfindisalphar   r   ks      r   _apply_protection_bracesz.UnicodeToLatexEncoder._apply_protection_bracess  sL    JJt66d1Q344j((**6:##r   c                 2    |dd         dk    rd|z   dz   S |S )Nr   r   r   r   r   r&   r   s     r   #_apply_protection_braces_almost_allz9UnicodeToLatexEncoder._apply_protection_braces_almost_ally  s)    !9:##r   c                     d|z   dz   S )Nr   r   r&   r   s     r   _apply_protection_braces_allz2UnicodeToLatexEncoder._apply_protection_braces_all}  s    TzCr   c                     |                     d          }|dk    r$||dz   d                                          r|dz   S |S )Nr   r   r   z{}r   r   s      r   $_apply_protection_braces_after_macroz:UnicodeToLatexEncoder._apply_protection_braces_after_macro  sG    JJt66d1Q344j((**6$;r   c                     |S r   r&   r   rE   s     r   _do_unknown_char_keepz+UnicodeToLatexEncoder._do_unknown_char_keep  s    	r   c                     dS )Nz{\bfseries ?}r&   r   s     r   _do_unknown_char_replacez.UnicodeToLatexEncoder._do_unknown_char_replace  s    r   c                     dS )Nr+   r&   r   s     r   _do_unknown_char_ignorez-UnicodeToLatexEncoder._do_unknown_char_ignore  s    rr   c                 D    t          dt          |          |fz            Nu>   No known latex representation for character: U+%04X - ‘%s’)r,   r   r   s     r   _do_unknown_char_failz+UnicodeToLatexEncoder._do_unknown_char_fail  s)    Yr77B-( ) ) 	)r   c                 &    dt          |          z  S )Nz7\ensuremath{\langle}\texttt{U+%04X}\ensuremath{\rangle})r   r   s     r   _do_unknown_char_unihexz-UnicodeToLatexEncoder._do_unknown_char_unihex  s    I3r77SSr   c                 X    t                               dt          |          |           d S r   )rL   rM   r   r   s     r   ra   z+UnicodeToLatexEncoder._do_warn_unknown_char  s0    W2ww	$ 	$ 	$ 	$ 	$r   )r   r#   r$   r%   r   r^   rd   r   rb   rX   rZ   r\   r   r   r   r   r   r   r   r   r   r   r   ra   __classcell__)r   s   @r   r0   r0     sl       g gPK
 K
 K
 K
 K
Z+ + +
 
 
$ $ $N    
 
 
                          ) ) )T T T$ $ $ $ $ $ $r   r0   ) 
__future__r   r   r   r|   loggingsysrV   rQ   version_infomajorstrrK   r<   typesr   r   inspectr   dictr	   	getLoggerr   rL   r   r*   rY   r[   r   r.   objectr0   r&   r   r   <module>r      s}  4 I H H H H H H H H H      



        AGJ;;;;;;&&&&&&&444444		8	$	$) ) ), 	 
 
 
 
 
 
 
 
 
HJ J JFF$ F$ F$ F$ F$F F$ F$ F$ F$ F$r   