3
(h(                 @   s  d Z ddlmZ ddlZddlZye W n ek
r@   eZY nX dZi Z	ej
djZej
djZydd	lmZ ed
dZW n* ek
r   ejjejjedZY nX edd ejeD Zdd ZG dd deZG dd deZG dd deZG dd deZ dS )zY

Pyphen
======

Pure Python module to hyphenate text, inspired by Ruby's Text::Hyphen.

    )unicode_literalsNPyphen	LANGUAGESlanguage_fallbackz\^{2}([0-9a-f]{2})z
(\d?)(\D?))resource_filenameZpyphenZdictionariesc             c   s2   | ]*}|j d r|dd tjjt|fV  qdS )z.dic      N)endswithospathjoindictionaries_root).0filename r   3/tmp/pip-install-q3hcpn_q/Pyphen/pyphen/__init__.py	<genexpr>3   s   r   c             C   s>   | j ddjd}x&|r8dj|} | tkr.| S |j  qW dS )a	  Get a fallback language available in our dictionaries.

    http://www.unicode.org/reports/tr35/#Locale_Inheritance

    We use the normal truncation inheritance. This function needs aliases
    including scripts for languages with multiple regions available.

    -_N)replacesplitr   r   pop)languagepartsr   r   r   r   8   s    	
c               @   s    e Zd ZdZdd Zdd ZdS )AlternativeParserzParser of nonstandard hyphen pattern alternative.

    The instance returns a special int with data about the current position in
    the pattern when called with an odd value.

    c             C   sL   |j d}|d | _t|d | _t|d | _|jdrH|  jd7  _d S )N,r         .)r   changeintindexcut
startswith)selfpatternalternativer   r   r   __init__P   s    


zAlternativeParser.__init__c             C   s<   |  j d8  _ t|}|d@ r4t|| j| j | jfS |S d S )Nr   )r"   r!   DataIntr    r#   )r%   valuer   r   r   __call__X   s
    zAlternativeParser.__call__N)__name__
__module____qualname____doc__r(   r+   r   r   r   r   r   I   s   r   c               @   s   e Zd ZdZdddZdS )r)   zE``int`` with some other data can be stuck to in a ``data`` attribute.Nc             C   s.   t j| |}|r$t|tr$|j|_n||_|S )zCreate a new ``DataInt``.

        Call with ``reference=dataint_object`` to use the data from another
        ``DataInt``.

        )r!   __new__
isinstancer)   data)clsr*   r2   	referenceobjr   r   r   r0   c   s
    
zDataInt.__new__)NN)r,   r-   r.   r/   r0   r   r   r   r   r)   a   s   r)   c               @   s    e Zd ZdZdd Zdd ZdS )HyphDictzHyphenation patterns.c       
         sV  i | _ t|d}|j j jd}|j dkr6d}x|D ]}|j|j }| s<|jds<|jdrjq<tdd |}d	|kr|jd	d
\}}t	|| nt
 t fddt|D  \}}t|dkrq<dt| }}	x|| s|d
7 }qW x||	d
  s
|	d
8 }	qW ||||	 f| j dj|< q<W W dQ R X i | _tdd | j D | _dS )zpRead a ``hyph_*.dic`` and parse its patterns.

        :param filename: filename of hyph_*.dic to read

        rbasciizmicrosoft-cp1251cp1251%#c             S   s   t t| jddS )Nr      )unichrr!   group)matchr   r   r   <lambda>   s    z#HyphDict.__init__.<locals>.<lambda>/r   c                s    g | ]\}}| |pd fqS )0r   )r   istring)factoryr   r   
<listcomp>   s   z%HyphDict.__init__.<locals>.<listcomp>r    Nc             s   s   | ]}t |V  qd S )N)len)r   keyr   r   r   r      s    z$HyphDict.__init__.<locals>.<genexpr>)patternsopenreadlinestripdecodelowerr$   	parse_hexr   r   r!   zipparsemaxrH   r   cachemaxlen)
r%   r   streamcharsetr&   r'   tagsvaluesstartendr   )rE   r   r(   u   s<    




*zHyphDict.__init__c             C   s   |j  }| jj|}|dkrd| }dgt|d  }xtt|d D ]~}xxt|d t|| j t|d D ]R}| jj||| }|rp|\}}	t|| || t|	 }
t	t
|	||
 ||
< qpW qHW dd t|D }|| j|< |S )a  Get a list of positions where the word can be hyphenated.

        :param word: unicode string of the word to hyphenate

        E.g. for the dutch word 'lettergrepen' this method returns ``[3, 6,
        9]``.

        Each position is a ``DataInt`` with a data attribute.

        If the data attribute is not ``None``, it contains a tuple with
        information about nonstandard hyphenation at that point: ``(change,
        index, cut)``.

        change
          a string like ``'ff=f'``, that describes how hyphenation should
          take place.

        index
          where to substitute the change, counting from the current point

        cut
          how many characters to remove while substituting the nonstandard
          hyphenation

        Nz.%s.r   r   c             S   s(   g | ] \}}|d  rt |d |dqS )r   r   )r4   )r)   )r   rC   r4   r   r   r   rF      s   z&HyphDict.positions.<locals>.<listcomp>)rO   rT   getrH   rangeminrU   rJ   slicemaprS   	enumerate)r%   wordZpointsZpointed_wordZ
referencesrC   jr&   offsetrY   Zslice_r   r   r   	positions   s$    $
zHyphDict.positionsN)r,   r-   r.   r/   r(   re   r   r   r   r   r6   r   s   3r6   c               @   sB   e Zd ZdZdddZdd Zd	d
 ZdddZdddZeZ	dS )r   zEHyphenation class, with methods to hyphenate strings in various ways.Nr   Tc             C   sD   |st t| }|| _|| _| s*|tkr6t|t|< t| | _dS )a  Create an hyphenation instance for given lang or filename.

        :param filename: filename of hyph_*.dic to read
        :param lang: lang of the included dict to use if no filename is given
        :param left: minimum number of characters of the first syllabe
        :param right: minimum number of characters of the last syllabe
        :param cache: if ``True``, use cached copy of the hyphenation patterns

        N)r   r   leftrighthdcacher6   hd)r%   r   langrf   rg   rT   r   r   r   r(      s    
zPyphen.__init__c                s*   t |j   fddjj|D S )zGet a list of positions where the word can be hyphenated.

        :param word: unicode string of the word to hyphenate

        See also ``HyphDict.positions``. The points that are too far to the
        left or right are removed.

        c                s*   g | ]"}j |  ko kn  r|qS r   )rf   )r   rC   )rg   r%   r   r   rF      s    z$Pyphen.positions.<locals>.<listcomp>)rH   rg   ri   re   )r%   rb   r   )rg   r%   r   re      s    	zPyphen.positionsc             c   s   xt | j|D ]~}|jrt|j\}}}||7 }|j r>|j }|jd\}}|d| | |||| d  fV  q|d| ||d fV  qW dS )zIterate over all hyphenation possibilities, the longest first.

        :param word: unicode string of the word to hyphenate

        =N)reversedre   r2   isupperupperr   )r%   rb   positionr    r"   r#   Zc1c2r   r   r   iterate   s    (zPyphen.iterater   c             C   s@   |t |8 }x.| j|D ] \}}t ||kr|| |fS qW dS )a  Get the longest possible first part and the last part of a word.

        :param word: unicode string of the word to hyphenate
        :param width: maximum length of the first part
        :param hyphen: unicode string used as hyphen character

        The first part has the hyphen already attached.

        Returns ``None`` if there is no hyphenation point before ``width``, or
        if the word could not be hyphenated.

        N)rH   rq   )r%   rb   widthhyphenZw1Zw2r   r   r   wrap  s    zPyphen.wrapc             C   sz   t |}xft| j|D ]T}|jr`|j\}}}||7 }|j rF|j }|jd||||| < q|j|| qW dj|S )a  Get the word as a string with all the possible hyphens inserted.

        :param word: unicode string of the word to hyphenate
        :param hyphen: unicode string used as hyphen character

        E.g. for the dutch word ``'lettergrepen'``, this method returns the
        unicode string ``'let-ter-gre-pen'``. The hyphen string to use can be
        given as the second parameter, that defaults to ``'-'``.

        rk   rG   )	listrl   re   r2   rm   rn   r   insertr   )r%   rb   rs   Z	word_listro   r    r"   r#   r   r   r   inserted  s    zPyphen.inserted)NNr   r   T)r   )r   )
r,   r-   r.   r/   r(   re   rq   rt   rw   r+   r   r   r   r   r      s   


)r   r   r   )!r/   
__future__r   r   rer=   	NameErrorchr__all__rh   compilesubrP   findallrR   pkg_resourcesr   r   ImportErrorr   r   dirname__file__dictlistdirr   r   objectr   r!   r)   r6   r   r   r   r   r   <module>   s0   
g