3
(h*                 @   s@  d Z ddlmZ ddlZddlZddlZye W n ek
rH   eZY nX dZ	i Z
ejdjZejdjZzFy(ddlZejjejd	d
dd	dfZW n ek
r   f ZY nX W deejjejdd	dejjejjedf7 ZX edd eD Zdd ZG dd deZG dd deZG dd deZ G dd deZ!dS )zY

Pyphen
======

Pure Python module to hyphenate text, inspired by Ruby's Text::Hyphen.

    )unicode_literalsNPyphen	LANGUAGESlanguage_fallbackz\^{2}([0-9a-f]{2})z
(\d?)(\D?)Zpyphen ZshareZdictionariesc             c   sN   | ]F}t jj|rt j|D ]*}|jd r|dd t jj||fV  qqdS )z.dic      N)ospathisdirlistdirendswithjoin).0Zdictionaries_rootfilename r   */tmp/pip-install-q3hcpn_q/Pyphen/pyphen.py	<genexpr>@   s   r   c             C   s>   | j ddjd}x&|r8dj|} | tkr.| S |j  qW dS )a	  Get a fallback language available in our dictionaries.

    http://www.unicode.org/reports/tr35/#Locale_Inheritance

    We use the normal truncation inheritance. This function needs aliases
    including scripts for languages with multiple regions available.

    -_N)replacesplitr   r   pop)languagepartsr   r   r   r   G   s    	
c               @   s    e Zd ZdZdd Zdd ZdS )AlternativeParserzParser of nonstandard hyphen pattern alternative.

    The instance returns a special int with data about the current position in
    the pattern when called with an odd value.

    c             C   sL   |j d}|d | _t|d | _t|d | _|jdrH|  jd7  _d S )N,r         .)r   changeintindexcut
startswith)selfpatternalternativer   r   r   __init___   s    


zAlternativeParser.__init__c             C   s<   |  j d8  _ t|}|d@ r4t|| j| j | jfS |S d S )Nr   )r#   r"   DataIntr!   r$   )r&   valuer   r   r   __call__g   s
    zAlternativeParser.__call__N)__name__
__module____qualname____doc__r)   r,   r   r   r   r   r   X   s   r   c               @   s   e Zd ZdZdddZdS )r*   zE``int`` with some other data can be stuck to in a ``data`` attribute.Nc             C   s.   t j| |}|r$t|tr$|j|_n||_|S )zCreate a new ``DataInt``.

        Call with ``reference=dataint_object`` to use the data from another
        ``DataInt``.

        )r"   __new__
isinstancer*   data)clsr+   r3   	referenceobjr   r   r   r1   r   s
    
zDataInt.__new__)NN)r-   r.   r/   r0   r1   r   r   r   r   r*   p   s   r*   c               @   s    e Zd ZdZdd Zdd ZdS )HyphDictzHyphenation patterns.c       
         sV  i | _ t|d}|j j jd}|j dkr6d}x|D ]}|j|j }| s<|jds<|jdrjq<tdd |}d	|kr|jd	d
\}}t	|| nt
 t fddt|D  \}}t|dkrq<dt| }}	x|| s|d
7 }qW x||	d
  s
|	d
8 }	qW ||||	 f| j dj|< q<W W dQ R X i | _tdd | j D | _dS )zpRead a ``hyph_*.dic`` and parse its patterns.

        :param filename: filename of hyph_*.dic to read

        rbasciizmicrosoft-cp1251cp1251%#c             S   s   t t| jddS )Nr      )unichrr"   group)matchr   r   r   <lambda>   s    z#HyphDict.__init__.<locals>.<lambda>/r   c                s    g | ]\}}| |pd fqS )0r   )r   istring)factoryr   r   
<listcomp>   s   z%HyphDict.__init__.<locals>.<listcomp>r   r   Nc             s   s   | ]}t |V  qd S )N)len)r   keyr   r   r   r      s    z$HyphDict.__init__.<locals>.<genexpr>)patternsopenreadlinestripdecodelowerr%   	parse_hexr   r   r"   zipparsemaxrH   r   cachemaxlen)
r&   r   streamcharsetr'   r(   tagsvaluesstartendr   )rF   r   r)      s<    




*zHyphDict.__init__c             C   s   |j  }| jj|}|dkrd| }dgt|d  }xtt|d D ]~}xxt|d t|| j t|d D ]R}| jj||| }|rp|\}}	t|| || t|	 }
t	t
|	||
 ||
< qpW qHW dd t|D }|| j|< |S )a  Get a list of positions where the word can be hyphenated.

        :param word: unicode string of the word to hyphenate

        E.g. for the dutch word 'lettergrepen' this method returns ``[3, 6,
        9]``.

        Each position is a ``DataInt`` with a data attribute.

        If the data attribute is not ``None``, it contains a tuple with
        information about nonstandard hyphenation at that point: ``(change,
        index, cut)``.

        change
          a string like ``'ff=f'``, that describes how hyphenation should
          take place.

        index
          where to substitute the change, counting from the current point

        cut
          how many characters to remove while substituting the nonstandard
          hyphenation

        Nz.%s.r   r   c             S   s(   g | ] \}}|d  rt |d |dqS )r   r   )r5   )r*   )r   rD   r5   r   r   r   rG      s   z&HyphDict.positions.<locals>.<listcomp>)rO   rT   getrH   rangeminrU   rJ   slicemaprS   	enumerate)r&   wordZpointsZpointed_wordZ
referencesrD   jr'   offsetrY   Zslice_r   r   r   	positions   s$    $
zHyphDict.positionsN)r-   r.   r/   r0   r)   re   r   r   r   r   r7      s   3r7   c               @   sB   e Zd ZdZdddZdd Zd	d
 ZdddZdddZeZ	dS )r   zEHyphenation class, with methods to hyphenate strings in various ways.Nr   Tc             C   sD   |st t| }|| _|| _| s*|tkr6t|t|< t| | _dS )a  Create an hyphenation instance for given lang or filename.

        :param filename: filename of hyph_*.dic to read
        :param lang: lang of the included dict to use if no filename is given
        :param left: minimum number of characters of the first syllabe
        :param right: minimum number of characters of the last syllabe
        :param cache: if ``True``, use cached copy of the hyphenation patterns

        N)r   r   leftrighthdcacher7   hd)r&   r   langrf   rg   rT   r   r   r   r)      s    
zPyphen.__init__c                s*   t |j   fddjj|D S )zGet a list of positions where the word can be hyphenated.

        :param word: unicode string of the word to hyphenate

        See also ``HyphDict.positions``. The points that are too far to the
        left or right are removed.

        c                s*   g | ]"}j |  ko kn  r|qS r   )rf   )r   rD   )rg   r&   r   r   rG     s    z$Pyphen.positions.<locals>.<listcomp>)rH   rg   ri   re   )r&   rb   r   )rg   r&   r   re      s    	zPyphen.positionsc             c   s   xt | j|D ]~}|jrt|j\}}}||7 }|j r>|j }|jd\}}|d| | |||| d  fV  q|d| ||d fV  qW dS )zIterate over all hyphenation possibilities, the longest first.

        :param word: unicode string of the word to hyphenate

        =N)reversedre   r3   isupperupperr   )r&   rb   positionr!   r#   r$   Zc1c2r   r   r   iterate	  s    (zPyphen.iterater   c             C   s@   |t |8 }x.| j|D ] \}}t ||kr|| |fS qW dS )a  Get the longest possible first part and the last part of a word.

        :param word: unicode string of the word to hyphenate
        :param width: maximum length of the first part
        :param hyphen: unicode string used as hyphen character

        The first part has the hyphen already attached.

        Returns ``None`` if there is no hyphenation point before ``width``, or
        if the word could not be hyphenated.

        N)rH   rq   )r&   rb   widthhyphenZw1Zw2r   r   r   wrap  s    zPyphen.wrapc             C   sz   t |}xft| j|D ]T}|jr`|j\}}}||7 }|j rF|j }|jd||||| < q|j|| qW dj|S )a  Get the word as a string with all the possible hyphens inserted.

        :param word: unicode string of the word to hyphenate
        :param hyphen: unicode string used as hyphen character

        E.g. for the dutch word ``'lettergrepen'``, this method returns the
        unicode string ``'let-ter-gre-pen'``. The hyphen string to use can be
        given as the second parameter, that defaults to ``'-'``.

        rk   r   )	listrl   re   r3   rm   rn   r   insertr   )r&   rb   rs   Z	word_listro   r!   r#   r$   r   r   r   inserted-  s    zPyphen.inserted)NNr   r   T)r   )r   )
r-   r.   r/   r0   r)   re   rq   rt   rw   r,   r   r   r   r   r      s   


)r   r   r   )"r0   
__future__r   r
   sysrer>   	NameErrorchr__all__rh   compilesubrP   findallrR   pkg_resourcesr   r   resource_filenameZdictionaries_rootsImportErrorprefixdirname__file__dictr   r   objectr   r"   r*   r7   r   r   r   r   r   <module>   s<   


g