3
NWh.                 @   s  d Z ddlmZmZ ddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ejZejej eedrejjejZnejjeZeejjedddZ eje dZ!ej"de! Z#dd Z$dd Z%i Z&dd Z'dd Z(dd Z)e(ddd Z*e(ddd  Z+e(d!d"d# Z,d<d$d%Z-e(d&d'd( Z.e(d)d*d+ Z/e(d,e(d-d.d/ Z0e(d0d1d2 Z1d3d4 Z2d5d6 Z3d7d8 Z4ej"d9ej5Z6d:d; Z7dS )=a  
    weasyprint.html
    ---------------

    Specific handling for some HTML elements, especially replaced elements.

    Replaced elements (eg. <img> elements) are rendered externally and
    behave as an atomic opaque box in CSS. In general, they may or may not
    have intrinsic dimensions. But the only replaced elements currently
    supported in WeasyPrint are images with intrinsic dimensions.

    :copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
    :license: BSD, see LICENSE for details.

    )divisionunicode_literalsN   )get_child_text)boxes)get_url_attribute)xrangeurljoin)LOGGER)CSSfrozencsszhtml5_ua.css)filenamez 	
z[^%s]+c             C   s   | j dj jdS )a  Transform (only) ASCII letters to lower case: A-Z is mapped to a-z.

    :param string: An Unicode string.
    :returns: A new Unicode string.

    This is used for `ASCII case-insensitive
    <http://whatwg.org/C#ascii-case-insensitive>`_ matching.

    This is different from the :meth:`~py:str.lower` method of Unicode strings
    which also affect non-ASCII characters,
    sometimes mapping them into the ASCII range:

    >>> keyword = u'Bac\N{KELVIN SIGN}ground'
    >>> assert keyword.lower() == u'background'
    >>> assert ascii_lower(keyword) != keyword.lower()
    >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'

    utf8)encodelowerdecode)string r   O/var/www/html/enquirykeeper_venv/lib/python3.6/site-packages/weasyprint/html.pyascii_lower3   s    r   c                s$   t  fddtj| jddD S )z
    Return whether the given element has a ``rel`` attribute with the
    given link type.

    :param link_type: Must be a lower-case string.

    c             3   s   | ]}t | kV  qd S )N)r   ).0token)	link_typer   r   	<genexpr>R   s    z(element_has_link_type.<locals>.<genexpr>rel )anyHTML_SPACE_SEPARATED_TOKENS_REfindallget)elementr   r   )r   r   element_has_link_typeJ   s    r"   c             C   s&   |j tkrt| j | ||S |gS dS )zbHandle HTML elements that need special care.

    :returns: a (possibly empty) list of boxes.
    N)Zelement_tagHTML_HANDLERStag)r!   boxget_image_from_urir   r   r   handle_elementZ   s    
r'   c                s    fdd}|S )zDReturn a decorator registering a function handling ``tag`` elements.c                s   | t  < | S )z;Decorator registering a function handling ``tag`` elements.)r#   )function)r$   r   r   	decoratorg   s    zhandler.<locals>.decoratorr   )r$   r)   r   )r$   r   handlere   s    r*   c             C   s.   |j jdkrtj}ntj}|| j| j|j |S )zWrap an image in a replaced box.

    That box is either block-level or inline-level, depending on what the
    element should be.

    block	list-itemtable)r+   r,   r-   )styleZdisplayr   ZBlockReplacedBoxZInlineReplacedBoxr$   
sourceline)r!   r%   imagetype_r   r   r   make_replaced_boxn   s    r2   Zimgc             C   s   t | d}| jd}|rp||}|dk	r6t| ||gS |rR|jtjj||ggS |dkr^g S |dksjtg S n |r|jtjj||ggS g S dS )zHandle ``<img>`` elements, return either an image or the alt-text.

    See: http://www.w3.org/TR/html5/embedded-content-1.html#the-img-element

    srcaltNr   )r   r    r2   Zcopy_with_childrenr   ZTextBoxanonymous_fromAssertionError)r!   r%   r&   r3   r4   r0   r   r   r   
handle_img}   s"    

r7   Zembedc             C   sB   t | d}| jddj }|r>|||}|dk	r>t| ||gS g S )zHandle ``<embed>`` elements, return either an image or nothing.

    See: https://www.w3.org/TR/html5/embedded-content-0.html#the-embed-element

    r3   typer   N)r   r    stripr2   )r!   r%   r&   r3   r1   r0   r   r   r   handle_embed   s    

r:   objectc             C   sD   t | d}| jddj }|r>|||}|dk	r>t| ||gS |gS )zHandle ``<object>`` elements, return either an image or the fallback
    content.

    See: https://www.w3.org/TR/html5/embedded-content-0.html#the-object-element

    datar8   r   N)r   r    r9   r2   )r!   r%   r&   r<   r1   r0   r   r   r   handle_object   s    

r=   c             C   sN   | j |dj }|rJyt|}W n tk
r4   Y nX ||krJt||| dS )zLRead an integer attribute from the HTML element and set it on the box.

    r   N)r    r9   int
ValueErrorsetattr)r!   r%   nameminimumvaluer   r   r   integer_attribute   s    rD   Zcolgroupc                sR   t  tjrLtdd | D r&d _n&t|  d  fddt jD  _ gS )zHandle the ``span`` attribute.c             s   s   | ]}|j d kV  qdS )colN)r$   )r   childr   r   r   r      s    z"handle_colgroup.<locals>.<genexpr>Nspanc             3   s   | ]}t jj g V  qd S )N)r   TableColumnBoxr5   )r   _i)r%   r   r   r      s   )
isinstancer   ZTableColumnGroupBoxr   rG   rD   r   children)r!   r%   _get_image_from_urir   )r%   r   handle_colgroup   s    
rM   rE   c                s@   t  tjr:t|  d  jdkr: fddt jD S  gS )zHandle the ``span`` attribute.rG   r   c                s   g | ]} j  qS r   )copy)r   rI   )r%   r   r   
<listcomp>   s    zhandle_col.<locals>.<listcomp>)rJ   r   rH   rD   rG   r   )r!   r%   rL   r   )r%   r   
handle_col   s
    
rP   thtdc             C   s.   t |tjr(t| |d t| |ddd |gS )z/Handle the ``colspan``, ``rowspan`` attributes.ZcolspanZrowspanr   )rB   )rJ   r   ZTableCellBoxrD   )r!   r%   rL   r   r   r   	handle_td   s    rS   ac             C   s   t | d|_|gS )zHandle the ``rel`` attribute.
attachment)r"   is_attachment)r!   r%   rL   r   r   r   handle_a   s    rW   c             C   s>   t t| jdd}|dk	r:|jddj }|r:t||S |S )zkReturn the base URL for the document.

    See http://www.w3.org/TR/html5/urls.html#document-base-url

    baseNhrefr   )nextiterr    r9   r	   )html_documentZfallback_base_urlZfirst_base_elementrY   r   r   r   find_base_url  s    
r]   c          
   C   s  d}d}d}g }g }d}d}g }xh| j dddD ]T}	|	jdkrT|dkrTt|	}q2|	jdkr:t|	jdd}
|	jdd}|
dkrxtt|jd	D ]}||kr|j| qW n|
d
kr|j| nr|
dkr|dkr|}n\|
dkr|dkr|}nF|
dkr|dkrt	|
|	j
|}n"|
dkr|dkrt	|
|	j
|}q2|	jdkr2t|	dr2t|	d}|	jdd}|dkrztjd q2|j||f q2W t||||||||dS )a  
    Relevant specs:

    http://www.whatwg.org/html#the-title-element
    http://www.whatwg.org/html#standard-metadata-names
    http://wiki.whatwg.org/wiki/MetaExtensions
    http://microformats.org/wiki/existing-rel-values#HTML5_link_type_extensions

    NtitlemetalinkrA   r   contentkeywords,authordescription	generatorzdcterms.createdzdcterms.modifiedrU   rY   z'Missing href in <link rel="attachment">)r^   re   rf   rb   authorscreatedmodifiedattachments)r[   r$   r   r   r    mapstrip_whitespacesplitappendparse_w3c_dater/   r"   r   r
   warningdict)r\   r^   re   rf   rb   rg   rh   ri   rj   r!   rA   ra   keywordurlr   r   r   get_html_metadata  sN    



rt   c             C   s
   | j dS )zUse the HTML definition of "space character",
    not all Unicode Whitespace.

    http://www.whatwg.org/html#strip-leading-and-trailing-whitespace
    http://www.whatwg.org/html#space-character

    z 	
)r9   )r   r   r   r   rl   C  s    rl   aG  
    ^
    [ 	
]*
    (?P<year>\d\d\d\d)
    (?:
        -(?P<month>0\d|1[012])
        (?:
            -(?P<day>[012]\d|3[01])
            (?:
                T(?P<hour>[01]\d|2[0-3])
                :(?P<minute>[0-5]\d)
                (?:
                    :(?P<second>[0-5]\d)
                    (?:\.\d+)?  # Second fraction, ignored
                )?
                (?:
                    Z |  # UTC
                    (?P<tz_hour>[+-](?:[01]\d|2[0-3]))
                    :(?P<tz_minute>[0-5]\d)
                )
            )?
        )?
    )?
    [ 	
]*
    $
c             C   s"   t j|r|S tjd| || dS )z"http://www.w3.org/TR/NOTE-datetimez,Invalid date in <meta name="%s"> line %i: %rN)W3C_DATE_REmatchr
   rp   )Z	meta_nameZsource_liner   r   r   r   ro   q  s    
ro   )r   )8__doc__
__future__r   r   os.pathosloggingsysrer   r   Zformatting_structurer   Zurlsr   compatr   r	   loggerr
   r   r   levelsetLevelERRORhasattrpathdirname
executableroot__file__joinZHTML5_UA_STYLESHEETZHTML_WHITESPACEcompiler   r   r"   r#   r'   r*   r2   r7   r:   r=   rD   rM   rP   rS   rW   r]   rt   rl   VERBOSEru   ro   r   r   r   r   <module>   sR   

	"
4
