3
(h'                 @   sT  d Z dddgZddlZddlmZmZ y0ddlmZmZm	Z	m
Z
mZmZmZ eefZW n8 ek
r   ddlmZmZm	Z	m
Z
mZmZ eZY nX dd	dZdd
dZdddZdd ZejdejjZG dd dZdd Zdd ZyddlmZ W n" ek
r   ddlmZ Y nX ejdjZye  W n e!k
rF   e"Z Y nX dd Z#dS )z5External interface to the BeautifulSoup HTML parser.

fromstringparseconvert_tree    N)etreehtml)BeautifulSoupTagCommentProcessingInstructionNavigableStringDeclarationDoctype)r   r   r	   r
   r   r   c             K   s   t | ||f|S )a  Parse a string of HTML data into an Element tree using the
    BeautifulSoup parser.

    Returns the root ``<html>`` Element of the tree.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a diffent Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    )_parse)databeautifulsoupmakeelementbsargs r   6/tmp/pip-install-q3hcpn_q/lxml/lxml/html/soupparser.pyr      s    c             K   s,   t | dst| } t| ||f|}tj|S )aY  Parse a file into an ElemenTree using the BeautifulSoup parser.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a diffent Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    read)hasattropenr   r   ElementTree)filer   r   r   rootr   r   r   r   $   s    	
c             C   s.   t | |}|j }x|D ]}|j| qW |S )a  Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.

    You can pass a different Element factory through the `makeelement`
    keyword.
    )_convert_treegetchildrenremove)beautiful_soup_treer   r   childrenchildr   r   r   r   3   s
    	

c             K   s   |d krt }t|dr&d|kr&d|d< t|dr@d|kr@d|d< || f|}t||}t|dkrx|d jdkrx|d S d|_|S )	NZHTML_ENTITIESZconvertEntitiesr   ZDEFAULT_BUILDER_FEATURESfeatureszhtml.parser   r   )r   r   r   lentag)sourcer   r   r   treer   r   r   r   r   E   s    


r   z`(?:\s|[<!])*DOCTYPE\s*HTML(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?(?:\s+(\'[^\']*\'|"[^"]*"))?c               @   s   e Zd Zdd Zdd ZdS )
_PseudoTagc             C   s   d| _ g | _|| _d S )Nr   )nameattrscontents)selfr*   r   r   r   __init__`   s    z_PseudoTag.__init__c             C   s
   | j j S )N)r*   __iter__)r+   r   r   r   r-   e   s    z_PseudoTag.__iter__N)__name__
__module____qualname__r,   r-   r   r   r   r   r'   ^   s   r'   c             C   s  |d krt jj}d  }}d  }}xft| D ]Z\}}t|trn|d krH|}|}|d kr|jr|jj dkr|}q*|d kr*t|tr*|}q*W |d krg  }}	| j	}
n2| j	d | }| j	||d  }
| j	|d d  }	|d k	r|
j
|}|
d | |j	 |
|d d   |_	nt|
}t|}||}|}x2t|D ]&}||}|d k	r2|j| |}q2W |}x.|	D ]&}||}|d k	rf|j| |}qfW |d k	ry|j }W n tk
r   |j}Y nX t|}|sn>|j \}}|j j}|o|dd |_|o|dd |_|S )Nr   r"   r1   )r   html_parserr   	enumerate
isinstancer   r(   lower_DECLARATION_OR_DOCTYPEr*   indexr'   _init_node_convertersreversedZaddpreviousZaddnextZoutput_readyAttributeErrorstring_parse_doctype_declarationgroupsZgetroottreedocinfo	public_idZ
system_url)r   r   Zfirst_element_idxZlast_element_idxZ	html_rootZdeclarationieZpre_rootZ	post_rootrootsconvert_nodeZres_rootprevZ	convertedZdoctype_stringmatchZexternal_idZsys_urir>   r   r   r   r   i   sd    


&








r   c                s   i g fdd}fdddfdd	dd d	d
  |t tfdd}|tdd }|tdd }|t fdd}S )Nc                 s    fdd}|S )Nc                s$   xD ]}|  |< j | qW | S )N)append)handlert)
convertersordered_node_typestypesr   r   add   s    
z5_init_node_converters.<locals>.converter.<locals>.addr   )rK   rL   )rI   rJ   )rK   r   	converter   s    z(_init_node_converters.<locals>.converterc                s$   xD ]}t | |r | S qW d S )N)r4   )noderH   )rI   rJ   r   r   find_best_converter   s    

z2_init_node_converters.<locals>.find_best_converterc                sP   y t |  }W n( tk
r8   |  } t | < Y nX |d krFd S || |S )N)typeKeyError)bs_nodeparentrG   )rI   rO   r   r   rC      s    z+_init_node_converters.<locals>.convert_nodec             S   sX   t | trFi }xD| j D ](\}}t |tr4dj|}t|||< qW ndd | D }|S )N c             S   s   i | ]\}}t ||qS r   )unescape).0kvr   r   r   
<dictcomp>   s    z<_init_node_converters.<locals>.map_attrs.<locals>.<dictcomp>)r4   dictitemslistjoinrU   )Zbs_attrsattribsrW   rX   r   r   r   	map_attrs   s    


z(_init_node_converters.<locals>.map_attrsc             S   s:   t | dkr| jpd| | _n| d jp*d| | d _d S )Nr    r"   r1   r1   )r#   texttail)rS   ra   r   r   r   append_text   s    z*_init_node_converters.<locals>.append_textc                s   | j }|d k	r2|r|nd }tj|| j|d}n|r>|ni }| j|d}xP| D ]H}yt| }W n tk
r~   Y nX |d k	rV||| qV || qVW |S )N)attrib)r)   r   
SubElementr(   rP   rQ   )rR   rS   r)   r^   resr    rG   )rC   rI   r   r_   r   r   convert_tag   s     

z*_init_node_converters.<locals>.convert_tagc             S   s    t j| }|d k	r|j| |S )N)r   ZHtmlCommentrF   )rR   rS   rf   r   r   r   convert_comment  s    

z._init_node_converters.<locals>.convert_commentc             S   s>   | j dr| d d } tj| jdd }|d k	r:|j| |S )N?r"   rT   r1   )endswithr   r
   splitrF   )rR   rS   rf   r   r   r   
convert_pi  s    

z)_init_node_converters.<locals>.convert_pic                s   |d k	r |t |  d S )N)rU   )rR   rS   )rc   r   r   convert_text  s    z+_init_node_converters.<locals>.convert_text)N)r   r'   r	   r
   r   )r   rM   rg   rh   rl   rm   r   )rc   rC   rI   rO   r   r_   rJ   r   r8      s    
r8   )name2codepointz&(\w+);c             C   s   | sdS dd }t || S )Nr`   c             S   s2   yt t| jd S  tk
r,   | jdS X d S )Nr"   r   )unichrrn   grouprQ   )mr   r   r   unescape_entity5  s    z!unescape.<locals>.unescape_entity)handle_entities)r;   rr   r   r   r   rU   1  s    rU   )NN)NN)N)$__doc____all__relxmlr   r   Zbs4r   r   r	   r
   r   r   r   r6   ImportErrorr   r   r   r   compile
IGNORECASErE   r<   r'   r   r8   html.entitiesrn   htmlentitydefssubrs   ro   	NameErrorchrrU   r   r   r   r   <module>   s:   
$ 




Uc
