
    hE                    2   d Z ddlmZ dZdgZddlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZ ddlmZmZ dd	lmZmZm Z m!Z! dd
l"m#Z# erddl$m%Z% ddlm&Z& ddl'm(Z(m)Z)m*Z* dZ+e	ee,e,f   e,e,gdf   Z- G d dee      Z. G d de       Z/y)zCUse the HTMLParser library to parse HTML files that aren't too bad.    )annotationsMITHTMLParserTreeBuilder)
HTMLParser)AnyCallablecastDictIterableListOptionalTYPE_CHECKINGTupleTypeUnion)AttributeDictCDataCommentDeclarationDoctypeProcessingInstruction)EntitySubstitutionUnicodeDammit)DetectsXMLParsedAsHTMLHTMLHTMLTreeBuilderSTRICTParserRejectedMarkup)BeautifulSoup)NavigableString)	_Encoding
_Encodings
_RawMarkupzhtml.parserNc                      e Zd ZU dZded<   dZded<   	 ed	 	 	 	 	 	 	 ddZd
ed	<   ded<   ded<   ddZ	 	 	 	 	 	 ddZ	 d	 	 	 	 	 	 	 ddZ	dddZ
d dZd!dZd!dZd dZd dZd dZd dZy)"BeautifulSoupHTMLParserreplacestrREPLACEignoreIGNOREon_duplicate_attributesoupr    r-   &Union[str, _DuplicateAttributeHandler]c                   || _         || _        |j                  j                  | _        t	        j
                  | g|i | g | _        | j                          y N)r.   r-   builderattribute_dict_classr   __init__already_closed_empty_element_initialize_xml_detector)selfr.   r-   argskwargss        /var/labsstorage/home/kalaiyarasan.offl.2004/htdocs/course/socket/myenv/lib/python3.12/site-packages/bs4/builder/_htmlparser.pyr4   z BeautifulSoupHTMLParser.__init__T   sS     	&<#$(LL$E$E!D24262 -/)%%'    z	List[str]r5   c                    t        |      r1   r   )r7   messages     r:   errorzBeautifulSoupHTMLParser.erroro   s     #7++r;   c                N    | j                  ||d       | j                  |       y)zHandle an incoming empty-element tag.

        html.parser only calls this method when the markup looks like
        <tag/>.
        F)handle_empty_elementN)handle_starttaghandle_endtag)r7   nameattrss      r:   handle_startendtagz*BeautifulSoupHTMLParser.handle_startendtag   s'     	T5uE4 r;   c                V   | j                         }|D ]_  \  }}|d}||v rM| j                  }|| j                  k(  r*|d| j                  fv r|||<   @t	        t
        |      } ||||       [|||<   a | j                  j                  j                  r| j                         \  }}	ndx}}	| j                  j                  |dd|||	      }
|
r<|
j                  r0|r.| j                  |d       | j                  j                  |       | j                  | j!                  |       yy)zHandle an opening tag, e.g. '<tag>'

        :param handle_empty_element: True if this tag is known to be
            an empty-element tag (i.e. there is not expected to be any
            closing tag).
        N )
sourceline	sourceposF)check_already_closed)r3   r-   r+   r)   r	   _DuplicateAttributeHandlerr.   r2   store_line_numbersgetposrA   is_empty_elementrB   r5   append_root_tag_name_root_tag_encountered)r7   rC   rD   r@   	attr_dictkeyvalueon_duperH   rI   tags              r:   rA   z'BeautifulSoupHTMLParser.handle_starttag   s@    $(#<#<#>	 	'JC }i 55dkk)t|| 44%*IcN"#=wGGIsE2!&	#%	', 99//$(KKM!J	%))Jii''$iJ) ( 
 3'',@ t%@ --44T:&&&t, 'r;   c                    |r*|| j                   v r| j                   j                  |       y| j                  j                  |       y)zHandle a closing tag, e.g. '</tag>'

        :param name: A tag name.
        :param check_already_closed: True if this tag is expected to
           be the closing portion of an empty-element tag,
           e.g. '<tag></tag>'.
        N)r5   remover.   rB   )r7   rC   rJ   s      r:   rB   z%BeautifulSoupHTMLParser.handle_endtag   s<      DD,M,M$M
 --44T:II##D)r;   c                :    | j                   j                  |       y)z4Handle some textual data that shows up between tags.N)r.   handle_datar7   datas     r:   rZ   z#BeautifulSoupHTMLParser.handle_data   s    		d#r;   c                   |j                  d      rt        |j                  d      d      }n8|j                  d      rt        |j                  d      d      }nt        |      }d}|dk  r<| j                  j                  dfD ]!  }|s	 t        |g      j                  |      }# |s	 t        |      }|xs d}| j                  |       y# t        $ r Y Uw xY w# t        t        f$ r Y 8w xY w)zHandle a numeric character reference by converting it to the
        corresponding Unicode character and treating it as textual
        data.

        :param name: Character number, possibly in hexadecimal.
        x   XN   zwindows-1252u   �)
startswithintlstripr.   original_encoding	bytearraydecodeUnicodeDecodeErrorchr
ValueErrorOverflowErrorrZ   )r7   rC   	real_namer\   encodings        r:   handle_charrefz&BeautifulSoupHTMLParser.handle_charref   s     ??3DKK,b1I__S!DKK,b1ID	Is? "YY88.I $i[188BD	 9~ 22 * 
 . s$   C,C 	CCC0/C0c                x    t         j                  j                  |      }||}nd|z  }| j                  |       y)zHandle a named entity reference by converting it to the
        corresponding Unicode character(s) and treating it as textual
        data.

        :param name: Name of the entity reference.
        Nz&%s)r   HTML_ENTITY_TO_CHARACTERgetrZ   )r7   rC   	characterr\   s       r:   handle_entityrefz(BeautifulSoupHTMLParser.handle_entityref
  s>     '??CCDI	 D 4<Dr;   c                    | j                   j                          | j                   j                  |       | j                   j                  t               y)zOHandle an HTML comment.

        :param data: The text of the comment.
        N)r.   endDatarZ   r   r[   s     r:   handle_commentz&BeautifulSoupHTMLParser.handle_comment  s8    
 					d#		'"r;   c                    | j                   j                          |t        d      d }| j                   j                  |       | j                   j                  t               y)zYHandle a DOCTYPE declaration.

        :param data: The text of the declaration.
        zDOCTYPE N)r.   ru   lenrZ   r   r[   s     r:   handle_declz#BeautifulSoupHTMLParser.handle_decl&  sI    
 			C
O%&		d#		'"r;   c                   |j                         j                  d      rt        }|t        d      d }nt        }| j
                  j                          | j
                  j                  |       | j
                  j                  |       y)z{Handle a declaration of unknown type -- probably a CDATA block.

        :param data: The text of the declaration.
        zCDATA[N)upperrb   r   rx   r   r.   ru   rZ   )r7   r\   clss      r:   unknown_declz$BeautifulSoupHTMLParser.unknown_decl0  sf     ::<""8,CH(DC				d#		#r;   c                    | j                   j                          | j                   j                  |       | j                  |       | j                   j                  t               y)z\Handle a processing instruction.

        :param data: The text of the instruction.
        N)r.   ru   rZ   _document_might_be_xmlr   r[   s     r:   	handle_piz!BeautifulSoupHTMLParser.handle_pi?  sG    
 					d###D)		/0r;   N)r.   r    r8   r   r-   r/   r9   r   )r=   r(   returnNone)rC   r(   rD   List[Tuple[str, Optional[str]]]r   r   )T)rC   r(   rD   r   r@   boolr   r   )rC   r(   rJ   r   r   r   )r\   r(   r   r   )rC   r(   r   r   )__name__
__module____qualname__r)   __annotations__r+   r4   r>   rE   rA   rB   rZ   rn   rs   rv   ry   r}   r    r;   r:   r&   r&   =   s     GS FC$ JQ	(( ( !G	(
 (. CB"++
, !! ?!	!& &*	<-<- /<- #	<-
 
<-|*$$&P&##1r;   r&   c                       e Zd ZU dZdZded<   dZded<   eZded<   ee	e
gZd	ed
<   ded<   dZded<   	 	 d	 	 	 	 	 d fdZ	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZ xZS )r   zA Beautiful soup `bs4.builder.TreeBuilder` that uses the
    :py:class:`html.parser.HTMLParser` parser, found in the Python
    standard library.

    Fr   is_xmlT	picklabler(   NAMEzIterable[str]featuresz$Tuple[Iterable[Any], Dict[str, Any]]parser_argsTRACKS_LINE_NUMBERSc                    t               }dD ]  }||v s|j                  |      }|||<    t        t        |   di | |xs g }|xs i }|j                  |       d|d<   ||f| _        y)a  Constructor.

        :param parser_args: Positional arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param parser_kwargs: Keyword arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param kwargs: Keyword arguments for the superclass constructor.
        r,   Fconvert_charrefsNr   )dictpopsuperr   r4   updater   )r7   r   parser_kwargsr9   extra_parser_kwargsargrT   	__class__s          r:   r4   zHTMLParserTreeBuilder.__init__[  s    $ #f. 	1Cf}

3+0#C(	1 	#T3=f=!'R%+01,1()'7r;   c              #  @  K   t        |t              r	|dddf yg }|r|j                  |       g }|r|j                  |       t        |||d|      }|j                  t        d      |j                  |j                  |j                  |j                  f yw)a2  Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: Some markup -- probably a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples: (markup, encoding, declared encoding,
             has undergone character replacement)

            Each 4-tuple represents a strategy for parsing the document.
            This TreeBuilder uses Unicode, Dammit to convert the markup
            into Unicode, so the ``markup`` element of the tuple will
            always be a string.
        NFT)known_definite_encodingsuser_encodingsis_htmlexclude_encodingszPCould not convert input to Unicode, and html.parser will not accept bytestrings.)	
isinstancer(   rO   r   unicode_markupr   re   declared_html_encodingcontains_replacement_characters)r7   markupuser_specified_encodingdocument_declared_encodingr   r   r   dammits           r:   prepare_markupz$HTMLParserTreeBuilder.prepare_markupy  s     2 fc"4u-- 57 "
 %++,CD*,% !!"<=%=)/
   ( 'b 
 %%((--66	 s   BBc                   | j                   \  }}t        |t              sJ | j                  J t	        | j                  g|i |}	 |j                  |       |j                          g |_	        y # t        $ r}t        |      d }~ww xY wr1   )
r   r   r(   r.   r&   feedcloseAssertionErrorr   r5   )r7   r   r8   r9   parseres         r:   r   zHTMLParserTreeBuilder.feed  s    ''f &#&&&
 yy$$$(DTDVD	*KKLLN /1+  	* 'q))		*s   
!A3 3	B<BB)NN)r   zOptional[Iterable[Any]]r   zOptional[Dict[str, Any]]r9   r   )NNN)
r   r$   r   Optional[_Encoding]r   r   r   zOptional[_Encodings]r   zDIterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]])r   r$   r   r   )r   r   r   __doc__r   r   r   
HTMLPARSERr   r   r   r   r   r4   r   r   __classcell__)r   s   @r:   r   r   J  s     FDItD##T62Hm255 !%$ 04268,8 08 	8B 8<:>26FF "5F %8	F
 0F 
NFP1r;   )0r   
__future__r   __license____all__html.parserr   typingr   r   r	   r
   r   r   r   r   r   r   r   bs4.elementr   r   r   r   r   r   
bs4.dammitr   r   bs4.builderr   r   r   r   bs4.exceptionsr   bs4r    r!   bs4._typingr"   r#   r$   r   r(   rK   r&   r   r   r;   r:   <module>r      s    I "   #     9  0!+  
%tCH~sC&@$&FG J1j*@ J1ZP1O P1r;   