
    h              	          U d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZ erdd	lmZ ddlZddlZddlZddlZddlZddlZd!dZd"d
ddededdfdZ G d de      Zd
eddfdZ dZ!ee"d<   dZ#ee"d<   d#de$defdZ%d$de$defdZ&d%de$defdZ'd&de$ddfdZ(d'de$deddfdZ)e*d k(  r! eejV                  jY                                yy)(z=Diagnostic functions, mainly for use when doing tech support.MIT    N)BytesIO)
HTMLParser)BeautifulSoup__version__)builder_registry)AnyIOListOptionalTupleTYPE_CHECKING)_IncomingMarkupdatar   returnc           	         t        dt        z         t        dt        j                  z         g d}|D ]F  }t        j
                  D ]  }||j                  v s ' |j                  |       t        d|z         H d|v rM|j                  d       	 ddl	m
} t        d	d
j                  t        t        |j                              z         d|v r	 ddl}t        d|j                  z         t#        | d      r| j%                         } |D ]V  }t        d|z         d}	 t'        | |      }d}|r't        d|z         t        j/                                t        d       X y# t        $ r t        d       Y w xY w# t        $ r t        d       Y w xY w# t(        $ r% t        d|z         t+        j,                          Y w xY w)zDiagnostic suite for isolating common problems.

    :param data: Some markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %s)html.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.r   zlxml-xmlr   etreezFound lxml version %s.z.lxml is not installed or couldn't be imported.r   NzFound html5lib version %sz2html5lib is not installed or couldn't be imported.readz#Trying to parse your markup with %sF)featuresT%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)printr   sysversionr   buildersr   removeappendr   r   joinmapstrLXML_VERSIONImportErrorr   hasattrr   r   	Exception	traceback	print_excprettify)	r   basic_parsersnamebuilderr   r   parsersuccesssoups	            t/var/labsstorage/home/kalaiyarasan.offl.2004/htdocs/course/socket/myenv/lib/python3.12/site-packages/bs4/diagnose.pydiagnoser3       s    
4{BD	,.7M '00 	Gw'''	   &NQUU Z(	D"*SXXc#u?Q?Q6R-SSU ]"	H.1E1EEG tVyy{ 4v=?	" 7DG 86AC4==?$x  	DBC	D  	HFG	H  	"3f<>!	"s6   ;E( F "F(E?>E?FF+G
	G
htmlkwargsc                 8   ddl m} |j                  dd      }t        | t              r| j                  d      } t        | t              st        |       } |j                  f||d|D ]-  \  }}t        |d|j                  dd|j                         / y	)
a  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   r   recoverTutf8)r4   r7   z, z>4N)r   r   pop
isinstancer$   encoder
   r   	iterparser   tagtext)r   r4   r5   r   r7   readereventelements           r2   
lxml_tracerB   X   s     jjD)G${{6"dB)%//&WtWWPVW Dww{{GLLACD    c            	           e Zd ZdZdeddfdZ	 ddedeeeee   f      de	ddfd	Z
dded
e	ddfdZdeddfdZdeddfdZdeddfdZdeddfdZdeddfdZdeddfdZdeddfdZy)AnnouncingParserzSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    sr   Nc                     t        |       y )N)r   )selfrF   s     r2   _pzAnnouncingParser._pw   s	    arC   r-   attrshandle_empty_elementc                 2    | j                  | d| d       y )N z STARTrI   )rH   r-   rJ   rK   s       r2   handle_starttagz AnnouncingParser.handle_starttagz   s     	4&%'(rC   check_already_closedc                 ,    | j                  d|z         y )Nz%s ENDrN   )rH   r-   rP   s      r2   handle_endtagzAnnouncingParser.handle_endtag   s    4 rC   r   c                 ,    | j                  d|z         y )Nz%s DATArN   rH   r   s     r2   handle_datazAnnouncingParser.handle_data       	D !rC   c                 ,    | j                  d|z         y )Nz
%s CHARREFrN   rH   r-   s     r2   handle_charrefzAnnouncingParser.handle_charref       t#$rC   c                 ,    | j                  d|z         y )Nz%s ENTITYREFrN   rX   s     r2   handle_entityrefz!AnnouncingParser.handle_entityref   s    %&rC   c                 ,    | j                  d|z         y )Nz
%s COMMENTrN   rT   s     r2   handle_commentzAnnouncingParser.handle_comment   rZ   rC   c                 ,    | j                  d|z         y )Nz%s DECLrN   rT   s     r2   handle_declzAnnouncingParser.handle_decl   rV   rC   c                 ,    | j                  d|z         y )Nz%s UNKNOWN-DECLrN   rT   s     r2   unknown_declzAnnouncingParser.unknown_decl   s    !D()rC   c                 ,    | j                  d|z         y )Nz%s PIrN   rT   s     r2   	handle_pizAnnouncingParser.handle_pi   s    $rC   T)__name__
__module____qualname____doc__r$   rI   r   r   r   boolrO   rR   rU   rY   r\   r^   r`   rb   rd    rC   r2   rE   rE   o   s    C D  &*	)) E#x},-.) #	)
 
)!# !T !T !" " "%3 %4 %'S 'T '%3 %4 %" " "* * * c  d  rC   rE   c                 :    t               }|j                  |        y)zPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)rE   feed)r   r/   s     r2   htmlparser_tracern      s     F
KKrC   aeiou_vowelsbcdfghjklmnpqrstvwxyz_consonantslengthc                     d}t        |       D ]/  }|dz  dk(  rt        }nt        }|t        j                  |      z  }1 |S )z<Generate a random word-like string.

    :meta private:
        r   )rangerr   rp   randomchoice)rs   rF   its       r2   rwordr|      sK    
 	A6] q5A:AA	V]]1 HrC   c                 D    dj                  d t        |       D              S )z@Generate a random sentence-like string.

    :meta private:
    rM   c              3   Z   K   | ]#  }t        t        j                  d d             % yw)   	   N)r|   rx   randint).0rz   s     r2   	<genexpr>zrsentence.<locals>.<genexpr>   s      GAE&..A./Gs   )+)r"   rw   )rs   s    r2   	rsentencer      s    
 88GvGGGrC   num_elementsc           	         g d}g }t        |       D ]  }t        j                  dd      }|dk(  r*t        j                  |      }|j	                  d|z         H|dk(  r/|j	                  t        t        j                  dd                   ||dk(  st        j                  |      }|j	                  d|z          d	d
j                  |      z   dz   S )zDRandomly generate an invalid HTML document.

    :meta private:
    )pdivspanrz   bscripttabler      z<%s>   r   rv   z</%s>z<html>
z</html>)rw   rx   r   ry   r!   r   r"   )r   	tag_nameselementsrz   ry   tag_names         r2   rdocr      s    
 BIH<  01%Q;}}Y/HOOFX-.q[OOIfnnQ&:;<q[}}Y/HOOGh./0 dii))I55rC   c                    t        dt        z         t        |       }t        dt        |      z         dddgddfD ]Q  }d}	 t	        j                         }t        ||       t	        j                         }d}|s?t        d
|z
  fz         S ddl	m
} t	        j                         }|j                  |       t	        j                         }t        d||z
  z         ddl}|j                         }t	        j                         }|j                  |       t	        j                         }t        d||z
  z         y# t        $ r% t        d	|z         t        j                          Y w xY w)z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r   r4   r   r   FTr   z"BS4+%s parsed the markup in %.2fs.r   r   z$Raw lxml parsed the markup in %.2fs.Nz(Raw html5lib parsed the markup in %.2fs.)r   r   r   lentimer   r(   r)   r*   r   r   HTMLr   r   parse)	r   r   parser_namer0   ar   r   r   r/   s	            r2   benchmark_parsersr      s?   	>LND	@3t9LN 0*mL Q	"		A$,		AG 7;A:NNPQ 		A	JJt		A	1QU;=  "F		A
LL		A	5Q?A'  	"3kAC!	"s   6E  +E.-E.r/   c                    t        j                         }|j                  }t        |       }t	        t
        ||      }t        j                  d|||       t        j                  |      }|j                  d       |j                  dd       y)z7Use Python's profiler on a randomly generated document.)bs4r   r/   zbs4.BeautifulSoup(data, parser)
cumulativez_html5lib|bs42   N)tempfileNamedTemporaryFiler-   r   dictr   cProfilerunctxpstatsStats
sort_statsprint_stats)r   r/   
filehandlefilenamer   varsstatss          r2   profiler      sp    ,,.JHDCd62DOO5tT8LLL"E	\"	or*rC   __main__)r   r   r   Nre   )   )r   )i  )順 )r   r   )-ri   __license__r   ior   html.parserr   r   r   r   bs4.builderr   typingr	   r
   r   r   r   r   bs4._typingr   r   rx   r   r   r)   r   r3   rj   rB   rE   r$   rn   rp   __annotations__rr   intr|   r   r   r   r   rf   stdinr   rk   rC   r2   <module>r      s7   C    " 
 * (  +      
5pD& Dd DS DT D.) z ) X	3 	4 	  *S *# c Hc H# H6s 6c 6, BC  BT  BF+# + + +  zSYY^^ rC   