eaiovnaovbqoebvqoeavibavo 3 #6]@s dZdZddlZddlmZddlmZddlZddlmZm Z ddl m Z ddl Z ddl Z ddlZddlZddlZddlZddlZddlZdd Zd#d d ZGd ddeZddZdZdZd$ddZd%ddZd&ddZd'ddZd(d d!Zed"kreejj dS))z=Diagnostic functions, mainly for use when doing tech support.ZMITN)StringIO) HTMLParser) BeautifulSoup __version__)builder_registryc ;CsXtdttdtjdddg}x>|D]6}x0tjD]}||jkr6Pq6W|j|td|q*Wd|kr|jdy*dd l m }td d j t t |jWn*tk r}ztd WYd d }~XnXd|krydd l}td|jWn,tk r}ztdWYd d }~XnXt|dr4|j}n|jdsL|jdrdtd|tdd Sy:tjj|rtd|t|}|j}Wd QRXWntk rYnXtx|D]}td|d} yt||d} d} Wn8tk r"}ztd|tjWYd d }~XnX| rBtd|t| jtddqWd S)z/Diagnostic suite for isolating common problems.z'Diagnostic running on Beautiful Soup %szPython version %sz html.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.zlxml-xmlr)etreezFound lxml version %s.z.lxml is not installed or couldn't be imported.NzFound html5lib version %sz2html5lib is not installed or couldn't be imported.readzhttp:zhttps:z<"%s" looks like a URL. Beautiful Soup is not an HTTP client.zpYou need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.z7"%s" looks like a filename. Reading data from the file.z#Trying to parse your markup with %sF)featuresTz%s could not parse the markup.z#Here's what %s did with the markup:-P)printrsysversionrZbuildersr removeappendrr joinmapstrZ LXML_VERSION ImportErrorrhasattrr startswithospathexistsopen ValueErrorr Exception traceback print_excZprettify) dataZ basic_parsersnameZbuilderr erfpparsersuccesssoupr)/usr/lib/python3.6/diagnose.pydiagnosesj                     r+TcKsNddlm}x<|jt|fd|i|D]\}}td||j|jfq(WdS)zPrint out the lxml events that occur during parsing. This lets you see how lxml parses a document when no Beautiful Soup code is running. r)r htmlz %s, %4s, %sN)rr Z iterparserrtagtext)r"r,kwargsr Zeventelementr)r)r* lxml_traceZs $r1c@s`eZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ ddZ dS)AnnouncingParserz?Announces HTMLParser parse events, without doing anything else.cCs t|dS)N)r)selfsr)r)r*_pgszAnnouncingParser._pcCs|jd|dS)Nz%s START)r5)r3r#Zattrsr)r)r*handle_starttagjsz AnnouncingParser.handle_starttagcCs|jd|dS)Nz%s END)r5)r3r#r)r)r* handle_endtagmszAnnouncingParser.handle_endtagcCs|jd|dS)Nz%s DATA)r5)r3r"r)r)r* handle_datapszAnnouncingParser.handle_datacCs|jd|dS)Nz %s CHARREF)r5)r3r#r)r)r*handle_charrefsszAnnouncingParser.handle_charrefcCs|jd|dS)Nz %s ENTITYREF)r5)r3r#r)r)r*handle_entityrefvsz!AnnouncingParser.handle_entityrefcCs|jd|dS)Nz %s COMMENT)r5)r3r"r)r)r*handle_commentyszAnnouncingParser.handle_commentcCs|jd|dS)Nz%s DECL)r5)r3r"r)r)r* handle_decl|szAnnouncingParser.handle_declcCs|jd|dS)Nz%s UNKNOWN-DECL)r5)r3r"r)r)r* unknown_declszAnnouncingParser.unknown_declcCs|jd|dS)Nz%s PI)r5)r3r"r)r)r* handle_piszAnnouncingParser.handle_piN)__name__ __module__ __qualname____doc__r5r6r7r8r9r:r;r<r=r>r)r)r)r*r2dsr2cCst}|j|dS)zPrint out the HTMLParser events that occur during parsing. This lets you see how HTMLParser parses a document when no Beautiful Soup code is running. N)r2Zfeed)r"r&r)r)r*htmlparser_tracesrCZaeiouZbcdfghjklmnpqrstvwxyzcCs>d}x4t|D](}|ddkr$t}nt}|tj|7}qW|S)z#Generate a random word-like string.r)range _consonants_vowelsrandomchoice)lengthr4itr)r)r*rwords rOcCsdjddt|DS)z'Generate a random sentence-like string. css|]}ttjddVqdS)rP N)rOrJrandint).0rMr)r)r* szrsentence..)rrG)rLr)r)r* rsentencesrVcCsdddddddg}g}x~t|D]r}tjdd }|dkrRtj|}|jd |q |d krr|jttjd d q |d kr tj|}|jd|q Wddj|dS)z+Randomly generate an invalid HTML document.pZdivspanrMbZscripttablerz<%s>rPrFzz z)rGrJrSrKrrVr) num_elementsZ tag_nameselementsrMrKZtag_namer)r)r*rdocs   ra順c Cs(tdtt|}tdt|xdddgddgD]z}d}y"tj}t||}tj}d}Wn6tk r}ztd |tjWYd d }~XnX|r6td |||fq6Wd d l m }tj}|j |tj}td||d d l } | j }tj}|j|tj}td||d S)z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).rr,rz html.parserFTz%s could not parse the markup.Nz"BS4+%s parsed the markup in %.2fs.r)r z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)rrralentimerrr r!rr ZHTMLrrparse) r_r"r&r'ar(rZr$r rr)r)r*benchmark_parserss4      rgrcCsXtj}|j}t|}tt||d}tjd|||tj |}|j d|j dddS)N)bs4r"r&zbs4.BeautifulSoup(data, parser)Z cumulativez _html5lib|bs42) tempfileZNamedTemporaryFiler#radictrhcProfileZrunctxpstatsZStatsZ sort_statsZ print_stats)r_r&Z filehandlefilenamer"varsZstatsr)r)r*profiles  rp__main__)T)rD)rP)rW)rb)rbr)!rBZ __license__rliorZ html.parserrrhrrZ bs4.builderrrrmrJrjrdr rr+r1r2rCrIrHrOrVrargrpr?stdinr r)r)r)r*s8   C !