PNG  IHDRX cHRMz&u0`:pQ<bKGD pHYsodtIME MeqIDATxw]Wug^Qd˶ 6`!N:!@xI~)%7%@Bh&`lnjVF29gΨ4E$|>cɚ{gk= %,a KX%,a KX%,a KX%,a KX%,a KX%,a KX%, b` ǟzeאfp]<!SJmɤY޲ڿ,%c ~ع9VH.!Ͳz&QynֺTkRR.BLHi٪:l;@(!MԴ=žI,:o&N'Kù\vRmJ雵֫AWic H@" !: Cé||]k-Ha oݜ:y F())u]aG7*JV@J415p=sZH!=!DRʯvɱh~V\}v/GKY$n]"X"}t@ xS76^[bw4dsce)2dU0 CkMa-U5tvLƀ~mlMwfGE/-]7XAƟ`׮g ewxwC4\[~7@O-Q( a*XGƒ{ ՟}$_y3tĐƤatgvێi|K=uVyrŲlLӪuܿzwk$m87k( `múcE)"@rK( z4$D; 2kW=Xb$V[Ru819קR~qloѱDyįݎ*mxw]y5e4K@ЃI0A D@"BDk_)N\8͜9dz"fK0zɿvM /.:2O{ Nb=M=7>??Zuo32 DLD@D| &+֎C #B8ַ`bOb $D#ͮҪtx]%`ES`Ru[=¾!@Od37LJ0!OIR4m]GZRJu$‡c=%~s@6SKy?CeIh:[vR@Lh | (BhAMy=݃  G"'wzn޺~8ԽSh ~T*A:xR[ܹ?X[uKL_=fDȊ؂p0}7=D$Ekq!/t.*2ʼnDbŞ}DijYaȲ(""6HA;:LzxQ‘(SQQ}*PL*fc\s `/d'QXW, e`#kPGZuŞuO{{wm[&NBTiiI0bukcA9<4@SӊH*؎4U/'2U5.(9JuDfrޱtycU%j(:RUbArLֺN)udA':uGQN"-"Is.*+k@ `Ojs@yU/ H:l;@yyTn}_yw!VkRJ4P)~y#)r,D =ě"Q]ci'%HI4ZL0"MJy 8A{ aN<8D"1#IJi >XjX֔#@>-{vN!8tRݻ^)N_╗FJEk]CT՟ YP:_|H1@ CBk]yKYp|og?*dGvzنzӴzjֺNkC~AbZƷ`.H)=!QͷVTT(| u78y֮}|[8-Vjp%2JPk[}ԉaH8Wpqhwr:vWª<}l77_~{s۴V+RCģ%WRZ\AqHifɤL36: #F:p]Bq/z{0CU6ݳEv_^k7'>sq*+kH%a`0ԣisqにtү04gVgW΂iJiS'3w.w}l6MC2uԯ|>JF5`fV5m`Y**Db1FKNttu]4ccsQNnex/87+}xaUW9y>ͯ骵G{䩓Գ3+vU}~jJ.NFRD7<aJDB1#ҳgSb,+CS?/ VG J?|?,2#M9}B)MiE+G`-wo߫V`fio(}S^4e~V4bHOYb"b#E)dda:'?}׮4繏`{7Z"uny-?ǹ;0MKx{:_pÚmFמ:F " .LFQLG)Q8qN q¯¯3wOvxDb\. BKD9_NN &L:4D{mm o^tֽ:q!ƥ}K+<"m78N< ywsard5+вz~mnG)=}lYݧNj'QJS{S :UYS-952?&O-:W}(!6Mk4+>A>j+i|<<|;ر^߉=HE|V#F)Emm#}/"y GII웻Jі94+v뾧xu~5C95~ūH>c@덉pʃ1/4-A2G%7>m;–Y,cyyaln" ?ƻ!ʪ<{~h~i y.zZB̃/,雋SiC/JFMmBH&&FAbϓO^tubbb_hZ{_QZ-sύodFgO(6]TJA˯#`۶ɟ( %$&+V'~hiYy>922 Wp74Zkq+Ovn錄c>8~GqܲcWꂎz@"1A.}T)uiW4="jJ2W7mU/N0gcqܗOO}?9/wìXžΏ0 >֩(V^Rh32!Hj5`;O28؇2#ݕf3 ?sJd8NJ@7O0 b־?lldщ̡&|9C.8RTWwxWy46ah嘦mh٤&l zCy!PY?: CJyв]dm4ǜҐR޻RլhX{FƯanшQI@x' ao(kUUuxW_Ñ줮[w8 FRJ(8˼)_mQ _!RJhm=!cVmm ?sFOnll6Qk}alY}; "baӌ~M0w,Ggw2W:G/k2%R,_=u`WU R.9T"v,<\Ik޽/2110Ӿxc0gyC&Ny޽JҢrV6N ``یeA16"J³+Rj*;BϜkZPJaÍ<Jyw:NP8/D$ 011z֊Ⱳ3ι֘k1V_"h!JPIΣ'ɜ* aEAd:ݺ>y<}Lp&PlRfTb1]o .2EW\ͮ]38؋rTJsǏP@芎sF\> P^+dYJLbJ C-xϐn> ι$nj,;Ǖa FU *择|h ~izť3ᤓ`K'-f tL7JK+vf2)V'-sFuB4i+m+@My=O҈0"|Yxoj,3]:cо3 $#uŘ%Y"y죯LebqtҢVzq¼X)~>4L׶m~[1_k?kxֺQ`\ |ٛY4Ѯr!)N9{56(iNq}O()Em]=F&u?$HypWUeB\k]JɩSع9 Zqg4ZĊo oMcjZBU]B\TUd34ݝ~:7ڶSUsB0Z3srx 7`:5xcx !qZA!;%͚7&P H<WL!džOb5kF)xor^aujƍ7 Ǡ8/p^(L>ὴ-B,{ۇWzֺ^k]3\EE@7>lYBȝR.oHnXO/}sB|.i@ɥDB4tcm,@ӣgdtJ!lH$_vN166L__'Z)y&kH;:,Y7=J 9cG) V\hjiE;gya~%ks_nC~Er er)muuMg2;֫R)Md) ,¶ 2-wr#F7<-BBn~_(o=KO㭇[Xv eN_SMgSҐ BS헃D%g_N:/pe -wkG*9yYSZS.9cREL !k}<4_Xs#FmҶ:7R$i,fi!~' # !6/S6y@kZkZcX)%5V4P]VGYq%H1!;e1MV<!ϐHO021Dp= HMs~~a)ަu7G^];git!Frl]H/L$=AeUvZE4P\.,xi {-~p?2b#amXAHq)MWǾI_r`S Hz&|{ +ʖ_= (YS(_g0a03M`I&'9vl?MM+m~}*xT۲(fY*V4x@29s{DaY"toGNTO+xCAO~4Ϳ;p`Ѫ:>Ҵ7K 3}+0 387x\)a"/E>qpWB=1 ¨"MP(\xp߫́A3+J] n[ʼnӼaTbZUWb={~2ooKױӰp(CS\S筐R*JغV&&"FA}J>G֐p1ٸbk7 ŘH$JoN <8s^yk_[;gy-;߉DV{c B yce% aJhDȶ 2IdйIB/^n0tNtџdcKj4϶v~- CBcgqx9= PJ) dMsjpYB] GD4RDWX +h{y`,3ꊕ$`zj*N^TP4L:Iz9~6s) Ga:?y*J~?OrMwP\](21sZUD ?ܟQ5Q%ggW6QdO+\@ ̪X'GxN @'4=ˋ+*VwN ne_|(/BDfj5(Dq<*tNt1х!MV.C0 32b#?n0pzj#!38}޴o1KovCJ`8ŗ_"]] rDUy޲@ Ȗ-;xџ'^Y`zEd?0„ DAL18IS]VGq\4o !swV7ˣι%4FѮ~}6)OgS[~Q vcYbL!wG3 7띸*E Pql8=jT\꘿I(z<[6OrR8ºC~ډ]=rNl[g|v TMTղb-o}OrP^Q]<98S¤!k)G(Vkwyqyr޽Nv`N/e p/~NAOk \I:G6]4+K;j$R:Mi #*[AȚT,ʰ,;N{HZTGMoּy) ]%dHء9Պ䠬|<45,\=[bƟ8QXeB3- &dҩ^{>/86bXmZ]]yޚN[(WAHL$YAgDKp=5GHjU&99v簪C0vygln*P)9^͞}lMuiH!̍#DoRBn9l@ xA/_v=ȺT{7Yt2N"4!YN`ae >Q<XMydEB`VU}u]嫇.%e^ánE87Mu\t`cP=AD/G)sI"@MP;)]%fH9'FNsj1pVhY&9=0pfuJ&gޤx+k:!r˭wkl03׼Ku C &ѓYt{.O.zҏ z}/tf_wEp2gvX)GN#I ݭ߽v/ .& и(ZF{e"=V!{zW`, ]+LGz"(UJp|j( #V4, 8B 0 9OkRrlɱl94)'VH9=9W|>PS['G(*I1==C<5"Pg+x'K5EMd؞Af8lG ?D FtoB[je?{k3zQ vZ;%Ɠ,]E>KZ+T/ EJxOZ1i #T<@ I}q9/t'zi(EMqw`mYkU6;[t4DPeckeM;H}_g pMww}k6#H㶏+b8雡Sxp)&C $@'b,fPߑt$RbJ'vznuS ~8='72_`{q纶|Q)Xk}cPz9p7O:'|G~8wx(a 0QCko|0ASD>Ip=4Q, d|F8RcU"/KM opKle M3#i0c%<7׿p&pZq[TR"BpqauIp$ 8~Ĩ!8Սx\ւdT>>Z40ks7 z2IQ}ItԀ<-%S⍤};zIb$I 5K}Q͙D8UguWE$Jh )cu4N tZl+[]M4k8֦Zeq֮M7uIqG 1==tLtR,ƜSrHYt&QP윯Lg' I,3@P'}'R˪e/%-Auv·ñ\> vDJzlӾNv5:|K/Jb6KI9)Zh*ZAi`?S {aiVDԲuy5W7pWeQJk֤#5&V<̺@/GH?^τZL|IJNvI:'P=Ϛt"¨=cud S Q.Ki0 !cJy;LJR;G{BJy޺[^8fK6)=yʊ+(k|&xQ2`L?Ȓ2@Mf 0C`6-%pKpm')c$׻K5[J*U[/#hH!6acB JA _|uMvDyk y)6OPYjœ50VT K}cǻP[ $:]4MEA.y)|B)cf-A?(e|lɉ#P9V)[9t.EiQPDѠ3ϴ;E:+Օ t ȥ~|_N2,ZJLt4! %ա]u {+=p.GhNcŞQI?Nd'yeh n7zi1DB)1S | S#ًZs2|Ɛy$F SxeX{7Vl.Src3E℃Q>b6G ўYCmtկ~=K0f(=LrAS GN'ɹ9<\!a`)֕y[uՍ[09` 9 +57ts6}b4{oqd+J5fa/,97J#6yν99mRWxJyѡyu_TJc`~W>l^q#Ts#2"nD1%fS)FU w{ܯ R{ ˎ󅃏џDsZSQS;LV;7 Od1&1n$ N /.q3~eNɪ]E#oM~}v֯FڦwyZ=<<>Xo稯lfMFV6p02|*=tV!c~]fa5Y^Q_WN|Vs 0ҘދU97OI'N2'8N֭fgg-}V%y]U4 峧p*91#9U kCac_AFңĪy뚇Y_AiuYyTTYЗ-(!JFLt›17uTozc. S;7A&&<ԋ5y;Ro+:' *eYJkWR[@F %SHWP 72k4 qLd'J "zB6{AC0ƁA6U.'F3:Ȅ(9ΜL;D]m8ڥ9}dU "v!;*13Rg^fJyShyy5auA?ɩGHRjo^]׽S)Fm\toy 4WQS@mE#%5ʈfFYDX ~D5Ϡ9tE9So_aU4?Ѽm%&c{n>.KW1Tlb}:j uGi(JgcYj0qn+>) %\!4{LaJso d||u//P_y7iRJ߬nHOy) l+@$($VFIQ9%EeKʈU. ia&FY̒mZ=)+qqoQn >L!qCiDB;Y<%} OgBxB!ØuG)WG9y(Ą{_yesuZmZZey'Wg#C~1Cev@0D $a@˲(.._GimA:uyw֬%;@!JkQVM_Ow:P.s\)ot- ˹"`B,e CRtaEUP<0'}r3[>?G8xU~Nqu;Wm8\RIkբ^5@k+5(By'L&'gBJ3ݶ!/㮻w҅ yqPWUg<e"Qy*167΃sJ\oz]T*UQ<\FԎ`HaNmڜ6DysCask8wP8y9``GJ9lF\G g's Nn͵MLN֪u$| /|7=]O)6s !ĴAKh]q_ap $HH'\1jB^s\|- W1:=6lJBqjY^LsPk""`]w)󭃈,(HC ?䔨Y$Sʣ{4Z+0NvQkhol6C.婧/u]FwiVjZka&%6\F*Ny#8O,22+|Db~d ~Çwc N:FuuCe&oZ(l;@ee-+Wn`44AMK➝2BRՈt7g*1gph9N) *"TF*R(#'88pm=}X]u[i7bEc|\~EMn}P瘊J)K.0i1M6=7'_\kaZ(Th{K*GJyytw"IO-PWJk)..axӝ47"89Cc7ĐBiZx 7m!fy|ϿF9CbȩV 9V-՛^pV̌ɄS#Bv4-@]Vxt-Z, &ֺ*diؠ2^VXbs֔Ìl.jQ]Y[47gj=幽ex)A0ip׳ W2[ᎇhuE^~q흙L} #-b۸oFJ_QP3r6jr+"nfzRJTUqoaۍ /$d8Mx'ݓ= OՃ| )$2mcM*cЙj}f };n YG w0Ia!1Q.oYfr]DyISaP}"dIӗթO67jqR ҊƐƈaɤGG|h;t]䗖oSv|iZqX)oalv;۩meEJ\!8=$4QU4Xo&VEĊ YS^E#d,yX_> ۘ-e\ "Wa6uLĜZi`aD9.% w~mB(02G[6y.773a7 /=o7D)$Z 66 $bY^\CuP. (x'"J60׿Y:Oi;F{w佩b+\Yi`TDWa~|VH)8q/=9!g߆2Y)?ND)%?Ǐ`k/sn:;O299yB=a[Ng 3˲N}vLNy;*?x?~L&=xyӴ~}q{qE*IQ^^ͧvü{Huu=R|>JyUlZV, B~/YF!Y\u_ݼF{_C)LD]m {H 0ihhadd nUkf3oٺCvE\)QJi+֥@tDJkB$1!Đr0XQ|q?d2) Ӣ_}qv-< FŊ߫%roppVBwü~JidY4:}L6M7f٬F "?71<2#?Jyy4뷢<_a7_=Q E=S1И/9{+93֮E{ǂw{))?maÆm(uLE#lïZ  ~d];+]h j?!|$F}*"4(v'8s<ŏUkm7^7no1w2ؗ}TrͿEk>p'8OB7d7R(A 9.*Mi^ͳ; eeUwS+C)uO@ =Sy]` }l8^ZzRXj[^iUɺ$tj))<sbDJfg=Pk_{xaKo1:-uyG0M ԃ\0Lvuy'ȱc2Ji AdyVgVh!{]/&}}ċJ#%d !+87<;qN޼Nفl|1N:8ya  8}k¾+-$4FiZYÔXk*I&'@iI99)HSh4+2G:tGhS^繿 Kتm0 вDk}֚+QT4;sC}rՅE,8CX-e~>G&'9xpW,%Fh,Ry56Y–hW-(v_,? ; qrBk4-V7HQ;ˇ^Gv1JVV%,ik;D_W!))+BoS4QsTM;gt+ndS-~:11Sgv!0qRVh!"Ȋ(̦Yl.]PQWgٳE'`%W1{ndΗBk|Ž7ʒR~,lnoa&:ü$ 3<a[CBݮwt"o\ePJ=Hz"_c^Z.#ˆ*x z̝grY]tdkP*:97YľXyBkD4N.C_[;F9`8& !AMO c `@BA& Ost\-\NX+Xp < !bj3C&QL+*&kAQ=04}cC!9~820G'PC9xa!w&bo_1 Sw"ܱ V )Yl3+ס2KoXOx]"`^WOy :3GO0g;%Yv㐫(R/r (s } u B &FeYZh0y> =2<Ϟc/ -u= c&׭,.0"g"7 6T!vl#sc>{u/Oh Bᾈ)۴74]x7 gMӒ"d]U)}" v4co[ ɡs 5Gg=XR14?5A}D "b{0$L .\4y{_fe:kVS\\O]c^W52LSBDM! C3Dhr̦RtArx4&agaN3Cf<Ԉp4~ B'"1@.b_/xQ} _߃҉/gٓ2Qkqp0շpZ2fԫYz< 4L.Cyυι1t@鎫Fe sYfsF}^ V}N<_`p)alٶ "(XEAVZ<)2},:Ir*#m_YӼ R%a||EƼIJ,,+f"96r/}0jE/)s)cjW#w'Sʯ5<66lj$a~3Kʛy 2:cZ:Yh))+a߭K::N,Q F'qB]={.]h85C9cr=}*rk?vwV렵ٸW Rs%}rNAkDv|uFLBkWY YkX מ|)1!$#3%y?pF<@<Rr0}: }\J [5FRxY<9"SQdE(Q*Qʻ)q1E0B_O24[U'],lOb ]~WjHޏTQ5Syu wq)xnw8~)c 쫬gٲߠ H% k5dƝk> kEj,0% b"vi2Wس_CuK)K{n|>t{P1򨾜j>'kEkƗBg*H%'_aY6Bn!TL&ɌOb{c`'d^{t\i^[uɐ[}q0lM˕G:‚4kb祔c^:?bpg… +37stH:0}en6x˟%/<]BL&* 5&fK9Mq)/iyqtA%kUe[ڛKN]Ě^,"`/ s[EQQm?|XJ߅92m]G.E΃ח U*Cn.j_)Tѧj̿30ڇ!A0=͜ar I3$C^-9#|pk!)?7.x9 @OO;WƝZBFU keZ75F6Tc6"ZȚs2y/1 ʵ:u4xa`C>6Rb/Yм)^=+~uRd`/|_8xbB0?Ft||Z\##|K 0>>zxv8۴吅q 8ĥ)"6>~\8:qM}#͚'ĉ#p\׶ l#bA?)|g g9|8jP(cr,BwV (WliVxxᡁ@0Okn;ɥh$_ckCgriv}>=wGzβ KkBɛ[˪ !J)h&k2%07δt}!d<9;I&0wV/ v 0<H}L&8ob%Hi|޶o&h1L|u֦y~󛱢8fٲUsւ)0oiFx2}X[zVYr_;N(w]_4B@OanC?gĦx>мgx>ΛToZoOMp>40>V Oy V9iq!4 LN,ˢu{jsz]|"R޻&'ƚ{53ўFu(<٪9:΋]B;)B>1::8;~)Yt|0(pw2N%&X,URBK)3\zz&}ax4;ǟ(tLNg{N|Ǽ\G#C9g$^\}p?556]/RP.90 k,U8/u776s ʪ_01چ|\N 0VV*3H鴃J7iI!wG_^ypl}r*jɤSR 5QN@ iZ#1ٰy;_\3\BQQ x:WJv츟ٯ$"@6 S#qe딇(/P( Dy~TOϻ<4:-+F`0||;Xl-"uw$Цi󼕝mKʩorz"mϺ$F:~E'ҐvD\y?Rr8_He@ e~O,T.(ފR*cY^m|cVR[8 JҡSm!ΆԨb)RHG{?MpqrmN>߶Y)\p,d#xۆWY*,l6]v0h15M˙MS8+EdI='LBJIH7_9{Caз*Lq,dt >+~ّeʏ?xԕ4bBAŚjﵫ!'\Ը$WNvKO}ӽmSşذqsOy?\[,d@'73'j%kOe`1.g2"e =YIzS2|zŐƄa\U,dP;jhhhaxǶ?КZ՚.q SE+XrbOu%\GتX(H,N^~]JyEZQKceTQ]VGYqnah;y$cQahT&QPZ*iZ8UQQM.qo/T\7X"u?Mttl2Xq(IoW{R^ ux*SYJ! 4S.Jy~ BROS[V|žKNɛP(L6V^|cR7i7nZW1Fd@ Ara{詑|(T*dN]Ko?s=@ |_EvF]׍kR)eBJc" MUUbY6`~V޴dJKß&~'d3i WWWWWW
Current Directory: /usr/lib/python2.7/site-packages/pip/_vendor/html5lib
Viewing File: /usr/lib/python2.7/site-packages/pip/_vendor/html5lib/sanitizer.py
from __future__ import absolute_import, division, unicode_literals import re from xml.sax.saxutils import escape, unescape from six.moves import urllib_parse as urlparse from .tokenizer import HTMLTokenizer from .constants import tokenTypes content_type_rgx = re.compile(r''' ^ # Match a content type <application>/<type> (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) # Match any character set and encoding (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) # Assume the rest is data ,.* $ ''', re.VERBOSE) class HTMLSanitizerMixin(object): """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video'] mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom', 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'none'] svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', 'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse', 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use'] acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', 'background', 'balance', 'bgcolor', 'bgproperties', 'border', 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords', 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers', 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload', 'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min', 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', 'step', 'style', 'summary', 'suppress', 'tabindex', 'target', 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap', 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml', 'width', 'wrap', 'xml:lang'] mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth', 'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence', 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace', 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize', 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show', 'xlink:type', 'xmlns', 'xmlns:xlink'] svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', 'arabic-form', 'ascent', 'attributeName', 'attributeType', 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', 'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-opacity', 'fill-rule', 'font-family', 'font-size', 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end', 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits', 'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset', 'opacity', 'orient', 'origin', 'overline-position', 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness', 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2', 'underline-position', 'underline-thickness', 'unicode', 'unicode-range', 'units-per-em', 'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x', 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type', 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', 'y2', 'zoomAndPan'] attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background', 'datasrc', 'dynsrc', 'lowsrc', 'ping', 'poster', 'xlink:href', 'xml:base'] svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill', 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end', 'mask', 'stroke'] svg_allow_local_href = ['altGlyph', 'animate', 'animateColor', 'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter', 'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref', 'set', 'use'] acceptable_css_properties = ['azimuth', 'background-color', 'border-bottom-color', 'border-collapse', 'border-color', 'border-left-color', 'border-right-color', 'border-top-color', 'clear', 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', 'white-space', 'width'] acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue', 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', 'transparent', 'underline', 'white', 'yellow'] acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', 'stroke-opacity'] acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', 'ssh', 'sftp', 'rtsp', 'afs', 'data'] acceptable_content_types = ['image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain'] # subclasses may define their own versions of these constants allowed_elements = acceptable_elements + mathml_elements + svg_elements allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes allowed_css_properties = acceptable_css_properties allowed_css_keywords = acceptable_css_keywords allowed_svg_properties = acceptable_svg_properties allowed_protocols = acceptable_protocols allowed_content_types = acceptable_content_types # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style # attributes are parsed, and a restricted set, # specified by # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through. # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified # in ALLOWED_PROTOCOLS are allowed. # # sanitize_html('<script> do_nasty_stuff() </script>') # => &lt;script> do_nasty_stuff() &lt;/script> # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>') # => <a>Click here for $100</a> def sanitize_token(self, token): # accommodate filters which use token_type differently token_type = token["type"] if token_type in list(tokenTypes.keys()): token_type = tokenTypes[token_type] if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"]): if token["name"] in self.allowed_elements: return self.allowed_token(token, token_type) else: return self.disallowed_token(token, token_type) elif token_type == tokenTypes["Comment"]: pass else: return token def allowed_token(self, token, token_type): if "data" in token: attrs = dict([(name, val) for name, val in token["data"][::-1] if name in self.allowed_attributes]) for attr in self.attr_val_is_uri: if attr not in attrs: continue val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescape(attrs[attr])).lower() # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") try: uri = urlparse.urlparse(val_unescaped) except ValueError: uri = None del attrs[attr] if uri and uri.scheme: if uri.scheme not in self.allowed_protocols: del attrs[attr] if uri.scheme == 'data': m = content_type_rgx.match(uri.path) if not m: del attrs[attr] elif m.group('content_type') not in self.allowed_content_types: del attrs[attr] for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr])) if (token["name"] in self.svg_allow_local_href and 'xlink:href' in attrs and re.search('^\s*[^#\s].*', attrs['xlink:href'])): del attrs['xlink:href'] if 'style' in attrs: attrs['style'] = self.sanitize_css(attrs['style']) token["data"] = [[name, val] for name, val in list(attrs.items())] return token def disallowed_token(self, token, token_type): if token_type == tokenTypes["EndTag"]: token["data"] = "</%s>" % token["name"] elif token["data"]: attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]]) token["data"] = "<%s%s>" % (token["name"], attrs) else: token["data"] = "<%s>" % token["name"] if token.get("selfClosing"): token["data"] = token["data"][:-1] + "/>" if token["type"] in list(tokenTypes.keys()): token["type"] = "Characters" else: token["type"] = tokenTypes["Characters"] del token["name"] return token def sanitize_css(self, style): # disallow urls style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) # gauntlet if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return '' clean = [] for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): if not value: continue if prop.lower() in self.allowed_css_properties: clean.append(prop + ': ' + value + ';') elif prop.split('-')[0].lower() in ['background', 'border', 'margin', 'padding']: for keyword in value.split(): if keyword not in self.acceptable_css_keywords and \ not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): break else: clean.append(prop + ': ' + value + ';') elif prop.lower() in self.allowed_svg_properties: clean.append(prop + ': ' + value + ';') return ' '.join(clean) class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin): def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, lowercaseElementName=False, lowercaseAttrName=False, parser=None): # Change case matching defaults as we only output lowercase html anyway # This solution doesn't seem ideal... HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet, lowercaseElementName, lowercaseAttrName, parser=parser) def __iter__(self): for token in HTMLTokenizer.__iter__(self): token = self.sanitize_token(token) if token: yield token