PNG  IHDRX cHRMz&u0`:pQ<bKGD pHYsodtIME MeqIDATxw]Wug^Qd˶ 6`!N:!@xI~)%7%@Bh&`lnjVF29gΨ4E$|>cɚ{gk= %,a KX%,a KX%,a KX%,a KX%,a KX%,a KX%, b` ǟzeאfp]<!SJmɤY޲ڿ,%c ~ع9VH.!Ͳz&QynֺTkRR.BLHi٪:l;@(!MԴ=žI,:o&N'Kù\vRmJ雵֫AWic H@" !: Cé||]k-Ha oݜ:y F())u]aG7*JV@J415p=sZH!=!DRʯvɱh~V\}v/GKY$n]"X"}t@ xS76^[bw4dsce)2dU0 CkMa-U5tvLƀ~mlMwfGE/-]7XAƟ`׮g ewxwC4\[~7@O-Q( a*XGƒ{ ՟}$_y3tĐƤatgvێi|K=uVyrŲlLӪuܿzwk$m87k( `múcE)"@rK( z4$D; 2kW=Xb$V[Ru819קR~qloѱDyįݎ*mxw]y5e4K@ЃI0A D@"BDk_)N\8͜9dz"fK0zɿvM /.:2O{ Nb=M=7>??Zuo32 DLD@D| &+֎C #B8ַ`bOb $D#ͮҪtx]%`ES`Ru[=¾!@Od37LJ0!OIR4m]GZRJu$‡c=%~s@6SKy?CeIh:[vR@Lh | (BhAMy=݃  G"'wzn޺~8ԽSh ~T*A:xR[ܹ?X[uKL_=fDȊ؂p0}7=D$Ekq!/t.*2ʼnDbŞ}DijYaȲ(""6HA;:LzxQ‘(SQQ}*PL*fc\s `/d'QXW, e`#kPGZuŞuO{{wm[&NBTiiI0bukcA9<4@SӊH*؎4U/'2U5.(9JuDfrޱtycU%j(:RUbArLֺN)udA':uGQN"-"Is.*+k@ `Ojs@yU/ H:l;@yyTn}_yw!VkRJ4P)~y#)r,D =ě"Q]ci'%HI4ZL0"MJy 8A{ aN<8D"1#IJi >XjX֔#@>-{vN!8tRݻ^)N_╗FJEk]CT՟ YP:_|H1@ CBk]yKYp|og?*dGvzنzӴzjֺNkC~AbZƷ`.H)=!QͷVTT(| u78y֮}|[8-Vjp%2JPk[}ԉaH8Wpqhwr:vWª<}l77_~{s۴V+RCģ%WRZ\AqHifɤL36: #F:p]Bq/z{0CU6ݳEv_^k7'>sq*+kH%a`0ԣisqにtү04gVgW΂iJiS'3w.w}l6MC2uԯ|>JF5`fV5m`Y**Db1FKNttu]4ccsQNnex/87+}xaUW9y>ͯ骵G{䩓Գ3+vU}~jJ.NFRD7<aJDB1#ҳgSb,+CS?/ VG J?|?,2#M9}B)MiE+G`-wo߫V`fio(}S^4e~V4bHOYb"b#E)dda:'?}׮4繏`{7Z"uny-?ǹ;0MKx{:_pÚmFמ:F " .LFQLG)Q8qN q¯¯3wOvxDb\. 
BKD9_NN &L:4D{mm o^tֽ:q!ƥ}K+<"m78N< ywsard5+вz~mnG)=}lYݧNj'QJS{S :UYS-952?&O-:W}(!6Mk4+>A>j+i|<<|;ر^߉=HE|V#F)Emm#}/"y GII웻Jі94+v뾧xu~5C95~ūH>c@덉pʃ1/4-A2G%7>m;–Y,cyyaln" ?ƻ!ʪ<{~h~i y.zZB̃/,雋SiC/JFMmBH&&FAbϓO^tubbb_hZ{_QZ-sύodFgO(6]TJA˯#`۶ɟ( %$&+V'~hiYy>922 Wp74Zkq+Ovn錄c>8~GqܲcWꂎz@"1A.}T)uiW4="jJ2W7mU/N0gcqܗOO}?9/wìXžΏ0 >֩(V^Rh32!Hj5`;O28؇2#ݕf3 ?sJd8NJ@7O0 b־?lldщ̡&|9C.8RTWwxWy46ah嘦mh٤&l zCy!PY?: CJyв]dm4ǜҐR޻RլhX{FƯanшQI@x' ao(kUUuxW_Ñ줮[w8 FRJ(8˼)_mQ _!RJhm=!cVmm ?sFOnll6Qk}alY}; "baӌ~M0w,Ggw2W:G/k2%R,_=u`WU R.9T"v,<\Ik޽/2110Ӿxc0gyC&Ny޽JҢrV6N ``یeA16"J³+Rj*;BϜkZPJaÍ<Jyw:NP8/D$ 011z֊Ⱳ3ι֘k1V_"h!JPIΣ'ɜ* aEAd:ݺ>y<}Lp&PlRfTb1]o .2EW\ͮ]38؋rTJsǏP@芎sF\> P^+dYJLbJ C-xϐn> ι$nj,;Ǖa FU *择|h ~izť3ᤓ`K'-f tL7JK+vf2)V'-sFuB4i+m+@My=O҈0"|Yxoj,3]:cо3 $#uŘ%Y"y죯LebqtҢVzq¼X)~>4L׶m~[1_k?kxֺQ`\ |ٛY4Ѯr!)N9{56(iNq}O()Em]=F&u?$HypWUeB\k]JɩSع9 Zqg4ZĊo oMcjZBU]B\TUd34ݝ~:7ڶSUsB0Z3srx 7`:5xcx !qZA!;%͚7&P H<WL!džOb5kF)xor^aujƍ7 Ǡ8/p^(L>ὴ-B,{ۇWzֺ^k]3\EE@7>lYBȝR.oHnXO/}sB|.i@ɥDB4tcm,@ӣgdtJ!lH$_vN166L__'Z)y&kH;:,Y7=J 9cG) V\hjiE;gya~%ks_nC~Er er)muuMg2;֫R)Md) ,¶ 2-wr#F7<-BBn~_(o=KO㭇[Xv eN_SMgSҐ BS헃D%g_N:/pe -wkG*9yYSZS.9cREL !k}<4_Xs#FmҶ:7R$i,fi!~' # !6/S6y@kZkZcX)%5V4P]VGYq%H1!;e1MV<!ϐHO021Dp= HMs~~a)ަu7G^];git!Frl]H/L$=AeUvZE4P\.,xi {-~p?2b#amXAHq)MWǾI_r`S Hz&|{ +ʖ_= (YS(_g0a03M`I&'9vl?MM+m~}*xT۲(fY*V4x@29s{DaY"toGNTO+xCAO~4Ϳ;p`Ѫ:>Ҵ7K 3}+0 387x\)a"/E>qpWB=1 ¨"MP(\xp߫́A3+J] n[ʼnӼaTbZUWb={~2ooKױӰp(CS\S筐R*JغV&&"FA}J>G֐p1ٸbk7 ŘH$JoN <8s^yk_[;gy-;߉DV{c B yce% aJhDȶ 2IdйIB/^n0tNtџdcKj4϶v~- CBcgqx9= PJ) dMsjpYB] GD4RDWX +h{y`,3ꊕ$`zj*N^TP4L:Iz9~6s) Ga:?y*J~?OrMwP\](21sZUD ?ܟQ5Q%ggW6QdO+\@ ̪X'GxN @'4=ˋ+*VwN ne_|(/BDfj5(Dq<*tNt1х!MV.C0 32b#?n0pzj#!38}޴o1KovCJ`8ŗ_"]] rDUy޲@ Ȗ-;xџ'^Y`zEd?0„ DAL18IS]VGq\4o !swV7ˣι%4FѮ~}6)OgS[~Q vcYbL!wG3 7띸*E Pql8=jT\꘿I(z<[6OrR8ºC~ډ]=rNl[g|v TMTղb-o}OrP^Q]<98S¤!k)G(Vkwyqyr޽Nv`N/e p/~NAOk \I:G6]4+K;j$R:Mi #*[AȚT,ʰ,;N{HZTGMoּy) ]%dHء9Պ䠬|<45,\=[bƟ8QXeB3- &dҩ^{>/86bXmZ]]yޚN[(WAHL$YAgDKp=5GHjU&99v簪C0vygln*P)9^͞}lMuiH!̍#DoRBn9l@ xA/_v=ȺT{7Yt2N"4!YN`ae 
>Q<XMydEB`VU}u]嫇.%e^ánE87Mu\t`cP=AD/G)sI"@MP;)]%fH9'FNsj1pVhY&9=0pfuJ&gޤx+k:!r˭wkl03׼Ku C &ѓYt{.O.zҏ z}/tf_wEp2gvX)GN#I ݭ߽v/ .& и(ZF{e"=V!{zW`, ]+LGz"(UJp|j( #V4, 8B 0 9OkRrlɱl94)'VH9=9W|>PS['G(*I1==C<5"Pg+x'K5EMd؞Af8lG ?D FtoB[je?{k3zQ vZ;%Ɠ,]E>KZ+T/ EJxOZ1i #T<@ I}q9/t'zi(EMqw`mYkU6;[t4DPeckeM;H}_g pMww}k6#H㶏+b8雡Sxp)&C $@'b,fPߑt$RbJ'vznuS ~8='72_`{q纶|Q)Xk}cPz9p7O:'|G~8wx(a 0QCko|0ASD>Ip=4Q, d|F8RcU"/KM opKle M3#i0c%<7׿p&pZq[TR"BpqauIp$ 8~Ĩ!8Սx\ւdT>>Z40ks7 z2IQ}ItԀ<-%S⍤};zIb$I 5K}Q͙D8UguWE$Jh )cu4N tZl+[]M4k8֦Zeq֮M7uIqG 1==tLtR,ƜSrHYt&QP윯Lg' I,3@P'}'R˪e/%-Auv·ñ\> vDJzlӾNv5:|K/Jb6KI9)Zh*ZAi`?S {aiVDԲuy5W7pWeQJk֤#5&V<̺@/GH?^τZL|IJNvI:'P=Ϛt"¨=cud S Q.Ki0 !cJy;LJR;G{BJy޺[^8fK6)=yʊ+(k|&xQ2`L?Ȓ2@Mf 0C`6-%pKpm')c$׻K5[J*U[/#hH!6acB JA _|uMvDyk y)6OPYjœ50VT K}cǻP[ $:]4MEA.y)|B)cf-A?(e|lɉ#P9V)[9t.EiQPDѠ3ϴ;E:+Օ t ȥ~|_N2,ZJLt4! %ա]u {+=p.GhNcŞQI?Nd'yeh n7zi1DB)1S | S#ًZs2|Ɛy$F SxeX{7Vl.Src3E℃Q>b6G ўYCmtկ~=K0f(=LrAS GN'ɹ9<\!a`)֕y[uՍ[09` 9 +57ts6}b4{oqd+J5fa/,97J#6yν99mRWxJyѡyu_TJc`~W>l^q#Ts#2"nD1%fS)FU w{ܯ R{ ˎ󅃏џDsZSQS;LV;7 Od1&1n$ N /.q3~eNɪ]E#oM~}v֯FڦwyZ=<<>Xo稯lfMFV6p02|*=tV!c~]fa5Y^Q_WN|Vs 0ҘދU97OI'N2'8N֭fgg-}V%y]U4 峧p*91#9U kCac_AFңĪy뚇Y_AiuYyTTYЗ-(!JFLt›17uTozc. S;7A&&<ԋ5y;Ro+:' *eYJkWR[@F %SHWP 72k4 qLd'J "zB6{AC0ƁA6U.'F3:Ȅ(9ΜL;D]m8ڥ9}dU "v!;*13Rg^fJyShyy5auA?ɩGHRjo^]׽S)Fm\toy 4WQS@mE#%5ʈfFYDX ~D5Ϡ9tE9So_aU4?Ѽm%&c{n>.KW1Tlb}:j uGi(JgcYj0qn+>) %\!4{LaJso d||u//P_y7iRJ߬nHOy) l+@$($VFIQ9%EeKʈU. 
ia&FY̒mZ=)+qqoQn >L!qCiDB;Y<%} OgBxB!ØuG)WG9y(Ą{_yesuZmZZey'Wg#C~1Cev@0D $a@˲(.._GimA:uyw֬%;@!JkQVM_Ow:P.s\)ot- ˹"`B,e CRtaEUP<0'}r3[>?G8xU~Nqu;Wm8\RIkբ^5@k+5(By'L&'gBJ3ݶ!/㮻w҅ yqPWUg<e"Qy*167΃sJ\oz]T*UQ<\FԎ`HaNmڜ6DysCask8wP8y9``GJ9lF\G g's Nn͵MLN֪u$| /|7=]O)6s !ĴAKh]q_ap $HH'\1jB^s\|- W1:=6lJBqjY^LsPk""`]w)󭃈,(HC ?䔨Y$Sʣ{4Z+0NvQkhol6C.婧/u]FwiVjZka&%6\F*Ny#8O,22+|Db~d ~Çwc N:FuuCe&oZ(l;@ee-+Wn`44AMK➝2BRՈt7g*1gph9N) *"TF*R(#'88pm=}X]u[i7bEc|\~EMn}P瘊J)K.0i1M6=7'_\kaZ(Th{K*GJyytw"IO-PWJk)..axӝ47"89Cc7ĐBiZx 7m!fy|ϿF9CbȩV 9V-՛^pV̌ɄS#Bv4-@]Vxt-Z, &ֺ*diؠ2^VXbs֔Ìl.jQ]Y[47gj=幽ex)A0ip׳ W2[ᎇhuE^~q흙L} #-b۸oFJ_QP3r6jr+"nfzRJTUqoaۍ /$d8Mx'ݓ= OՃ| )$2mcM*cЙj}f };n YG w0Ia!1Q.oYfr]DyISaP}"dIӗթO67jqR ҊƐƈaɤGG|h;t]䗖oSv|iZqX)oalv;۩meEJ\!8=$4QU4Xo&VEĊ YS^E#d,yX_> ۘ-e\ "Wa6uLĜZi`aD9.% w~mB(02G[6y.773a7 /=o7D)$Z 66 $bY^\CuP. (x'"J60׿Y:Oi;F{w佩b+\Yi`TDWa~|VH)8q/=9!g߆2Y)?ND)%?Ǐ`k/sn:;O299yB=a[Ng 3˲N}vLNy;*?x?~L&=xyӴ~}q{qE*IQ^^ͧvü{Huu=R|>JyUlZV, B~/YF!Y\u_ݼF{_C)LD]m {H 0ihhadd nUkf3oٺCvE\)QJi+֥@tDJkB$1!Đr0XQ|q?d2) Ӣ_}qv-< FŊ߫%roppVBwü~JidY4:}L6M7f٬F "?71<2#?Jyy4뷢<_a7_=Q E=S1И/9{+93֮E{ǂw{))?maÆm(uLE#lïZ  ~d];+]h j?!|$F}*"4(v'8s<ŏUkm7^7no1w2ؗ}TrͿEk>p'8OB7d7R(A 9.*Mi^ͳ; eeUwS+C)uO@ =Sy]` }l8^ZzRXj[^iUɺ$tj))<sbDJfg=Pk_{xaKo1:-uyG0M ԃ\0Lvuy'ȱc2Ji AdyVgVh!{]/&}}ċJ#%d !+87<;qN޼Nفl|1N:8ya  8}k¾+-$4FiZYÔXk*I&'@iI99)HSh4+2G:tGhS^繿 Kتm0 вDk}֚+QT4;sC}rՅE,8CX-e~>G&'9xpW,%Fh,Ry56Y–hW-(v_,? ; qrBk4-V7HQ;ˇ^Gv1JVV%,ik;D_W!))+BoS4QsTM;gt+ndS-~:11Sgv!0qRVh!"Ȋ(̦Yl.]PQWgٳE'`%W1{ndΗBk|Ž7ʒR~,lnoa&:ü$ 3<a[CBݮwt"o\ePJ=Hz"_c^Z.#ˆ*x z̝grY]tdkP*:97YľXyBkD4N.C_[;F9`8& !AMO c `@BA& Ost\-\NX+Xp < !bj3C&QL+*&kAQ=04}cC!9~820G'PC9xa!w&bo_1 Sw"ܱ V )Yl3+ס2KoXOx]"`^WOy :3GO0g;%Yv㐫(R/r (s } u B &FeYZh0y> =2<Ϟc/ -u= c&׭,.0"g"7 6T!vl#sc>{u/Oh Bᾈ)۴74]x7 gMӒ"d]U)}" v4co[ ɡs 5Gg=XR14?5A}D "b{0$L .\4y{_fe:kVS\\O]c^W52LSBDM! 
C3Dhr̦RtArx4&agaN3Cf<Ԉp4~ B'"1@.b_/xQ} _߃҉/gٓ2Qkqp0շpZ2fԫYz< 4L.Cyυι1t@鎫Fe sYfsF}^ V}N<_`p)alٶ "(XEAVZ<)2},:Ir*#m_YӼ R%a||EƼIJ,,+f"96r/}0jE/)s)cjW#w'Sʯ5<66lj$a~3Kʛy 2:cZ:Yh))+a߭K::N,Q F'qB]={.]h85C9cr=}*rk?vwV렵ٸW Rs%}rNAkDv|uFLBkWY YkX מ|)1!$#3%y?pF<@<Rr0}: }\J [5FRxY<9"SQdE(Q*Qʻ)q1E0B_O24[U'],lOb ]~WjHޏTQ5Syu wq)xnw8~)c 쫬gٲߠ H% k5dƝk> kEj,0% b"vi2Wس_CuK)K{n|>t{P1򨾜j>'kEkƗBg*H%'_aY6Bn!TL&ɌOb{c`'d^{t\i^[uɐ[}q0lM˕G:‚4kb祔c^:?bpg… +37stH:0}en6x˟%/<]BL&* 5&fK9Mq)/iyqtA%kUe[ڛKN]Ě^,"`/ s[EQQm?|XJ߅92m]G.E΃ח U*Cn.j_)Tѧj̿30ڇ!A0=͜ar I3$C^-9#|pk!)?7.x9 @OO;WƝZBFU keZ75F6Tc6"ZȚs2y/1 ʵ:u4xa`C>6Rb/Yм)^=+~uRd`/|_8xbB0?Ft||Z\##|K 0>>zxv8۴吅q 8ĥ)"6>~\8:qM}#͚'ĉ#p\׶ l#bA?)|g g9|8jP(cr,BwV (WliVxxᡁ@0Okn;ɥh$_ckCgriv}>=wGzβ KkBɛ[˪ !J)h&k2%07δt}!d<9;I&0wV/ v 0<H}L&8ob%Hi|޶o&h1L|u֦y~󛱢8fٲUsւ)0oiFx2}X[zVYr_;N(w]_4B@OanC?gĦx>мgx>ΛToZoOMp>40>V Oy V9iq!4 LN,ˢu{jsz]|"R޻&'ƚ{53ўFu(<٪9:΋]B;)B>1::8;~)Yt|0(pw2N%&X,URBK)3\zz&}ax4;ǟ(tLNg{N|Ǽ\G#C9g$^\}p?556]/RP.90 k,U8/u776s ʪ_01چ|\N 0VV*3H鴃J7iI!wG_^ypl}r*jɤSR 5QN@ iZ#1ٰy;_\3\BQQ x:WJv츟ٯ$"@6 S#qe딇(/P( Dy~TOϻ<4:-+F`0||;Xl-"uw$Цi󼕝mKʩorz"mϺ$F:~E'ҐvD\y?Rr8_He@ e~O,T.(ފR*cY^m|cVR[8 JҡSm!ΆԨb)RHG{?MpqrmN>߶Y)\p,d#xۆWY*,l6]v0h15M˙MS8+EdI='LBJIH7_9{Caз*Lq,dt >+~ّeʏ?xԕ4bBAŚjﵫ!'\Ը$WNvKO}ӽmSşذqsOy?\[,d@'73'j%kOe`1.g2"e =YIzS2|zŐƄa\U,dP;jhhhaxǶ?КZ՚.q SE+XrbOu%\GتX(H,N^~]JyEZQKceTQ]VGYqnah;y$cQahT&QPZ*iZ8UQQM.qo/T\7X"u?Mttl2Xq(IoW{R^ ux*SYJ! 4S.Jy~ BROS[V|žKNɛP(L6V^|cR7i7nZW1Fd@ Ara{詑|(T*dN]Ko?s=@ |_EvF]׍kR)eBJc" MUUbY6`~V޴dJKß&~'d3i WWWWWW
Current Directory: /usr/share/doc/libxml2-python-2.9.1
Viewing File: /usr/share/doc/libxml2-python-2.9.1/index.py
#!/usr/bin/python -u
#
# imports the API description and fills up a database with
# name relevance to modules, functions or web pages
#
# Operation needed:
# =================
#
# install mysqld, the python wrappers for mysql and libxml2, start mysqld
# Change the root passwd of mysql:
#    mysqladmin -u root password new_password
# Create the new database xmlsoft
#    mysqladmin -p create xmlsoft
# Create a database user 'veillard' and give him password access
#    change veillard and abcde with the right user name and passwd
#    mysql -p
#    password:
#    mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost
#           IDENTIFIED BY 'abcde' WITH GRANT OPTION;
#
# As the user check the access:
#    mysql -p xmlsoft
#    Enter password:
#    Welcome to the MySQL monitor....
#    mysql> use xmlsoft
#    Database changed
#    mysql> quit
#    Bye
#
# Then run the script in the doc subdir, it will create the symbols and
# word tables and populate them with information extracted from
# the libxml2-api.xml API description, and make them accessible read-only
# by nobody@localhost the user expected to be Apache's one
#
# On the Apache configuration, make sure you have php support enabled
#
# NOTE: the code below is written so that it runs unchanged on Python 2
# (the interpreter named by the shebang) and also parses on Python 3:
# print is always called with a single pre-formatted string.
#

import glob
import os
import string
import sys
import time

import MySQLdb
import libxml2


#
# We are not interested in parsing errors here
#
def callback(ctx, str):
    """Discard libxml2 error messages (registered as the error handler)."""
    return


libxml2.registerErrorHandler(callback, None)

#
# The dictionary of tables required and the SQL command needed
# to create them
#
TABLES = {
    "symbols": """CREATE TABLE symbols (
           name varchar(255) BINARY NOT NULL,
           module varchar(255) BINARY NOT NULL,
           type varchar(25) NOT NULL,
           descr varchar(255),
           UNIQUE KEY name (name),
           KEY module (module))""",
    "words": """CREATE TABLE words (
           name varchar(50) BINARY NOT NULL,
           symbol varchar(255) BINARY NOT NULL,
           relevance int,
           KEY name (name),
           KEY symbol (symbol),
           UNIQUE KEY ID (name, symbol))""",
    "wordsHTML": """CREATE TABLE wordsHTML (
           name varchar(50) BINARY NOT NULL,
           resource varchar(255) BINARY NOT NULL,
           section varchar(255),
           id varchar(50),
           relevance int,
           KEY name (name),
           KEY resource (resource),
           UNIQUE KEY ref (name, resource))""",
    "wordsArchive": """CREATE TABLE wordsArchive (
           name varchar(50) BINARY NOT NULL,
           ID int(11) NOT NULL,
           relevance int,
           KEY name (name),
           UNIQUE KEY ref (name, ID))""",
    "pages": """CREATE TABLE pages (
           resource varchar(255) BINARY NOT NULL,
           title varchar(255) BINARY NOT NULL,
           UNIQUE KEY name (resource))""",
    "archives": """CREATE TABLE archives (
           ID int(11) NOT NULL auto_increment,
           resource varchar(255) BINARY NOT NULL,
           title varchar(255) BINARY NOT NULL,
           UNIQUE KEY id (ID,resource(255)),
           INDEX (ID),
           INDEX (resource))""",
    "Queries": """CREATE TABLE Queries (
           ID int(11) NOT NULL auto_increment,
           Value varchar(50) NOT NULL,
           Count int(11) NOT NULL,
           UNIQUE KEY id (ID,Value(35)),
           INDEX (ID))""",
    "AllQueries": """CREATE TABLE AllQueries (
           ID int(11) NOT NULL auto_increment,
           Value varchar(50) NOT NULL,
           Count int(11) NOT NULL,
           UNIQUE KEY id (ID,Value(35)),
           INDEX (ID))""",
}

#
# The XML API description file to parse
#
API = "libxml2-api.xml"
DB = None


def _printLastError():
    """Print the type and value of the exception currently being handled."""
    print("%s %s" % sys.exc_info()[:2])


#########################################################################
#                                                                       #
#                  MySQL database interfaces                            #
#                                                                       #
#########################################################################
def createTable(db, name):
    """Drop table `name` if present and re-create it from TABLES.

    Returns the result of the CREATE statement, or -1 when `db` or `name`
    is None or the creation fails (including an unknown table name).
    """
    if db is None or name is None:
        return -1
    c = db.cursor()
    # Table names cannot be bound as parameters; they come from TABLES.
    ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
    if ret == 1:
        print("Removed table %s" % (name))
    print("Creating table %s" % (name))
    try:
        ret = c.execute(TABLES[name])
    except Exception:
        print("Failed to create table %s" % (name))
        return -1
    return ret


def checkTables(db, verbose = 1):
    """Make sure every table of TABLES exists and is readable.

    Missing tables are created, unreadable ones are repaired, and read
    access is granted to nobody@localhost on a best-effort basis.
    Returns 0, or -1 when `db` is None.
    """
    if db is None:
        return -1
    c = db.cursor()
    nbtables = c.execute("show tables")
    if verbose:
        print("Found %d tables" % (nbtables))
    existing = {}
    for row in c.fetchall():
        existing[row[0]] = {}

    for table in TABLES.keys():
        if table not in existing:
            print("table %s missing" % (table))
            createTable(db, table)
        try:
            c.execute("SELECT count(*) from %s" % table)
            row = c.fetchone()
            if verbose:
                print("Table %s contains %d records" % (table, row[0]))
        except Exception:
            print("Troubles with table %s : repairing" % (table))
            ret = c.execute("repair table %s" % table)
            print("repairing returned %d" % (ret))
            c.execute("SELECT count(*) from %s" % table)
            row = c.fetchone()
            print("Table %s contains %d records" % (table, row[0]))
    if verbose:
        print("checkTables finished")

    # make sure apache can access the tables read-only
    try:
        c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
        c.execute("GRANT INSERT,SELECT,UPDATE ON xmlsoft.Queries TO nobody@localhost")
    except Exception:
        # Best effort only: indexing still works without the grants.
        if verbose:
            print("Could not grant access to nobody@localhost")
    return 0


def openMySQL(db="xmlsoft", passwd=None, verbose = 1):
    """Open the shared connection DB and check the tables.

    When `passwd` is None it is read from the MySQL_PASS environment
    variable; the script exits with status 1 if that is not set.
    Returns the result of checkTables(), or -1 if no connection was made.
    """
    global DB
    if passwd is None:
        passwd = os.environ.get("MySQL_PASS")
        if passwd is None:
            print("No password available, set environment MySQL_PASS")
            sys.exit(1)

    DB = MySQLdb.connect(passwd=passwd, db=db)
    if DB is None:
        return -1
    return checkTables(DB, verbose)


def _cursor():
    """Return a cursor on DB, opening the connection on first use.

    Returns None when the connection could not be established.
    """
    if DB is None:
        openMySQL()
    if DB is None:
        return None
    return DB.cursor()


def _insertOrUpdate(c, insert, insertArgs, update, updateArgs, label):
    """Run `insert`; if it fails, fall back to `update`.

    The tables carry unique keys, so a failing INSERT normally means the
    row already exists and only needs refreshing.  Both statements use
    driver-side parameter binding, so values are never spliced into SQL.
    Returns the execute() result, or -1 when both statements fail.
    """
    try:
        return c.execute(insert, insertArgs)
    except Exception:
        try:
            return c.execute(update, updateArgs)
        except Exception:
            print("%s failed command" % (label))
            print("%s <- %r" % (update, updateArgs))
            _printLastError()
            return -1


def updateWord(name, symbol, relevance):
    """Record that word `name` refers to `symbol` with `relevance`."""
    c = _cursor()
    if c is None or name is None or symbol is None:
        return -1
    return _insertOrUpdate(
        c,
        "INSERT INTO words (name, symbol, relevance) VALUES (%s, %s, %s)",
        (name, symbol, relevance),
        "UPDATE words SET relevance = %s where name = %s and symbol = %s",
        (relevance, name, symbol),
        "Update word (%s, %s, %s)" % (name, symbol, relevance))


def updateSymbol(name, module, type, desc):
    """Record symbol `name` of kind `type` defined in `module`.

    Only the first sentence of `desc`, truncated to 99 characters, is
    stored.  The symbol is also indexed under its own name.
    """
    updateWord(name, name, 50)
    c = _cursor()
    if c is None or name is None or module is None or type is None:
        return -1

    try:
        # Quotes are still blanked so stored descriptions keep the same
        # form as rows written by earlier runs.
        desc = desc.replace("'", " ").split(".")[0][0:99]
    except (AttributeError, TypeError):
        desc = ""

    return _insertOrUpdate(
        c,
        "INSERT INTO symbols (name, module, type, descr) VALUES (%s, %s, %s, %s)",
        (name, module, type, desc),
        "UPDATE symbols SET module=%s, type=%s, descr=%s where name=%s",
        (module, type, desc, name),
        "Update symbol (%s, %s, %s)" % (name, module, type))


def addFunction(name, module, desc = ""):
    """Record `name` as a function of `module`."""
    return updateSymbol(name, module, 'function', desc)


def addMacro(name, module, desc = ""):
    """Record `name` as a macro of `module`."""
    return updateSymbol(name, module, 'macro', desc)


def addEnum(name, module, desc = ""):
    """Record `name` as an enum of `module`."""
    return updateSymbol(name, module, 'enum', desc)


def addStruct(name, module, desc = ""):
    """Record `name` as a struct of `module`."""
    return updateSymbol(name, module, 'struct', desc)


def addConst(name, module, desc = ""):
    """Record `name` as a constant of `module`."""
    return updateSymbol(name, module, 'const', desc)


def addType(name, module, desc = ""):
    """Record `name` as a type of `module`."""
    return updateSymbol(name, module, 'type', desc)


def addFunctype(name, module, desc = ""):
    """Record `name` as a function type of `module`."""
    return updateSymbol(name, module, 'functype', desc)


def addPage(resource, title):
    """Record the web page `resource` with its `title`."""
    c = _cursor()
    if c is None or resource is None:
        return -1
    # The failure label used to format undefined names (name, module),
    # which raised NameError inside the error handler itself.
    return _insertOrUpdate(
        c,
        "INSERT INTO pages (resource, title) VALUES (%s, %s)",
        (resource, title),
        "UPDATE pages SET title=%s WHERE resource=%s",
        (title, resource),
        "Update page (%s, %s)" % (resource, title))


def updateWordHTML(name, resource, desc, id, relevance):
    """Record that word `name` appears in page `resource`.

    `desc` is the section title (truncated to 99 characters) and `id` the
    anchor of that section; both default to "" when None.
    """
    c = _cursor()
    if c is None or name is None or resource is None:
        return -1
    if id is None:
        id = ""
    if desc is None:
        desc = ""
    else:
        try:
            desc = desc.replace("'", " ")[0:99]
        except (AttributeError, TypeError):
            desc = ""

    return _insertOrUpdate(
        c,
        "INSERT INTO wordsHTML (name, resource, section, id, relevance) "
        "VALUES (%s, %s, %s, %s, %s)",
        (name, resource, desc, id, relevance),
        "UPDATE wordsHTML SET section=%s, id=%s, relevance=%s "
        "where name=%s and resource=%s",
        (desc, id, relevance, name, resource),
        "Update symbol (%s, %s, %d)" % (name, resource, relevance))


def checkXMLMsgArchive(url):
    """Return the archive ID recorded for `url`, or -1 if there is none."""
    c = _cursor()
    if c is None or url is None:
        return -1
    try:
        c.execute("SELECT ID FROM archives WHERE resource=%s", (url,))
        row = c.fetchone()
    except Exception:
        return -1
    if row is None:
        return -1
    return row[0]


def addXMLMsgArchive(url, title):
    """Record the archived message `url` and return its new ID, or -1."""
    c = _cursor()
    if c is None or url is None:
        return -1
    if title is None:
        title = ""
    else:
        title = title.replace("'", " ")[0:99]

    cmd = "INSERT INTO archives (resource, title) VALUES (%s, %s)"
    try:
        c.execute(cmd, (url, title))
        cmd = "SELECT ID FROM archives WHERE resource=%s"
        c.execute(cmd, (url,))
        row = c.fetchone()
        if row is None:
            print("addXMLMsgArchive failed to get the ID: %s" % (url))
            return -1
    except Exception:
        print("addXMLMsgArchive failed command: %s" % (cmd))
        return -1

    return int(row[0])


def updateWordArchive(name, id, relevance):
    """Record that word `name` appears in the archived message `id`."""
    c = _cursor()
    if c is None or name is None or id is None:
        return -1
    return _insertOrUpdate(
        c,
        "INSERT INTO wordsArchive (name, id, relevance) VALUES (%s, %s, %s)",
        (name, id, relevance),
        "UPDATE wordsArchive SET relevance=%s where name=%s and ID=%s",
        (relevance, name, id),
        "Update word archive (%s, %d, %d)" % (name, id, relevance))


#########################################################################
#                                                                       #
#                  Word dictionary and analysis routines                #
#                                                                       #
#########################################################################

#
# top 100 english word without the one len < 3 + own set
#
dropWords = dict.fromkeys([
    'the', 'this', 'can', 'man', 'had', 'him', 'only', 'and', 'not',
    'been', 'other', 'even', 'are', 'was', 'new', 'most', 'but', 'when',
    'some', 'made', 'from', 'who', 'could', 'after', 'that', 'will',
    'time', 'also', 'have', 'more', 'these', 'did', 'two', 'many',
    'they', 'may', 'before', 'for', 'which', 'out', 'then', 'must',
    'one', 'through', 'with', 'you', 'said', 'first', 'back', 'were',
    'what', 'any', 'years', 'his', 'her', 'where', 'all', 'its', 'now',
    'much', 'she', 'about', 'such', 'your', 'there', 'into', 'like',
    'would', 'than', 'our', 'well', 'their', 'them', 'over', 'down',
    'net', 'www', 'bad', 'Okay', 'bin', 'cur',
], 0)

# word -> {(module, symbol): relevance}, or None once a word is too common
wordsDict = {}
# word -> {resource: (relevance, id, section)}
wordsDictHTML = {}
# word -> {archive ID: relevance}
wordsDictArchive = {}

# Characters treated as word separators by cleanupWordsString().
_WORD_SEPARATORS = (
    ".", "!", "?", ",", "'", '"', ";", "(", ")", "{", "}", "<", ">",
    "=", "/", "*", ":", "#", "\\", "\n", "\r", "\xc2", "\xa0",
)

# Characters blanked out of section titles by cleanupDescrString().
_DESCR_SEPARATORS = ("'", "\n", "\r", "\xc2", "\xa0")


def cleanupWordsString(str):
    """Return `str` with punctuation and line breaks replaced by spaces."""
    text = str
    for ch in _WORD_SEPARATORS:
        text = text.replace(ch, " ")
    return text


def cleanupDescrString(str):
    """Return `str` with quotes removed and whitespace runs collapsed."""
    text = str
    for ch in _DESCR_SEPARATORS:
        text = text.replace(ch, " ")
    # Join the split *words*; joining the string itself put a space
    # between every single character.
    return " ".join(text.split())


def splitIdentifier(str):
    """Split a C identifier into lower-cased words.

    A word is one letter, followed by a run of upper-case letters, then a
    run of lower-case letters; trailing digits and any other characters
    are dropped (so "xmlParseFile" gives ["xml", "parse", "file"]).
    """
    text = str
    size = len(text)
    ret = []
    i = 0
    while i < size:
        cur = text[i].lower()
        i = i + 1
        if cur < 'a' or cur > 'z':
            continue
        while i < size and 'A' <= text[i] <= 'Z':
            cur = cur + text[i].lower()
            i = i + 1
        while i < size and 'a' <= text[i] <= 'z':
            cur = cur + text[i]
            i = i + 1
        while i < size and '0' <= text[i] <= '9':
            i = i + 1
        ret.append(cur)
    return ret


def _isIndexable(word):
    """Return -1 for an unusable word, 0 for an ignored one, 1 otherwise."""
    if word is None or len(word) < 3:
        return -1
    if word in dropWords:
        return 0
    if ord(word[0]) > 0x80:
        return 0
    return 1


def addWord(word, module, symbol, relevance):
    """Add `relevance` to the score of `word` for (module, symbol).

    Returns the accumulated relevance, 0 when the word is ignored, or -1
    on invalid arguments.  A word referenced by more than 500 symbols is
    considered noise and dropped for the rest of the run.
    """
    if word is None or len(word) < 3:
        return -1
    if module is None or symbol is None:
        return -1
    if _isIndexable(word) == 0:
        return 0

    if word in wordsDict:
        d = wordsDict[word]
        if d is None:
            return 0
        if len(d) > 500:
            wordsDict[word] = None
            return 0
        relevance = relevance + d.get((module, symbol), 0)
    else:
        wordsDict[word] = {}
    wordsDict[word][(module, symbol)] = relevance
    return relevance


def addString(str, module, symbol, relevance):
    """Index every word of `str` for (module, symbol) with `relevance`."""
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for word in cleanupWordsString(str).split():
        if len(word) > 2:
            # Honour the caller's relevance (it used to be forced to 5).
            ret = ret + addWord(word, module, symbol, relevance)
    return ret


def addWordHTML(word, resource, id, section, relevance):
    """Add `relevance` to the score of `word` for the page `resource`.

    The first section/anchor seen for a (word, resource) pair is kept.
    Returns the accumulated relevance, 0 when ignored, -1 on bad input.
    """
    if word is None or len(word) < 3:
        return -1
    if resource is None or section is None:
        return -1
    if _isIndexable(word) == 0:
        return 0

    section = cleanupDescrString(section)

    if word in wordsDictHTML:
        d = wordsDictHTML[word]
        if d is None:
            print("skipped %s" % (word))
            return 0
        if resource in d:
            (r, i, s) = d[resource]
            if i is not None:
                id = i
            if s is not None:
                section = s
            relevance = relevance + r
    else:
        d = wordsDictHTML[word] = {}
    d[resource] = (relevance, id, section)
    return relevance


def addStringHTML(str, resource, id, section, relevance):
    """Index every word of `str` for the page `resource`."""
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for word in cleanupWordsString(str).split():
        if len(word) > 2:
            try:
                r = addWordHTML(word, resource, id, section, relevance)
                if r < 0:
                    print("addWordHTML failed: %s %s" % (word, resource))
                else:
                    ret = ret + r
            except Exception:
                print("addWordHTML failed: %s %s %d" % (word, resource, relevance))
                _printLastError()
    return ret


def addWordArchive(word, id, relevance):
    """Add `relevance` to the score of `word` for archived message `id`."""
    if word is None or len(word) < 3:
        return -1
    if id is None or id == -1:
        return -1
    if _isIndexable(word) == 0:
        return 0

    if word in wordsDictArchive:
        d = wordsDictArchive[word]
        if d is None:
            print("skipped %s" % (word))
            return 0
        relevance = relevance + d.get(id, 0)
    else:
        d = wordsDictArchive[word] = {}
    d[id] = relevance
    return relevance


def addStringArchive(str, id, relevance):
    """Index every word of `str` for archived message `id`."""
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for word in cleanupWordsString(str).split():
        if len(word) > 2:
            try:
                r = addWordArchive(word, id, relevance)
                if r < 0:
                    print("addWordArchive failed: %s %s" % (word, id))
                else:
                    ret = ret + r
            except Exception:
                print("addWordArchive failed: %s %s %d" % (word, id, relevance))
                _printLastError()
    return ret


#########################################################################
#                                                                       #
#                  XML API description analysis                         #
#                                                                       #
#########################################################################

def loadAPI(filename):
    """Parse the API description `filename` and return the document."""
    doc = libxml2.parseFile(filename)
    print("loaded %s" % (filename))
    return doc


def _childElements(node):
    """Yield the children of `node`, skipping text nodes."""
    cur = node.children
    while cur is not None:
        if cur.type != 'text':
            yield cur
        cur = cur.next


def _stripQuotes(text):
    """Blank single quotes and strip surrounding whitespace."""
    return text.replace("'", " ").strip()


def _indexIdentifier(symbol, file):
    """Index the words making up the identifier `symbol` (relevance 10)."""
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)


def _indexInfoWords(info, file, symbol):
    """Index the whitespace-separated words of `info` (relevance 5)."""
    for word in info.split():
        if len(word) > 2:
            addWord(word, file, symbol, 5)


def _analyzeSimpleSymbol(top, register):
    """Record a symbol that has only `file` and `name` attributes."""
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0
    register(symbol, file)
    _indexIdentifier(symbol, file)
    return 1


def foundExport(file, symbol):
    """Record `symbol` as a function exported by `file`."""
    if file is None or symbol is None:
        return 0
    addFunction(symbol, file)
    _indexIdentifier(symbol, file)
    return 1


def analyzeAPIFile(top):
    """Process one <file> element; return the number of exports found."""
    count = 0
    name = top.prop("name")
    for cur in _childElements(top):
        if cur.name == "exports":
            count = count + foundExport(name, cur.prop("symbol"))
        else:
            # Both placeholders need a value (one was missing before).
            print("unexpected element %s in API doc <file name='%s'>" %
                  (cur.name, name))
    return count


def analyzeAPIFiles(top):
    """Process the <files> element; return the number of exports found."""
    count = 0
    for cur in _childElements(top):
        if cur.name == "file":
            count = count + analyzeAPIFile(cur)
        else:
            print("unexpected element %s in API doc <files>" % (cur.name))
    return count


def analyzeAPIEnum(top):
    """Process one <enum> element."""
    return _analyzeSimpleSymbol(top, addEnum)


def analyzeAPIConst(top):
    """Process one <const> element."""
    return _analyzeSimpleSymbol(top, addConst)


def analyzeAPIType(top):
    """Process one <typedef> element."""
    return _analyzeSimpleSymbol(top, addType)


def analyzeAPIFunctype(top):
    """Process one <functype> element."""
    return _analyzeSimpleSymbol(top, addFunctype)


def analyzeAPIStruct(top):
    """Process one <struct> element, indexing its `info` attribute too."""
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addStruct(symbol, file)
    _indexIdentifier(symbol, file)

    info = top.prop("info")
    if info is not None:
        _indexInfoWords(_stripQuotes(info), file, symbol)
    return 1


def analyzeAPIMacro(top):
    """Process one <macro> element; return 0 when it has no <info>."""
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0
    symbol = _stripQuotes(symbol)

    info = None
    for cur in _childElements(top):
        if cur.name == "info":
            info = cur.content
            break

    _indexIdentifier(symbol, file)

    if info is None:
        addMacro(symbol, file)
        print("Macro %s description has no <info>" % (symbol))
        return 0

    info = _stripQuotes(info)
    addMacro(symbol, file, info)
    _indexInfoWords(info, file, symbol)
    return 1


def analyzeAPIFunction(top):
    """Process one <function> element with its return and arguments."""
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0
    symbol = _stripQuotes(symbol)

    info = None
    for cur in _childElements(top):
        if cur.name == "info":
            info = cur.content
        elif cur.name == "return":
            rinfo = cur.prop("info")
            if rinfo is not None:
                addString(_stripQuotes(rinfo), file, symbol, 7)
        elif cur.name == "arg":
            ainfo = cur.prop("info")
            if ainfo is not None:
                addString(_stripQuotes(ainfo), file, symbol, 5)
            name = cur.prop("name")
            if name is not None:
                addWord(_stripQuotes(name), file, symbol, 7)

    if info is None:
        print("Function %s description has no <info>" % (symbol))
        addFunction(symbol, file, "")
    else:
        info = _stripQuotes(info)
        addFunction(symbol, file, info)
        addString(info, file, symbol, 5)

    _indexIdentifier(symbol, file)
    return 1


def analyzeAPISymbols(top):
    """Process the <symbols> element; return the number of symbols seen."""
    handlers = {
        "macro": analyzeAPIMacro,
        "function": analyzeAPIFunction,
        "const": analyzeAPIConst,
        "typedef": analyzeAPIType,
        "struct": analyzeAPIStruct,
        "enum": analyzeAPIEnum,
        "functype": analyzeAPIFunctype,
    }
    count = 0
    for cur in _childElements(top):
        handler = handlers.get(cur.name)
        if handler is None:
            print("unexpected element %s in API doc <files>" % (cur.name))
        else:
            count = count + handler(cur)
    return count


def analyzeAPI(doc):
    """Process a parsed API description; return the symbol count or -1."""
    count = 0
    if doc is None:
        return -1
    root = doc.getRootElement()
    if root.name != "api":
        print("Unexpected root name")
        return -1
    for cur in _childElements(root):
        if cur.name == "files":
            # The <files> section duplicates <symbols>; it is not indexed.
            pass
        elif cur.name == "symbols":
            count = count + analyzeAPISymbols(cur)
        else:
            print("unexpected element %s in API doc" % (cur.name))
    return count


#########################################################################
#                                                                       #
#                  Web pages parsing and analysis                       #
#                                                                       #
#########################################################################

def _analyzeHTMLNode(resource, p, section, id):
    """Index the text content of node `p`; return the score or -1."""
    try:
        return addStringHTML(p.content, resource, id, section, 5)
    except Exception:
        return -1


def analyzeHTMLText(doc, resource, p, section, id):
    """Index a text node of the page `resource`."""
    return _analyzeHTMLNode(resource, p, section, id)


def analyzeHTMLPara(doc, resource, p, section, id):
    """Index a <p> element of the page `resource`."""
    return _analyzeHTMLNode(resource, p, section, id)


def analyzeHTMLPre(doc, resource, p, section, id):
    """Index a <pre> element of the page `resource`."""
    return _analyzeHTMLNode(resource, p, section, id)


def analyzeHTML(doc, resource):
    """Index the parsed page `doc`; return the number of blocks indexed.

    Text is attributed to the closest preceding h1/h2/h3 heading, whose
    `id` (or `name`) attribute is recorded as the anchor.
    """
    para = 0
    ctxt = doc.xpathNewContext()
    try:
        try:
            res = ctxt.xpathEval("//head/title")
            title = res[0].content
        except Exception:
            title = "Page %s" % (resource)
        addPage(resource, title)
        try:
            items = ctxt.xpathEval("//h1 | //h2 | //h3 | //text()")
            section = title
            id = ""
            for item in items:
                if item.name in ('h1', 'h2', 'h3'):
                    section = item.content
                    if item.prop("id"):
                        id = item.prop("id")
                    elif item.prop("name"):
                        id = item.prop("name")
                elif item.type == 'text':
                    analyzeHTMLText(doc, resource, item, section, id)
                    para = para + 1
                elif item.name == 'p':
                    analyzeHTMLPara(doc, resource, item, section, id)
                    para = para + 1
                elif item.name == 'pre':
                    analyzeHTMLPre(doc, resource, item, section, id)
                    para = para + 1
                else:
                    print("Page %s, unexpected %s element" % (resource, item.name))
        except Exception:
            print("Page %s: problem analyzing" % (resource))
            _printLastError()
    finally:
        # The bindings do not release XPath contexts automatically.
        ctxt.xpathFreeContext()
    return para


def analyzeHTMLPages():
    """Index the HTML pages of the doc tree; return how many were parsed."""
    ret = 0
    HTMLfiles = glob.glob("*.html") + glob.glob("tutorial/*.html")
    for html in HTMLfiles:
        if html[0:3] == "API":
            continue
        if html == "xml.html":
            continue
        doc = None
        try:
            try:
                doc = libxml2.parseFile(html)
            except Exception:
                # Not well-formed XML: retry with the HTML parser.
                doc = libxml2.htmlParseFile(html, None)
            res = analyzeHTML(doc, html)
            print("Parsed %s : %d paragraphs" % (html, res))
            ret = ret + 1
        except Exception:
            print("could not parse %s" % (html))
        if doc is not None:
            doc.freeDoc()
    return ret


#########################################################################
#                                                                       #
#                  Mail archives parsing and analysis                   #
#                                                                       #
#########################################################################

def getXMLDateArchive(t = None):
    """Return the URL of the by-date archive index for the month of `t`.

    `t` is in seconds since the epoch and defaults to the current time.
    """
    if t is None:
        t = time.time()
    T = time.gmtime(t)
    month = time.strftime("%B", T)
    year = T[0]
    url = "http://mail.gnome.org/archives/xml/%d-%s/date.html" % (year, month)
    return url


def scanXMLMsgArchive(url, title, force = 0):
    """Fetch and index one archived message; return 1 if it was indexed.

    Messages already recorded are skipped unless `force` is set.
    """
    if url is None or title is None:
        return 0

    ID = checkXMLMsgArchive(url)
    if force == 0 and ID != -1:
        return 0

    if ID == -1:
        ID = addXMLMsgArchive(url, title)
        if ID == -1:
            return 0

    try:
        print("Loading %s" % (url))
        doc = libxml2.htmlParseFile(url, None)
    except Exception:
        doc = None
    if doc is None:
        print("Failed to parse %s" % (url))
        return 0

    try:
        addStringArchive(title, ID, 20)
        ctxt = doc.xpathNewContext()
        try:
            texts = ctxt.xpathEval("//pre//text()")
            for text in texts:
                addStringArchive(text.content, ID, 5)
        finally:
            ctxt.xpathFreeContext()
    finally:
        doc.freeDoc()

    return 1


def scanXMLDateArchive(t = None, force = 0):
    """Index the messages listed for the month of `t`.

    Resets wordsDictArchive.  Returns the number of messages newly
    indexed, or -1 when the monthly index page could not be loaded.
    """
    global wordsDictArchive

    wordsDictArchive = {}

    url = getXMLDateArchive(t)
    print("loading %s" % (url))
    try:
        doc = libxml2.htmlParseFile(url, None)
    except Exception:
        doc = None
    if doc is None:
        print("Failed to parse %s" % (url))
        return -1

    newmsg = 0
    try:
        ctxt = doc.xpathNewContext()
        try:
            anchors = ctxt.xpathEval("//a[@href]")
            for anchor in anchors:
                href = anchor.prop("href")
                if href is None or href[0:3] != "msg":
                    continue
                try:
                    msg = libxml2.buildURI(href, url)
                    title = anchor.content
                    if title is not None and title[0:4] == 'Re: ':
                        title = title[4:]
                    if title is not None and title[0:6] == '[xml] ':
                        title = title[6:]
                    newmsg = newmsg + scanXMLMsgArchive(msg, title, force)
                except Exception:
                    # One bad message must not stop the whole month.
                    print("Failed to index message %s" % (href))
                    _printLastError()
        finally:
            ctxt.xpathFreeContext()
    finally:
        doc.freeDoc()

    return newmsg


#########################################################################
#                                                                       #
#          Main code: open the DB, the API XML and analyze it           #
#                                                                       #
#########################################################################
def analyzeArchives(t = None, force = 0):
    """Index one month of mail archives and store the result in the DB."""
    ret = scanXMLDateArchive(t, force)
    print("Indexed %d words in %d archive pages" % (len(wordsDictArchive), ret))

    i = 0
    for word in wordsDictArchive.keys():
        refs = wordsDictArchive[word]
        if refs is None:
            continue
        for id in refs.keys():
            updateWordArchive(word, id, refs[id])
            i = i + 1

    print("Found %d associations in archive pages" % (i))


def analyzeHTMLTop():
    """Index the HTML pages and store the result in the DB."""
    ret = analyzeHTMLPages()
    print("Indexed %d words in %d HTML pages" % (len(wordsDictHTML), ret))

    i = 0
    for word in wordsDictHTML.keys():
        refs = wordsDictHTML[word]
        if refs is None:
            continue
        for resource in refs.keys():
            (relevance, id, section) = refs[resource]
            updateWordHTML(word, resource, section, id, relevance)
            i = i + 1

    print("Found %d associations in HTML pages" % (i))


def analyzeAPITop():
    """Index the API description and store the result in the DB."""
    try:
        doc = loadAPI(API)
        ret = analyzeAPI(doc)
        print("Analyzed %d blocs" % (ret))
        doc.freeDoc()
    except Exception:
        print("Failed to parse and analyze %s" % (API))
        _printLastError()
        sys.exit(1)

    print("Indexed %d words" % (len(wordsDict)))
    i = 0
    skipped = 0
    for word in wordsDict.keys():
        refs = wordsDict[word]
        if refs is None:
            skipped = skipped + 1
            continue
        for (module, symbol) in refs.keys():
            updateWord(word, symbol, refs[(module, symbol)])
            i = i + 1

    print("Found %d associations, skipped %d words" % (i, skipped))


def usage():
    """Print the command-line synopsis and exit with status 1."""
    print("Usage index.py [--force] [--archive] [--archive-year year] [--archive-month month] [--API] [--docs]")
    sys.exit(1)


_MONTHS = ["January", "February", "March", "April", "May", "June", "July",
           "August", "September", "October", "November", "December"]


def _indexMonth(stamp, force):
    """Index the archive of the month named by `stamp` ("YYYY-Month")."""
    try:
        T = time.strptime(stamp, "%Y-%B")
        # Aim at the 11th so time zone offsets cannot change the month.
        t = time.mktime(T) + 3600 * 24 * 10
        analyzeArchives(t, force)
    except Exception:
        print("Failed to index month archive:")
        _printLastError()


def main():
    """Open the database and run the actions named on the command line.

    Options are processed in order, so --force only affects the archive
    options that follow it.
    """
    try:
        openMySQL()
    except Exception:
        print("Failed to open the database")
        _printLastError()
        sys.exit(1)

    args = sys.argv[1:]
    force = 0
    if not args:
        usage()

    i = 0
    while i < len(args):
        arg = args[i]
        if arg == '--force':
            force = 1
        elif arg == '--archive':
            analyzeArchives(None, force)
        elif arg == '--archive-year':
            i = i + 1
            if i >= len(args):
                usage()
            year = args[i]
            for month in _MONTHS:
                _indexMonth("%s-%s" % (year, month), force)
        elif arg == '--archive-month':
            i = i + 1
            if i >= len(args):
                usage()
            _indexMonth(args[i], force)
        elif arg == '--API':
            analyzeAPITop()
        elif arg == '--docs':
            analyzeHTMLTop()
        else:
            usage()
        i = i + 1


if __name__ == "__main__":
    main()