From ea7616f9be5b0f43623bb809c84f25f2b847ad65 Mon Sep 17 00:00:00 2001 From: Marco Perini Date: Tue, 13 Feb 2024 20:42:06 +0100 Subject: [PATCH] dev: first graph implementation --- yosoai/graph/__init__.py | 5 ++ .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 533 bytes .../__pycache__/base_graph.cpython-311.pyc | Bin 0 -> 1830 bytes .../__pycache__/base_node.cpython-311.pyc | Bin 0 -> 1654 bytes .../conditional_node.cpython-311.pyc | Bin 0 -> 2311 bytes .../generate_answer_node.cpython-311.pyc | Bin 0 -> 3174 bytes .../get_probable_tags_node.cpython-311.pyc | Bin 0 -> 2947 bytes .../langgraph_example.cpython-311.pyc | Bin 0 -> 4775 bytes .../parse_html_node.cpython-311.pyc | Bin 0 -> 2534 bytes yosoai/graph/base_graph.py | 33 +++++++++ yosoai/graph/base_node.py | 28 ++++++++ yosoai/graph/conditional_node.py | 37 ++++++++++ yosoai/graph/generate_answer_node.py | 64 ++++++++++++++++++ yosoai/graph/get_probable_tags_node.py | 51 ++++++++++++++ yosoai/graph/parse_html_node.py | 45 ++++++++++++ 15 files changed, 263 insertions(+) create mode 100644 yosoai/graph/__init__.py create mode 100644 yosoai/graph/__pycache__/__init__.cpython-311.pyc create mode 100644 yosoai/graph/__pycache__/base_graph.cpython-311.pyc create mode 100644 yosoai/graph/__pycache__/base_node.cpython-311.pyc create mode 100644 yosoai/graph/__pycache__/conditional_node.cpython-311.pyc create mode 100644 yosoai/graph/__pycache__/generate_answer_node.cpython-311.pyc create mode 100644 yosoai/graph/__pycache__/get_probable_tags_node.cpython-311.pyc create mode 100644 yosoai/graph/__pycache__/langgraph_example.cpython-311.pyc create mode 100644 yosoai/graph/__pycache__/parse_html_node.cpython-311.pyc create mode 100644 yosoai/graph/base_graph.py create mode 100644 yosoai/graph/base_node.py create mode 100644 yosoai/graph/conditional_node.py create mode 100644 yosoai/graph/generate_answer_node.py create mode 100644 yosoai/graph/get_probable_tags_node.py create mode 100644 yosoai/graph/parse_html_node.py diff --git a/yosoai/graph/__init__.py b/yosoai/graph/__init__.py new file mode 100644 index 00000000..9c17c3a4 --- /dev/null +++ b/yosoai/graph/__init__.py @@ -0,0 +1,5 @@ +from .base_graph import BaseGraph +from .conditional_node import ConditionalNode +from .get_probable_tags_node import GetProbableTagsNode +from .generate_answer_node import GenerateAnswerNode +from .parse_html_node import ParseHTMLNode diff --git a/yosoai/graph/__pycache__/__init__.cpython-311.pyc b/yosoai/graph/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7de902811a02fa5431001b32ee20a37fc4570bba GIT binary patch literal 533 zcmZutJxc>Y5Z%q?a!DX6D)yEUv=Rgn(I~{i#6Yz0I9N8h;c~k7!S0IWm-rLJ|6yly z<&oCPLJ-nfIeR&gfWu?v&6{Q3?mo9#O@eX$`Uam^-%axym6FUmNS=sKd@4vpT}lnF zh)QI+R>3V%jcnI0cvaLQ$8`#Bi+a>>8^~)z(sn*5E^5tV>)xdi`t9bdIl}cUF#Dy2 zjAYW#nWxTSiU}vSs9CKU+plcC!BC938F`XQ=PaFHSoM6On5cN?5KUNf@> z60M~mdhj8KN+9LJA%`lYijYwM0`6Qk(Mq-E6p0fzBXi1$_h#dbqwqZYn~(SAy?I~$ zbzmS#fb2j1bP^gNf8nM}>dB&W9TwZfCbm=|YtphL5t-a4w)~9P%C1bvK70`^%eJ~i z4DD|T_J+jMcipO8_R4OxR4KT&V|4{_Mj50|4q%%&WLdHaXpo+fWd*e_tF{8DMLK!n zPKi19X{q+L)qNUXS5Cf;C)D6|U%E$jVYd%oL_RUQOX8EAuIcy;7rwj;=ld}a?!=Dy z$|~9s1n04Rn}EA=!HO?|1Tx=%uQLX)P0C>Kak3%#(kkpD@}-~Tt&2CsH7 zTwHgV3zN88{0c&4u3)V4we{oVyj`|D%U!E2ZcN5e%$sJpTJ}scKVNaJQiUz%PvxK( z2pa|1B!RvVjXm$V{d0B4jt@HMjiGT(807<|MV*r8n2x>TFs_M-VuA6CPSvC9X3Z^E zJwxRxb1F~hAaLj~0)~rW+;Tv402aKz`?`(c^DjnGwg9k6TH3zUxs>+3N!&bL!%=w2@iP!gJEJCc;X3l*GvouI!7$wD zd*nPFg6+neJ%0BF7Im{50O0({d>kgFMA1`8N{LEjR3sds+t--gO+R>#e!KHUw*(<-?3MM}Yvp1Wc zw)Cs7b>mPs_9hPW*-)QteteY9hUtmGm^)0*1?jn?@#$djDvZ|9wH^D#YA`el<7oVy z7oP<0eH4!0j=1eUGGV~7ogwKQSZR-vLA{w0MNlyMu*vX$$257;G}m0aUcq(BG{32r zDiMe3@Mckv6g{K;2wybFHw@D01Rm-$V45?vFZ~_mCCVNEv=vE`T4W|TJKE})1SZeJKX~@<|1iW`{saAo Bs5<}v literal 0 HcmV?d00001 diff --git a/yosoai/graph/__pycache__/base_node.cpython-311.pyc b/yosoai/graph/__pycache__/base_node.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c40e0a522a281778dcb8eb8703a8dc0dc0a87408 GIT binary patch literal 1654 zcmah|&u<$=6rR~#=ZD*bG!00FB8`%Ru~c$EAR&t%N=t=$z%7&_VOd>`cgD`bde@zq z1!DP7p&WAHo&!SS6oh)=Kj6fPi;*})D{2z1eSm zSX-+SXz%{~tNh0$Ftx^o8ufgD$2qI`i`m{y+PRpUhA@39691`J5dIEc&z?Zj{ zE4=%pUik~%x|6wkH*aO@yrYfcf$2-LpNODXYdf!Srmw@`m`Kv1g0vh#r7Ilic0J)9 zdM!^@gm*|zV9k?a10k&?Q30%_Y~?1`vY7~(t;S&enD=Kxb1;xW$qu$+W#~`ufjK54 zIwC%d@-_n8FQ~d|7Ip|{%;= z_9a7d?90&XGtSaDe3VKS3K^TQ8%o7^EFi9dFZv>1jr@13Wx_qx(~Z(Yl(Jy<3Y>Ah z!9E6{i^=gwST|86kKKz|5Nk1u^^H;k5YMCsYEr{oo-%%#R@ez=6_Pp&KAVwAzHR^9)I|TJrtX=pK8N8lGQL& zL%Ox|%WF4S4^V4owAQSe|q=+-B)?oK1g%|9TL&)4k*?h*6!dSTk-u6l;ZotD>EhU z&O&%U^xI$_lgSI@#hoMXoAuL6TjT3*jxXW+$L96%%Y6LcAzJ(Xw%Pf8vopTjIlZ=x ztB>HBLIM($S_KDnj3rE$z`zZsaFd+jFQqDOe4k9*NLbm3 z1@PvNP>WiJ!m^6xrLl6x)N>eG&iBgUMtRspWDBPCDi}EH4YIm+32b-_J+I1kupj)p+Zx|g}U6DS1BIa&PwaynaiE3fAr l^%AT@U07lhmr{C4cE-2rKV!-g`xincu0{X= literal 0 HcmV?d00001 diff --git a/yosoai/graph/__pycache__/conditional_node.cpython-311.pyc b/yosoai/graph/__pycache__/conditional_node.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bcb4a7612fd50e5ec74f58c45a31d49e1e7498b6 GIT binary patch literal 2311 zcmaJ?O=u)V6t141&CKL?6EN8&l0r61229pmCJ@xP#zljhtR%XNlaZ#kyC&(R(>+$z zxJj5pkOi}lg9jzSivb}j@hW)mu-*jsFbN}uh6TY>ZjmU+o_w!*dVZ2<&CIKQRjSJ^c>E=h`-r7(sRH!Mq`9wK9GMm=vv! zDHl>S9DAJ!Di;}bp?LIxEfDz0Bj3d#B}X?pllU9iWgPj zf?)x0*Ot`sATZaH4W*bhY{AYH~YwAWWwz7v``P=E{cP{jOT| z4M)}G7;n@1a0BE@v0j~v;bzj8{J1C<821yp4mFgz?kB<%^P>)P{Zu_>KUGIh$NiXK zPM)`dTiyY}PvY^Yv-Xp^ZaRh#x-RfaVtF^7y)`+WdrvSfa?@l$W}TbxIR#W=*7_C{Oi!`H6L{DrC9(u_FiP6Rd|+udY375E2+zn` z?*az^QC$gPnlS6D&HWE7p*V|oLj)lS0+ZJ5muj3ghlq8X=*2eam@&}E9+OB6B5Vdx zV5Xh7p*xT_pi>H*3Am!Pv4a(j072W>wTf}VHl&7WdfbAZb=DeM<~(~SCZI!>ZDPk` zZ<9e%dPnR-uz+k@ueS!e~vwAuZ-V4$lR-B?v=Ci zhne|uX8x$B_p^!J(Mr!?rRPSu?Z)qCblw9V(i+E`2+RPKfJ6=sKWWhE;)oW3Q1c7 z`76{A3<1Uhc^R;x2ATnS7_tuOhCcRbSfBeM83HU22ryvSL*G=S1%f{9eg1ktn@Vp?JVa>Z)y~;S~~uk>7irb zePWf$_AOj08=8%w=lpeQnKvqSxnjSs5sFD6fC^*>`S}dAe-HdH%5yr(12Y$Ga60!e zp6B(*LzEX>w8!N|SY7ajRKC)K=H8VY+Qw<#n*= zU0kr~`t2JxZ_I1O^?i$4@M36meKWL#s`;i+S#PeqPh@~S7(Z*LT!&y32GCbQym)|~ zNy!gmk7CDC`bbJ2j@I64NV838wl2-KUb*NBk0cj<&tWVobPWIv2H~4|4to2I4CVvF zKJ6NpLvb*m9vNjIe$n|}y&5^C6He%ObLL0LKC$j4L`JK^Ntf!PBYXnk`X~PMs=q^e ztSbCg*yVQ79)BC{a{VjSPT+Q!A*4%stl#}r@&B3^KQk}tL<#Rit1&a?h)@%u7IFA$ z%#QWe>krAQYEPM0?$deVs(xz7grlec=#Ip?6e%xiu* z00xf4WL0+L+6g|L5ulGjDhxi3^xL(XJY@~3H=|e6&pXNDnK*DbNSu^*4?-gWaqpK- zm{LVE;bA$g+<}utS1ePpx3E%yyvR_cXu>H119ZQ$%#IUX(XB$IgiSlwh3i(9$R=Iv zw%AhChErovxdoVo3L!8p=u)f=A9Isv<*hjd@0B6t0Nl2ef@RuT(d>f>Xe$}U3lbh! zWqewhfRozclu>qz_EynUGTV5cW?n*Z4cirBo?^-9RmwV0)kAs=M#%)GIR!e`VOTUb zgFS>tf*zW(x9Y)#`T6;k<<;f2rCZC&((29MEU!V2CopziqPH7Q>g?bkwoHgc7L|ix z#k6c?!>WM!VMo3tbF1FqsV#eh1RHoqGi|l7RWY}zmIYVewknE7u%aou_zo@FSfK?l zHTao%b04%@+74FkRwy{c*91P^unfc6WjkYTVD8$04@?gxr*x1_Ygf_ecHpxxhBW0~ z1yj2SQKa3mAaqzPb%45yhUmEv#W_S0rNq~Wl_1`p;bLI$cu#lp41uSz48)T@X3+v@GI*L`XQ+9Xr!p!F);N~ z;1s*48NIG-gs?M}jDhf?fqk-ZPb`z7X?xPUc;EN4$8Tbrup`17ftvb~l?l611Yfjh z(cH1Nu@?=3H9J5C*i6xn4RV2vyU02?Oh#BE%><+2CBYkzHL4#xgp4!Ags-55EnL#n zvVV{8`wvtS|+ZxjD65m&V(IFg)x^-^pk+)4DL(nw+P5@n)n8R z_?8Jb*&aaY%QgGYdySFTKc8=m{M?mWsmbHirK8lP&qw}ww|@Bpcp9m-W@^oi0>Suz zyYVP_EKM9q6Ng)$M(dNWH>4{~=}KL?(i$9gWjYLI|HDKM{XMai;=dSPlKC%X5vo~^ zurEh8%I;0+Jk_D3o7cmGI15V4S7r9bYUj@&`GUwyMH$(HSQ zwEHyk=IwiLzV~L{{3(@6AZXYB@h|!|M(E#iFl_pu^Xgsbe26S$DGn+tc|}21#8RC| zSos zxXHY7MQqY?#W4j1&V0@*m(AO>VlrS@>oylRs-jXAH%-PVD};(eI<)iZB8-0t-cpoT zER+X7o2aN*%Hw!mwIYvEUTdNUN?r$FH2=&sD!}Yr^R7cT%@UXX6o#s#Vhvt7ew7FE zJLN+pr2B*R6#kzL2dH&uX2eAGh^0J@fE>JGQ&3&0BV#nS5V#WA@HH2VXH{S4)e2=k zHVoUfg<<%r${QlP|-OHso$%hZn>M!_*THw-R)C4@ZsY9qI@^%DSyZ(XM) zb7Sj=lM+<^)-MMl5#-eF&?RwmUKdFsxm8dW(11t;yU>K6WRM}-oP`r2U z1gMsVK1M-;F93P{2))FqU&WrpUf}d|oZg>nz1zW;y7*EXU+SHiZE8<&6MnBi78OPq zz#s;1fb`!1$8nqiLEt?b;j5rHU}#0=q>`Xn>OsE|IWngmnp542-$de&-3y4#HMGM~ zwe-67dr07q)uUE_fvi|V`?*$AYUqJ_2h|iY+#=w`cs4Zjy52gRNdr%h$we*Ph>7@U z8r5S~yrk5lb=69I6#4Yj_%>?bIa4O7Um1<0H7?+!APbgV z5ZM*7xlKu-%9vCf&Ui&4fLUT@W&1L*-GWoKY_~)*LkBXKNk$rm%?Ae!j7--84s$Z3 zW{YjxC7B(%%QGi*$5zPRG;uva zie44g2rCO9TG}u#Lip}@RbnyZUd;i0H9F@_?d$2xL%DD({Tbg#cl5`%ij>&l0ZqZ|9Q>KA5 zBUz!&L&@&_X!R#c~XQ`BWk<%y&dXD!%WPcViWR9JqSCS zk#e(=p-?~N1$EW{w_q=@w4D38oJ)WcvPwXq4*RCdYf!CH!vqZ~2R{X}!R8MMo0Q+i z=mT1)3cB~kNu^{kDQ_cu0TS>L`f?VS8n3%)Nej`fMLX;!_9YWpc7T_4fWMknrgwm%GX3c5=CwoO}(f zuY)}Ne~HoDcRT5Ky6JbCsowmh7xTI2^SRFaT6cb}jo;{Nk?CX;gVgMW-pu*l`ON-$ z?>q0ce|o1qpNFR((`FN&BOqT$(oFOx&@5?P_~Sxn=B-bD)|pvpCVJ^_yhvxCr?a1| z{B^JW*40ir*G=b|ssG?qd*aPD{#I)d$mr=u0fA=mN$LeY_Z*+wFSg3<^UEE4rHik$ z@s-}`>1L8o17g29w}k$ZSe;b=Hoclu|C!XGn^hQG=ukE)uM+aE09T2GVU#_q>d1c5 zFg~c7PB;_Pc$Sr6N)2ul1?o740b0H$Ln4ds4}v%bS_2|ei?0KLsz3boBN1Ij;C~RD z)o1$X{}DC?3Ip{U*_a=f^&q&2FctXzq+`0J!Zw_WOJNrBAi@3g0e>uYd9XVIb$>>o zYMF~);qQewe4l+AhH`P-1JYL&Md_h8+v86UU2cy*J(O#YKYeXUfu$WM`wO2-?iVLa H1rGfe(U=aZ literal 0 HcmV?d00001 diff --git a/yosoai/graph/__pycache__/langgraph_example.cpython-311.pyc b/yosoai/graph/__pycache__/langgraph_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8925ccbbbbd69a7fdda63bb15b4fefbb88d3d98 GIT binary patch literal 4775 zcma(U*=`%jv3Xw-sl%2?OO~ix)Ir&jWm!tHYwEOoNQy=8G0kUE)8Da?fF*d|7p2;YR!-nDbX`R1Pj$_MLcAxdkhqqS06 zTOF;P(yr9eu2Nb@#>RBn5pKd=uWbl@0dM7Dx^ef(_483}{~88gR0&KE_^DIum7+{k z>SOwB$d1+!AAW)O2=06B09kk|57Wp?H?^Wj^}=~ z>=3?liR^sU$QlIBt1jlKwF@-@RTOsi6Mm2%=O;e1*L#}IA))-%L*y^9OAN>)Sa|&;bVt_fl3>BrayR{2-!l=CGVtw^}Owij=}U zFUT1UBtr@9Ku+J&CGnxc@pN)<3+CqZB$jd_r(;mkv!qB8w~ceKnvVI0f~Ifg^;}-x z;#3W*R@W9M^PINK>D;QyNw}5?YqZ?g6uE&GUc_8p7k2aFwvx{!RZiA+6;%QQ?3QC? zW7+a=>#)|E%H^__Q&;oYy7Dlwv9@$CvA)43A19X9H`dtqrVo3wx}MV(rlz=@5Z5F@ z&&EYg&ZM)PAji{+WVKyP1HD_D>l;aHP}Ep@ z@eJ%JpNb30R8Cd)a9Y=r83b6S4tbL#y=-5-VLG0v%2!?LbL^(>BRx7K(_c6Ijs`+Flb-HI#`zCQX)=r8Epfs0dg} z46A3<(2^qa0`HW>;^3TC1s+ap)$SkB%JLM-~oU{>jNnztyazvsmKJ%#AHuc0~iD1qpUutsg4M zB&)MJj_XinS?#ROs!qaEs=>e&lO1|yY0Fk*~m4Rhc z9B&Y8A~LaBne*LLP0=sPw5B!_`4Ew>)BxJ5L9$$ku-v&E4d|71?saiD{O* zvLDMyddmsAfuvzquH_OSsWr<1CDn30<-|Nz2gvHQ>opum}uO%3{2Eqpuf|h@q+z@-w5;-QNKB`Tp9EhQPk|eR~hsaQLov5 zCPoLn$qJ{dh`LX`lZGc=aF*T3A35rMb*1R*DfxN|Zqw8Jn~#6}@vlC4@kzmXiu#Sq zpzQS8B4zY;a~%1GP`|rit$L9Sl=RhtiRO%YFwvM%4<@>0)Psp8je0nBw-okXZT_S6 zU%cUdP;@^ixgQi9r;!OGGV%GgF}+fZtd=6H2I??72970T@K&*7w$w3epmwt}W^~3r zA26ow7CRHA&V+%knC<0+TS-&%W1IlNI3QOnBK7I?J~XH zWuGhHhU?KEHTrHE-f7d! z{04==e?>G|qTZ&`J#SEt>FhL}?Wfnq%zF|ywnymVEt zX0C|tl+Yam-Kn!?yNF^X6f;l^tnq}ta(BFOcNE>>k~>^*0O^N4*!qXP-|fBXCn#!@8e literal 0 HcmV?d00001 diff --git a/yosoai/graph/__pycache__/parse_html_node.cpython-311.pyc b/yosoai/graph/__pycache__/parse_html_node.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40845be956f538cb0223fe666f6ef345e0ef9588 GIT binary patch literal 2534 zcmaJ?&2JM&6rc6_V;!6rLV!d{!bnhDhs3SaAWB-HBtS`2a3T_ra8oVI?%3I4@47SV z=0i?}Dx|s+2U@iPsY3FJex!%~1*rF4){&(|DG z-tT?vyq`KdI}o%hkN+e?5rqEY6TgL;gY7XG+(#PHgghz;89_i=Koj$Uf|wBn6hIe| z7QBPB5D^}U2(824Ynh-HoaLZR5S2mcBS2hlp@K^gs12l@paJTV z0Vl*tMM51(mUTn7WH~81K}PafC(1X-hEgC-RF>7e!k8>G&IjY$FD{-NTe!*yWeb-{ zPDwAEC+wDG78hoz`593yws3uBZe~Q$7gkJW!i$5kg(A0AUbG5%nF~_I6}k@qR}Z&2 zA?0)7k!8oA+qsRNNS)tA?nXAH-VLeuL0|Qwnsl-*owWUD8@8b~_k}^=y6-!1PP_BK zEg)NOIZZ%O?yNu`2XUJwz8tRwj1$L@)p8<2#}VyY6L*J03zo$nA-5iidy&6FT6j(T zQd|<2(6V?PEeY+Ks=IJ~?FeZRE!-ZuwO~0&Pg#)terr#GwNN=!u1!7n&#GCLLYuI#8GlDr_ z3eNyIUD6pD#*|nkYP2853d1@}wJ=(P37TVLtr_R+^1*Lwx@slI@H}u> z#6IRrqN{x)BO|jDSLQBYe)-|^Gv}^ezL1^=sT1V3I+A~AHFAMclaArl9&DHvq(}*z z06y_D0`4*%TDYg<{4HCiOqMOGsMhN7^Ua*_dANo$h=iWic`SHJtd6El+?1)V2p6gO zg{~3pgJzC!1AR8l3~}On+?5PFp(51{%aJ}ND=uS4%n^(7@4`6ss(CqGNl4^=2erN}} z+vs^eigwvu$(l4&mxgRF3+Og5C*nu5rjS%eT?+(7kJS*EZvLBaW+wf9|QpQ}uYN5`4NZ z-ayf4-`@kLY6GY11E(wLr-Q?rgX0^6>~zR-!PH85K90%q=OrcYt++QUF#p#G2%+HydQ%sED|D!V{;#9^ z0nmj{Cqqt@Uvq9brM%iX$-H9Z)J3>oWYsJbN>J5TQhxI8&L!q%ri=3n@-TTay^uci v9B`H 0: + return self.next_nodes[0].node_name + else: + return self.next_nodes[1].node_name \ No newline at end of file diff --git a/yosoai/graph/generate_answer_node.py b/yosoai/graph/generate_answer_node.py new file mode 100644 index 00000000..75bfe9c8 --- /dev/null +++ b/yosoai/graph/generate_answer_node.py @@ -0,0 +1,64 @@ +from .base_node import BaseNode +from langchain.prompts import PromptTemplate +from langchain_core.output_parsers import JsonOutputParser + +class GenerateAnswerNode(BaseNode): + def __init__(self, llm, node_name="GenerateAnswerNode"): + super().__init__(node_name, "node") + self.llm = llm + # Initialize any other configurations for the LLM here + + def execute(self, state): + """ + Generates an answer based on the user's input and the parsed document. + + Args: + state: The current state of the graph, expected to contain + 'user_input' and 'parsed_document' within 'keys'. + + Returns: + The updated state with 'answer' within 'keys', containing the generated answer. + """ + + print("---GENERATE ANSWER---") + try: + user_input = state["keys"]["user_input"] + document = state["keys"]["document"] + except KeyError as e: + print(f"Error: {e} not found in state.") + raise + + parsed_document = state["keys"].get("parsed_document", None) + relevant_chunks = state["keys"].get("relevant_chunks", None) + + # Use relevant chunks if available, otherwise use the parsed document or the original document + if relevant_chunks: + context = relevant_chunks + elif parsed_document: + context = parsed_document + else: + context = document + + output_parser = JsonOutputParser() + format_instructions = output_parser.get_format_instructions() + + template = """You are a website scraper and you have just scraped the following content from a website. You are now asked to answer a question about the content you have scraped.\n {format_instructions} \n The content is as follows: + {context} + + Question: {question} + """ + + schema_prompt = PromptTemplate( + template=template, + input_variables=["context", "question"], + partial_variables={"format_instructions": format_instructions}, + ) + + # Chain + schema_chain = schema_prompt | self.llm | output_parser + answer = schema_chain.invoke({"context": context, "question": user_input}) + + # Update the state with the generated answer + state["keys"].update({"answer": answer}) + return state + diff --git a/yosoai/graph/get_probable_tags_node.py b/yosoai/graph/get_probable_tags_node.py new file mode 100644 index 00000000..d96999ba --- /dev/null +++ b/yosoai/graph/get_probable_tags_node.py @@ -0,0 +1,51 @@ +from .base_node import BaseNode +from langchain.prompts import PromptTemplate +from langchain.output_parsers import CommaSeparatedListOutputParser + +class GetProbableTagsNode(BaseNode): + def __init__(self, llm, node_name="GetProbableTagsNode"): + super().__init__(node_name, "node") + self.llm = llm + + def execute(self, state): + """ + Identifies probable HTML tags from a document based on a user's question. + + Args: + state (dict): The current state of the graph, including 'document', 'user_input', and 'url' within 'keys'. + + Returns: + dict: The updated state with a new key 'tags' within 'keys' containing probable HTML tags. + """ + + print("---GET PROBABLE TAGS---") + # Accessing the nested structure + try: + user_input = state["keys"]["user_input"] + url = state["keys"]["url"] + except KeyError as e: + print(f"Error: {e} not found in state.") + raise + + output_parser = CommaSeparatedListOutputParser() + format_instructions = output_parser.get_format_instructions() + + template = """You are a website scraper that knows all the types of html tags. You are now asked to list all the html tags where you think you can find the information of the asked question.\n {format_instructions} \n The webpage is: {webpage} \n The asked question is the following: + {question} + """ + + tag_prompt = PromptTemplate( + template=template, + input_variables=["question"], + partial_variables={"format_instructions": format_instructions, "webpage": url}, + ) + + # Execute the chain to get probable tags + tag_answer = tag_prompt | self.llm | output_parser + probable_tags = tag_answer.invoke({"question": user_input}) + + print("Possible tags: ", *probable_tags) + + # Update the nested 'keys' dictionary with probable tags + state["keys"].update({"tags": probable_tags}) + return state \ No newline at end of file diff --git a/yosoai/graph/parse_html_node.py b/yosoai/graph/parse_html_node.py new file mode 100644 index 00000000..f8f13da5 --- /dev/null +++ b/yosoai/graph/parse_html_node.py @@ -0,0 +1,45 @@ +from .base_node import BaseNode +from langchain_community.document_transformers import BeautifulSoupTransformer + +class ParseHTMLNode(BaseNode): + def __init__(self, node_name="ParseHTMLNode"): + super().__init__(node_name, "node") + + def execute(self, state): + """ + Checks for the 'tags' key in the state. If it exists, parses the document + based on these tags. Otherwise, returns the document as is. + + Args: + state (dict): The current state of the graph, expected to contain + 'document' within 'keys', and optionally 'tags'. + + Returns: + dict: The updated state with 'parsed_document' within 'keys', + containing either the original or parsed document. + """ + + print("---PARSE HTML DOCUMENT---") + try: + document = state["keys"]["document"] + except KeyError as e: + print(f"Error: {e} not found in state.") + raise + + # Check if tags are specified in the state + tags = state["keys"].get("tags", None) + + if tags: + # Initialize the BeautifulSoupTransformer with any required configurations + bs_transformer = BeautifulSoupTransformer() + # Parse the document with specified tags + parsed_document = bs_transformer.transform_documents(document, tags_to_extract=tags) + print("Document parsed with specified tags.") + else: + # If no tags are specified, return the document as is + print("No specific tags provided; returning document as is.") + return state + + # Update the state with the parsed document + state["keys"].update({"parsed_document": parsed_document}) + return state \ No newline at end of file