update function

This commit is contained in:
Marco Vinciguerra 2024-07-22 16:50:58 +02:00
parent 09f983b14f
commit 2ae19aee56
7 changed files with 69 additions and 986 deletions

View File

@ -35,7 +35,8 @@ dependencies = [
"undetected-playwright==0.3.0",
"semchunk==1.0.1",
"html2text==2024.2.26",
"langchain-fireworks==0.1.3"
"langchain-fireworks==0.1.3",
"langchain-community==0.2.9"
]
license = "MIT"

View File

@ -1,585 +0,0 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
# pre: false
# features: []
# all-features: false
# with-sources: false
-e file:.
aiofiles==23.2.1
# via burr
aiohttp==3.9.5
# via langchain
# via langchain-community
# via langchain-fireworks
aiosignal==1.3.1
# via aiohttp
alabaster==0.7.16
# via sphinx
altair==5.3.0
# via streamlit
annotated-types==0.7.0
# via pydantic
anthropic==0.26.1
# via langchain-anthropic
anyio==4.3.0
# via anthropic
# via groq
# via httpx
# via openai
# via starlette
# via watchfiles
astroid==3.2.2
# via pylint
async-timeout==4.0.3
# via aiohttp
# via langchain
attrs==23.2.0
# via aiohttp
# via jsonschema
# via referencing
babel==2.15.0
# via sphinx
beautifulsoup4==4.12.3
# via furo
# via google
# via scrapegraphai
blinker==1.8.2
# via streamlit
boto3==1.34.113
# via langchain-aws
botocore==1.34.113
# via boto3
# via s3transfer
burr==0.22.1
# via scrapegraphai
cachetools==5.3.3
# via google-auth
# via streamlit
certifi==2024.2.2
# via httpcore
# via httpx
# via requests
charset-normalizer==3.3.2
# via requests
click==8.1.7
# via burr
# via streamlit
# via typer
# via uvicorn
contourpy==1.2.1
# via matplotlib
cycler==0.12.1
# via matplotlib
dataclasses-json==0.6.6
# via langchain
# via langchain-community
defusedxml==0.7.1
# via langchain-anthropic
dill==0.3.8
# via pylint
distro==1.9.0
# via anthropic
# via groq
# via openai
dnspython==2.6.1
# via email-validator
docstring-parser==0.16
# via google-cloud-aiplatform
docutils==0.19
# via sphinx
email-validator==2.1.1
# via fastapi
exceptiongroup==1.2.1
# via anyio
# via pytest
faiss-cpu==1.8.0
# via scrapegraphai
fastapi==0.111.0
# via burr
# via fastapi-pagination
fastapi-cli==0.0.4
# via fastapi
fastapi-pagination==0.12.24
# via burr
filelock==3.14.0
# via huggingface-hub
fireworks-ai==0.14.0
# via langchain-fireworks
fonttools==4.52.1
# via matplotlib
free-proxy==1.1.1
# via scrapegraphai
frozenlist==1.4.1
# via aiohttp
# via aiosignal
fsspec==2024.5.0
# via huggingface-hub
furo==2024.5.6
# via scrapegraphai
gitdb==4.0.11
# via gitpython
gitpython==3.1.43
# via streamlit
google==3.0.0
# via scrapegraphai
google-ai-generativelanguage==0.6.4
# via google-generativeai
google-api-core==2.19.0
# via google-ai-generativelanguage
# via google-api-python-client
# via google-cloud-aiplatform
# via google-cloud-bigquery
# via google-cloud-core
# via google-cloud-resource-manager
# via google-cloud-storage
# via google-generativeai
google-api-python-client==2.130.0
# via google-generativeai
google-auth==2.29.0
# via google-ai-generativelanguage
# via google-api-core
# via google-api-python-client
# via google-auth-httplib2
# via google-cloud-aiplatform
# via google-cloud-bigquery
# via google-cloud-core
# via google-cloud-resource-manager
# via google-cloud-storage
# via google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-cloud-aiplatform==1.58.0
# via langchain-google-vertexai
google-cloud-bigquery==3.25.0
# via google-cloud-aiplatform
google-cloud-core==2.4.1
# via google-cloud-bigquery
# via google-cloud-storage
google-cloud-resource-manager==1.12.3
# via google-cloud-aiplatform
google-cloud-storage==2.17.0
# via google-cloud-aiplatform
# via langchain-google-vertexai
google-crc32c==1.5.0
# via google-cloud-storage
# via google-resumable-media
google-generativeai==0.5.4
# via langchain-google-genai
google-resumable-media==2.7.1
# via google-cloud-bigquery
# via google-cloud-storage
googleapis-common-protos==1.63.0
# via google-api-core
# via grpc-google-iam-v1
# via grpcio-status
graphviz==0.20.3
# via burr
# via scrapegraphai
greenlet==3.0.3
# via playwright
groq==0.8.0
# via langchain-groq
grpc-google-iam-v1==0.13.1
# via google-cloud-resource-manager
grpcio==1.64.0
# via google-api-core
# via googleapis-common-protos
# via grpc-google-iam-v1
# via grpcio-status
grpcio-status==1.62.2
# via google-api-core
h11==0.14.0
# via httpcore
# via uvicorn
html2text==2024.2.26
# via scrapegraphai
httpcore==1.0.5
# via httpx
httplib2==0.22.0
# via google-api-python-client
# via google-auth-httplib2
httptools==0.6.1
# via uvicorn
httpx==0.27.0
# via anthropic
# via fastapi
# via fireworks-ai
# via groq
# via openai
httpx-sse==0.4.0
# via fireworks-ai
huggingface-hub==0.23.1
# via tokenizers
idna==3.7
# via anyio
# via email-validator
# via httpx
# via requests
# via yarl
imagesize==1.4.1
# via sphinx
importlib-metadata==8.0.0
# via sphinx
importlib-resources==6.4.0
# via matplotlib
iniconfig==2.0.0
# via pytest
isort==5.13.2
# via pylint
jinja2==3.1.4
# via altair
# via burr
# via fastapi
# via pydeck
# via sphinx
jiter==0.4.0
# via anthropic
jmespath==1.0.1
# via boto3
# via botocore
jsonpatch==1.33
# via langchain
# via langchain-core
jsonpointer==2.4
# via jsonpatch
jsonschema==4.22.0
# via altair
jsonschema-specifications==2023.12.1
# via jsonschema
kiwisolver==1.4.5
# via matplotlib
langchain==0.1.15
# via scrapegraphai
langchain-anthropic==0.1.11
# via scrapegraphai
langchain-aws==0.1.3
# via scrapegraphai
langchain-community==0.0.38
# via langchain
langchain-core==0.1.52
# via langchain
# via langchain-anthropic
# via langchain-aws
# via langchain-community
# via langchain-fireworks
# via langchain-google-genai
# via langchain-google-vertexai
# via langchain-groq
# via langchain-openai
# via langchain-text-splitters
langchain-fireworks==0.1.3
# via scrapegraphai
langchain-google-genai==1.0.3
# via scrapegraphai
langchain-google-vertexai==1.0.4
# via scrapegraphai
langchain-groq==0.1.3
# via scrapegraphai
langchain-openai==0.1.6
# via scrapegraphai
langchain-text-splitters==0.0.2
# via langchain
langsmith==0.1.63
# via langchain
# via langchain-community
# via langchain-core
loguru==0.7.2
# via burr
lxml==5.2.2
# via free-proxy
markdown-it-py==3.0.0
# via rich
markupsafe==2.1.5
# via jinja2
marshmallow==3.21.2
# via dataclasses-json
matplotlib==3.9.0
# via burr
mccabe==0.7.0
# via pylint
mdurl==0.1.2
# via markdown-it-py
minify-html==0.15.0
# via scrapegraphai
multidict==6.0.5
# via aiohttp
# via yarl
mypy-extensions==1.0.0
# via typing-inspect
numpy==1.26.4
# via altair
# via contourpy
# via faiss-cpu
# via langchain
# via langchain-aws
# via langchain-community
# via matplotlib
# via pandas
# via pyarrow
# via pydeck
# via sf-hamilton
# via shapely
# via streamlit
openai==1.30.3
# via burr
# via langchain-fireworks
# via langchain-openai
orjson==3.10.3
# via fastapi
# via langsmith
packaging==23.2
# via altair
# via google-cloud-aiplatform
# via google-cloud-bigquery
# via huggingface-hub
# via langchain-core
# via marshmallow
# via matplotlib
# via pytest
# via sphinx
# via streamlit
pandas==2.2.2
# via altair
# via scrapegraphai
# via sf-hamilton
# via streamlit
pillow==10.3.0
# via fireworks-ai
# via matplotlib
# via streamlit
platformdirs==4.2.2
# via pylint
playwright==1.43.0
# via scrapegraphai
# via undetected-playwright
pluggy==1.5.0
# via pytest
proto-plus==1.23.0
# via google-ai-generativelanguage
# via google-api-core
# via google-cloud-aiplatform
# via google-cloud-resource-manager
protobuf==4.25.3
# via google-ai-generativelanguage
# via google-api-core
# via google-cloud-aiplatform
# via google-cloud-resource-manager
# via google-generativeai
# via googleapis-common-protos
# via grpc-google-iam-v1
# via grpcio-status
# via proto-plus
# via streamlit
pyarrow==16.1.0
# via streamlit
pyasn1==0.6.0
# via pyasn1-modules
# via rsa
pyasn1-modules==0.4.0
# via google-auth
pydantic==2.7.1
# via anthropic
# via burr
# via fastapi
# via fastapi-pagination
# via fireworks-ai
# via google-cloud-aiplatform
# via google-generativeai
# via groq
# via langchain
# via langchain-core
# via langsmith
# via openai
pydantic-core==2.18.2
# via pydantic
pydeck==0.9.1
# via streamlit
pyee==11.1.0
# via playwright
pygments==2.18.0
# via furo
# via rich
# via sphinx
pylint==3.2.5
pyparsing==3.1.2
# via httplib2
# via matplotlib
pytest==8.0.0
# via pytest-mock
pytest-mock==3.14.0
python-dateutil==2.9.0.post0
# via botocore
# via google-cloud-bigquery
# via matplotlib
# via pandas
python-dotenv==1.0.1
# via scrapegraphai
# via uvicorn
python-multipart==0.0.9
# via fastapi
pytz==2024.1
# via pandas
pyyaml==6.0.1
# via huggingface-hub
# via langchain
# via langchain-community
# via langchain-core
# via uvicorn
referencing==0.35.1
# via jsonschema
# via jsonschema-specifications
regex==2024.5.15
# via tiktoken
requests==2.32.2
# via burr
# via free-proxy
# via google-api-core
# via google-cloud-bigquery
# via google-cloud-storage
# via huggingface-hub
# via langchain
# via langchain-community
# via langchain-fireworks
# via langsmith
# via sphinx
# via streamlit
# via tiktoken
rich==13.7.1
# via streamlit
# via typer
rpds-py==0.18.1
# via jsonschema
# via referencing
rsa==4.9
# via google-auth
s3transfer==0.10.1
# via boto3
semchunk==1.0.1
# via scrapegraphai
sf-hamilton==1.63.0
# via burr
shapely==2.0.4
# via google-cloud-aiplatform
shellingham==1.5.4
# via typer
six==1.16.0
# via python-dateutil
smmap==5.0.1
# via gitdb
sniffio==1.3.1
# via anthropic
# via anyio
# via groq
# via httpx
# via openai
snowballstemmer==2.2.0
# via sphinx
soupsieve==2.5
# via beautifulsoup4
sphinx==6.0.0
# via furo
# via scrapegraphai
# via sphinx-basic-ng
sphinx-basic-ng==1.0.0b2
# via furo
sphinxcontrib-applehelp==1.0.8
# via sphinx
sphinxcontrib-devhelp==1.0.6
# via sphinx
sphinxcontrib-htmlhelp==2.0.5
# via sphinx
sphinxcontrib-jsmath==1.0.1
# via sphinx
sphinxcontrib-qthelp==1.0.7
# via sphinx
sphinxcontrib-serializinghtml==1.1.10
# via sphinx
sqlalchemy==2.0.30
# via langchain
# via langchain-community
starlette==0.37.2
# via fastapi
streamlit==1.35.0
# via burr
tenacity==8.3.0
# via langchain
# via langchain-community
# via langchain-core
# via streamlit
tiktoken==0.7.0
# via langchain-openai
# via scrapegraphai
tokenizers==0.19.1
# via anthropic
toml==0.10.2
# via streamlit
tomli==2.0.1
# via pylint
# via pytest
tomlkit==0.12.5
# via pylint
toolz==0.12.1
# via altair
tornado==6.4
# via streamlit
tqdm==4.66.4
# via google-generativeai
# via huggingface-hub
# via openai
# via scrapegraphai
# via semchunk
typer==0.12.3
# via fastapi-cli
typing-extensions==4.12.0
# via altair
# via anthropic
# via anyio
# via astroid
# via fastapi
# via fastapi-pagination
# via google-generativeai
# via groq
# via huggingface-hub
# via openai
# via pydantic
# via pydantic-core
# via pyee
# via pylint
# via sf-hamilton
# via sqlalchemy
# via starlette
# via streamlit
# via typer
# via typing-inspect
# via uvicorn
typing-inspect==0.9.0
# via dataclasses-json
# via sf-hamilton
tzdata==2024.1
# via pandas
ujson==5.10.0
# via fastapi
undetected-playwright==0.3.0
# via scrapegraphai
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.18
# via botocore
# via requests
uvicorn==0.29.0
# via burr
# via fastapi
uvloop==0.19.0
# via uvicorn
watchfiles==0.21.0
# via uvicorn
websockets==12.0
# via uvicorn
yarl==1.9.4
# via aiohttp
zipp==3.19.2
# via importlib-metadata
# via importlib-resources

View File

@ -1,4 +0,0 @@
sphinx==7.1.2
furo==2024.5.6
pytest==8.0.0
burr[start]==0.22.1

View File

@ -1,366 +0,0 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
# pre: false
# features: []
# all-features: false
# with-sources: false
-e file:.
aiohttp==3.9.5
# via langchain
# via langchain-community
# via langchain-fireworks
aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.26.1
# via langchain-anthropic
anyio==4.3.0
# via anthropic
# via groq
# via httpx
# via openai
async-timeout==4.0.3
# via aiohttp
# via langchain
attrs==23.2.0
# via aiohttp
beautifulsoup4==4.12.3
# via google
# via scrapegraphai
boto3==1.34.113
# via langchain-aws
botocore==1.34.113
# via boto3
# via s3transfer
cachetools==5.3.3
# via google-auth
certifi==2024.2.2
# via httpcore
# via httpx
# via requests
charset-normalizer==3.3.2
# via requests
dataclasses-json==0.6.6
# via langchain
# via langchain-community
defusedxml==0.7.1
# via langchain-anthropic
distro==1.9.0
# via anthropic
# via groq
# via openai
docstring-parser==0.16
# via google-cloud-aiplatform
exceptiongroup==1.2.1
# via anyio
faiss-cpu==1.8.0
# via scrapegraphai
filelock==3.14.0
# via huggingface-hub
fireworks-ai==0.14.0
# via langchain-fireworks
free-proxy==1.1.1
# via scrapegraphai
frozenlist==1.4.1
# via aiohttp
# via aiosignal
fsspec==2024.5.0
# via huggingface-hub
google==3.0.0
# via scrapegraphai
google-ai-generativelanguage==0.6.4
# via google-generativeai
google-api-core==2.19.0
# via google-ai-generativelanguage
# via google-api-python-client
# via google-cloud-aiplatform
# via google-cloud-bigquery
# via google-cloud-core
# via google-cloud-resource-manager
# via google-cloud-storage
# via google-generativeai
google-api-python-client==2.130.0
# via google-generativeai
google-auth==2.29.0
# via google-ai-generativelanguage
# via google-api-core
# via google-api-python-client
# via google-auth-httplib2
# via google-cloud-aiplatform
# via google-cloud-bigquery
# via google-cloud-core
# via google-cloud-resource-manager
# via google-cloud-storage
# via google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-cloud-aiplatform==1.58.0
# via langchain-google-vertexai
google-cloud-bigquery==3.25.0
# via google-cloud-aiplatform
google-cloud-core==2.4.1
# via google-cloud-bigquery
# via google-cloud-storage
google-cloud-resource-manager==1.12.3
# via google-cloud-aiplatform
google-cloud-storage==2.17.0
# via google-cloud-aiplatform
# via langchain-google-vertexai
google-crc32c==1.5.0
# via google-cloud-storage
# via google-resumable-media
google-generativeai==0.5.4
# via langchain-google-genai
google-resumable-media==2.7.1
# via google-cloud-bigquery
# via google-cloud-storage
googleapis-common-protos==1.63.0
# via google-api-core
# via grpc-google-iam-v1
# via grpcio-status
graphviz==0.20.3
# via scrapegraphai
greenlet==3.0.3
# via playwright
groq==0.8.0
# via langchain-groq
grpc-google-iam-v1==0.13.1
# via google-cloud-resource-manager
grpcio==1.64.0
# via google-api-core
# via googleapis-common-protos
# via grpc-google-iam-v1
# via grpcio-status
grpcio-status==1.62.2
# via google-api-core
h11==0.14.0
# via httpcore
html2text==2024.2.26
# via scrapegraphai
httpcore==1.0.5
# via httpx
httplib2==0.22.0
# via google-api-python-client
# via google-auth-httplib2
httpx==0.27.0
# via anthropic
# via fireworks-ai
# via groq
# via openai
httpx-sse==0.4.0
# via fireworks-ai
huggingface-hub==0.23.1
# via tokenizers
idna==3.7
# via anyio
# via httpx
# via requests
# via yarl
jiter==0.4.0
# via anthropic
jmespath==1.0.1
# via boto3
# via botocore
jsonpatch==1.33
# via langchain
# via langchain-core
jsonpointer==2.4
# via jsonpatch
langchain==0.1.15
# via scrapegraphai
langchain-anthropic==0.1.11
# via scrapegraphai
langchain-aws==0.1.3
# via scrapegraphai
langchain-community==0.0.38
# via langchain
langchain-core==0.1.52
# via langchain
# via langchain-anthropic
# via langchain-aws
# via langchain-community
# via langchain-fireworks
# via langchain-google-genai
# via langchain-google-vertexai
# via langchain-groq
# via langchain-openai
# via langchain-text-splitters
langchain-fireworks==0.1.3
# via scrapegraphai
langchain-google-genai==1.0.3
# via scrapegraphai
langchain-google-vertexai==1.0.4
# via scrapegraphai
langchain-groq==0.1.3
# via scrapegraphai
langchain-openai==0.1.6
# via scrapegraphai
langchain-text-splitters==0.0.2
# via langchain
langsmith==0.1.63
# via langchain
# via langchain-community
# via langchain-core
lxml==5.2.2
# via free-proxy
marshmallow==3.21.2
# via dataclasses-json
minify-html==0.15.0
# via scrapegraphai
multidict==6.0.5
# via aiohttp
# via yarl
mypy-extensions==1.0.0
# via typing-inspect
numpy==1.26.4
# via faiss-cpu
# via langchain
# via langchain-aws
# via langchain-community
# via pandas
# via shapely
openai==1.30.3
# via langchain-fireworks
# via langchain-openai
orjson==3.10.3
# via langsmith
packaging==23.2
# via google-cloud-aiplatform
# via google-cloud-bigquery
# via huggingface-hub
# via langchain-core
# via marshmallow
pandas==2.2.2
# via scrapegraphai
pillow==10.3.0
# via fireworks-ai
playwright==1.43.0
# via scrapegraphai
# via undetected-playwright
proto-plus==1.23.0
# via google-ai-generativelanguage
# via google-api-core
# via google-cloud-aiplatform
# via google-cloud-resource-manager
protobuf==4.25.3
# via google-ai-generativelanguage
# via google-api-core
# via google-cloud-aiplatform
# via google-cloud-resource-manager
# via google-generativeai
# via googleapis-common-protos
# via grpc-google-iam-v1
# via grpcio-status
# via proto-plus
pyasn1==0.6.0
# via pyasn1-modules
# via rsa
pyasn1-modules==0.4.0
# via google-auth
pydantic==2.7.1
# via anthropic
# via fireworks-ai
# via google-cloud-aiplatform
# via google-generativeai
# via groq
# via langchain
# via langchain-core
# via langsmith
# via openai
pydantic-core==2.18.2
# via pydantic
pyee==11.1.0
# via playwright
pyparsing==3.1.2
# via httplib2
python-dateutil==2.9.0.post0
# via botocore
# via google-cloud-bigquery
# via pandas
python-dotenv==1.0.1
# via scrapegraphai
pytz==2024.1
# via pandas
pyyaml==6.0.1
# via huggingface-hub
# via langchain
# via langchain-community
# via langchain-core
regex==2024.5.15
# via tiktoken
requests==2.32.2
# via free-proxy
# via google-api-core
# via google-cloud-bigquery
# via google-cloud-storage
# via huggingface-hub
# via langchain
# via langchain-community
# via langchain-fireworks
# via langsmith
# via tiktoken
rsa==4.9
# via google-auth
s3transfer==0.10.1
# via boto3
semchunk==1.0.1
# via scrapegraphai
shapely==2.0.4
# via google-cloud-aiplatform
six==1.16.0
# via python-dateutil
sniffio==1.3.1
# via anthropic
# via anyio
# via groq
# via httpx
# via openai
soupsieve==2.5
# via beautifulsoup4
sqlalchemy==2.0.30
# via langchain
# via langchain-community
tenacity==8.3.0
# via langchain
# via langchain-community
# via langchain-core
tiktoken==0.7.0
# via langchain-openai
# via scrapegraphai
tokenizers==0.19.1
# via anthropic
tqdm==4.66.4
# via google-generativeai
# via huggingface-hub
# via openai
# via scrapegraphai
# via semchunk
typing-extensions==4.12.0
# via anthropic
# via anyio
# via google-generativeai
# via groq
# via huggingface-hub
# via openai
# via pydantic
# via pydantic-core
# via pyee
# via sqlalchemy
# via typing-inspect
typing-inspect==0.9.0
# via dataclasses-json
tzdata==2024.1
# via pandas
undetected-playwright==0.3.0
# via scrapegraphai
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.18
# via botocore
# via requests
yarl==1.9.4
# via aiohttp

View File

@ -7,6 +7,8 @@ from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel
from tqdm import tqdm
import asyncio
from ..utils.merge_results import merge_results
from ..utils.logging import get_logger
from ..models import Ollama, OpenAI
from .base_node import BaseNode
@ -109,42 +111,46 @@ class GenerateAnswerNode(BaseNode):
chains_dict = {}
if len(doc) == 1:
prompt = PromptTemplate(
template=template_no_chunks_prompt,
input_variables=["question"],
partial_variables={"context": doc,
"format_instructions": format_instructions})
chain = prompt | self.llm_model | output_parser
answer = chain.invoke({"question": user_prompt})
state.update({self.output[0]: answer})
return state
# Use tqdm to add progress bar
for i, chunk in enumerate(tqdm(doc, desc="Processing chunks", disable=not self.verbose)):
if len(doc) == 1:
prompt = PromptTemplate(
template=template_no_chunks_prompt,
input_variables=["question"],
partial_variables={"context": chunk,
"format_instructions": format_instructions})
chain = prompt | self.llm_model | output_parser
answer = chain.invoke({"question": user_prompt})
break
prompt = PromptTemplate(
template=template_chunks_prompt,
input_variables=["question"],
partial_variables={"context": chunk,
"chunk_id": i + 1,
"format_instructions": format_instructions})
# Dynamically name the chains based on their index
template=template_chunks,
input_variables=["question"],
partial_variables={"context": chunk,
"chunk_id": i + 1,
"format_instructions": format_instructions})
# Add chain to dictionary with dynamic name
chain_name = f"chunk{i+1}"
chains_dict[chain_name] = prompt | self.llm_model | output_parser
if len(chains_dict) > 1:
# Use dictionary unpacking to pass the dynamically named chains to RunnableParallel
map_chain = RunnableParallel(**chains_dict)
# Chain
answer = map_chain.invoke({"question": user_prompt})
# Merge the answers from the chunks
merge_prompt = PromptTemplate(
template = template_merge_prompt,
input_variables=["context", "question"],
partial_variables={"format_instructions": format_instructions},
)
merge_chain = merge_prompt | self.llm_model | output_parser
answer = merge_chain.invoke({"context": answer, "question": user_prompt})
# Update the state with the generated answer
state.update({self.output[0]: answer})
async def process_chains():
async_runner = RunnableParallel()
for chain_name, chain in chains_dict.items():
async_runner.add(chain.ainvoke([{"question": user_prompt}] * len(doc)))
batch_results = await async_runner.run()
return batch_results
loop = asyncio.get_event_loop()
batch_answers = loop.run_until_complete(process_chains())
# Merge batch results (assuming same structure)
merged_answer = merge_results(batch_answers)
answers = merged_answer
state.update({self.output[0]: answers})
return state

View File

@ -11,3 +11,4 @@ from .sys_dynamic_import import dynamic_import, srcfile_import
from .cleanup_html import cleanup_html
from .logging import *
from .convert_to_md import convert_to_md
from .merge_results import merge_results

View File

@ -0,0 +1,30 @@
def merge_results(batch_answers):
    r"""
    Merge the per-chunk batch results into one newline-separated string.

    Parameters:
    -----------
    batch_answers : iterable of dict
        The batch processing results. Each dictionary must contain a
        "text" key whose value is that result as a string.

    Returns:
    --------
    str
        The "text" values of all results, in input order, joined by a
        newline character. An empty input yields an empty string.

    Raises:
    -------
    KeyError
        If one of the dictionaries has no "text" key.
    TypeError
        If a "text" value is not a string.

    Example:
    --------
    >>> batch_answers = [{"text": "Result from batch 1"}, {"text": "Result from batch 2"}]
    >>> merge_results(batch_answers)
    'Result from batch 1\nResult from batch 2'
    """
    # Pull the text out of every result and put a newline between chunks.
    return "\n".join(answer["text"] for answer in batch_answers)