mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-25 21:11:11 +08:00
Merge branch 'main' into refactoring_examples_folder
This commit is contained in:
commit
b4e640879d
8
poetry.lock
generated
8
poetry.lock
generated
@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohttp"
|
||||
@ -1117,13 +1117,13 @@ extended-testing = ["lxml (>=5.1.0,<6.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "langsmith"
|
||||
version = "0.1.40"
|
||||
version = "0.1.41"
|
||||
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8.1"
|
||||
files = [
|
||||
{file = "langsmith-0.1.40-py3-none-any.whl", hash = "sha256:aa47d0f5a1eabd5c05ac6ce2cd3e28ccfc554d366e856a27b7c3c17c443881cb"},
|
||||
{file = "langsmith-0.1.40.tar.gz", hash = "sha256:50fdf313741cf94e978de06025fd180b56acf1d1a4549b0fd5453ef23d5461ef"},
|
||||
{file = "langsmith-0.1.41-py3-none-any.whl", hash = "sha256:11de22b6990502c630fdfdf6906681e664c6659d2118bcd2b79d08016e770831"},
|
||||
{file = "langsmith-0.1.41.tar.gz", hash = "sha256:1250cd6c9074ca10d40002b23d79b3017329b139fbe954248fdd7a79544e90d0"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "scrapegraphai"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
|
||||
authors = [
|
||||
"Marco Vinciguerra <mvincig11@gmail.com>",
|
||||
|
||||
@ -59,7 +59,14 @@ class AbstractGraph(ABC):
|
||||
return Gemini(llm_params)
|
||||
|
||||
elif "ollama" in llm_params["model"]:
|
||||
# take the model name after the last slash
|
||||
"""
|
||||
Available models:
|
||||
- llama2
|
||||
- mistral
|
||||
- codellama
|
||||
- dolphin-mixtral
|
||||
- mistral-openorca
|
||||
"""
|
||||
llm_params["model"] = llm_params["model"].split("/")[-1]
|
||||
try:
|
||||
self.model_token = models_tokens["ollama"][llm_params["model"]]
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
Module for converting text to speech
|
||||
"""
|
||||
from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
|
||||
from ..models import OpenAI, Gemini, OpenAITextToSpeech
|
||||
from ..models import OpenAITextToSpeech
|
||||
from .base_graph import BaseGraph
|
||||
from ..nodes import (
|
||||
FetchNode,
|
||||
|
||||
@ -24,6 +24,8 @@ models_tokens = {
|
||||
"ollama": {
|
||||
"llama2": 4096,
|
||||
"mistral": 8192,
|
||||
"codellama": 16000,
|
||||
"dolphin-mixtral": 32000,
|
||||
"mistral-openorca": 32000,
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -67,8 +67,8 @@ class FetchNode(BaseNode):
|
||||
# Fetching data from the state based on the input keys
|
||||
input_data = [state[key] for key in input_keys]
|
||||
|
||||
source = input_data[0]
|
||||
|
||||
source = input_data[0]
|
||||
|
||||
# if the source is not a URL (does not start with "http"), treat it as local content
|
||||
if not source.startswith("http"):
|
||||
document = [Document(page_content=source, metadata={
|
||||
|
||||
@ -8,8 +8,8 @@ from langchain.retrievers import ContextualCompressionRetriever
|
||||
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
|
||||
from langchain_community.document_transformers import EmbeddingsRedundantFilter
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI
|
||||
from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
|
||||
from ..models import OpenAI, Ollama, AzureOpenAI
|
||||
from langchain_community.embeddings import OllamaEmbeddings
|
||||
from .base_node import BaseNode
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user