mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
feat(gpt-4o): image to text single node test
This commit is contained in:
parent
d2877d89e5
commit
90955ca52f
@ -19,7 +19,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-3.5-turbo",
|
||||
"model": "gpt-4o",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
@ -30,7 +30,7 @@ graph_config = {
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
prompt="List me all the projects with their description",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
config=graph_config
|
||||
|
||||
51
examples/single_node/image2text_node.py
Normal file
51
examples/single_node/image2text_node.py
Normal file
@ -0,0 +1,51 @@
|
||||
"""
|
||||
Example of ImageToTextNode
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.nodes import ImageToTextNode
|
||||
from scrapegraphai.models import OpenAIImageToText
|
||||
|
||||
load_dotenv()

# ------------------------------------------------
# Configuration: credentials and model settings for the image-to-text LLM.
# The API key is read from the OPENAI_APIKEY environment variable.
# ------------------------------------------------

openai_key = os.getenv("OPENAI_APIKEY")

llm_settings = {
    "api_key": openai_key,
    "model": "gpt-4o",
    "temperature": 0,
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Node definition: reads "img_url" from the state and writes "img_desc".
# ------------------------------------------------

llm_model = OpenAIImageToText(graph_config["llm"])

node_settings = {
    "llm_model": llm_model,
    "headless": False,
}
image_to_text_node = ImageToTextNode(
    input="img_url",
    output=["img_desc"],
    node_config=node_settings,
)

# ------------------------------------------------
# Run the node once on a single image URL and print the resulting state.
# ------------------------------------------------

state = {
    "img_url": "https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/assets/scrapegraphai_logo.png?raw=true"
}

result = image_to_text_node.execute(state)

print(result)
|
||||
@ -18,6 +18,7 @@ models_tokens = {
|
||||
"gpt-4-0613": 8192,
|
||||
"gpt-4-32k": 32768,
|
||||
"gpt-4-32k-0613": 32768,
|
||||
"gpt-4o": 128000,
|
||||
},
|
||||
"azure": {
|
||||
"gpt-3.5-turbo": 4096,
|
||||
|
||||
68
scrapegraphai/nodes/image_descriptor_node.py
Normal file
68
scrapegraphai/nodes/image_descriptor_node.py
Normal file
@ -0,0 +1,68 @@
|
||||
"""
|
||||
ImageDescriptorNode Module
|
||||
"""
|
||||
|
||||
from typing import List, Optional
|
||||
from .base_node import BaseNode
|
||||
|
||||
|
||||
class ImageDescriptorNode(BaseNode):
    """
    Retrieve images from a list of URLs and return a description of the images using an
    image-to-text model.

    Attributes:
        llm_model: An instance of the language model client used for image-to-text conversion.
        verbose (bool): A flag indicating whether to show print statements during execution.
        max_images (int): Maximum number of URLs described per execution (defaults to 5).

    Args:
        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (dict): Additional configuration for the node. Must contain "llm_model";
            may contain "verbose" and "max_images".
        node_name (str): The unique identifier name for the node, defaulting to "ImageDescriptor".

    Raises:
        KeyError: If node_config is missing or does not provide "llm_model".
    """

    def __init__(
        self,
        input: str,
        output: List[str],
        node_config: Optional[dict] = None,
        node_name: str = "ImageDescriptor",
    ):
        super().__init__(node_name, "node", input, output, 1, node_config)

        # The signature allows node_config=None, so normalise it before reading keys;
        # subscripting None would otherwise fail with an unhelpful TypeError.
        config = {} if node_config is None else node_config
        if "llm_model" not in config:
            raise KeyError(
                "ImageDescriptorNode requires node_config['llm_model'] "
                "(an image-to-text model instance)"
            )

        self.llm_model = config["llm_model"]
        self.verbose = config.get("verbose", False)
        self.max_images = config.get("max_images", 5)

    def execute(self, state: dict) -> dict:
        """
        Generate text from images using an image-to-text model. The method reads the image
        URL(s) from the state and stores the list of generated descriptions in the state.

        Args:
            state (dict): The current state of the graph. The input keys will be used to fetch
                the correct data from the state. The first input value may be a single URL
                string or a sequence of URL strings.

        Returns:
            dict: The updated state, with the first output key holding the list of
                descriptions (one per processed URL). The state is returned unchanged
                when no URL is provided.
        """

        if self.verbose:
            print(f"--- Executing {self.node_name} Node ---")

        input_keys = self.get_input_keys(state)
        input_data = [state[key] for key in input_keys]
        urls = input_data[0]

        # Nothing to describe: leave the state untouched.
        if not urls:
            return state

        # A bare string is a single URL; without this it would be sliced and
        # iterated character by character.
        if isinstance(urls, str):
            urls = [urls]

        # Cap the number of model calls at max_images.
        img_desc = [self.llm_model.run(url) for url in urls[:self.max_images]]

        state.update({self.output[0]: img_desc})
        return state
|
||||
Loading…
Reference in New Issue
Block a user