Merge pull request #624 from ScrapeGraphAI/fix-import-errors

Fix ScreenshotScraper dynamic import errors
This commit is contained in:
Marco Vinciguerra 2024-09-02 15:11:21 +02:00 committed by GitHub
commit aed5452d39
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 17 deletions

View File

@ -74,6 +74,7 @@ class SmartScraperGraph(AbstractGraph):
input="doc",
output=["parsed_doc"],
node_config={
"llm_model": self.llm_model,
"chunk_size": self.model_token
}
)

View File

@ -5,7 +5,6 @@ import asyncio
from io import BytesIO
from PIL import Image, ImageGrab
from playwright.async_api import async_playwright
import cv2 as cv
import numpy as np
from io import BytesIO
@ -42,6 +41,12 @@ def select_area_with_opencv(image):
A tuple containing the LEFT, TOP, RIGHT, and BOTTOM coordinates of the selected area.
"""
try:
import cv2 as cv
except ImportError:
raise ImportError("The dependencies for screenshot scraping are not installed. Please install them using `pip install scrapegraphai[screenshot_scraper]`.")
fullscreen_screenshot = ImageGrab.grab()
dw, dh = fullscreen_screenshot.size
@ -116,8 +121,12 @@ def select_area_with_ipywidget(image):
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, IntSlider
import ipywidgets as widgets
try:
from ipywidgets import interact, IntSlider
import ipywidgets as widgets
except:
raise ImportError("The dependencies for screenshot scraping are not installed. Please install them using `pip install scrapegraphai[screenshot_scraper]`.")
from PIL import Image
img_array = np.array(image)

View File

@ -1,24 +1,29 @@
"""
text_detection_module
"""
from surya.ocr import run_ocr
from surya.model.detection.model import (load_model as load_det_model,
load_processor as load_det_processor)
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.recognition.processor import load_processor as load_rec_processor
def detect_text(image, languages: list = ["en"]):
"""
Detects and extracts text from a given image.
Parameters:
image (PIL Image): The input image to extract text from.
languages (list): A list of languages to detect text in. Defaults to ["en"]. List of languages can be found here: https://github.com/VikParuchuri/surya/blob/master/surya/languages.py
Returns:
str: The extracted text from the image.
Notes:
Model weights will automatically download the first time you run this function.
"""
Detects and extracts text from a given image.
Parameters:
image (PIL Image): The input image to extract text from.
languages (list): A list of languages to detect text in. Defaults to ["en"]. List of languages can be found here: https://github.com/VikParuchuri/surya/blob/master/surya/languages.py
Returns:
str: The extracted text from the image.
Notes:
Model weights will automatically download the first time you run this function.
"""
try:
from surya.ocr import run_ocr
from surya.model.detection.model import (load_model as load_det_model,
load_processor as load_det_processor)
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.recognition.processor import load_processor as load_rec_processor
except:
raise ImportError("The dependencies for screenshot scraping are not installed. Please install them using `pip install scrapegraphai[screenshot_scraper]`.")
langs = languages
det_processor, det_model = load_det_processor(), load_det_model()