diff --git a/CHANGELOG.md b/CHANGELOG.md index 60e964c4..4d89d3f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,34 @@ -## [0.10.0-beta.6](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.10.0-beta.5...v0.10.0-beta.6) (2024-05-09) +## [0.11.0-beta.2](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.11.0-beta.1...v0.11.0-beta.2) (2024-05-10) +### Features + +* revert fetch_node ([864aa91](https://github.com/VinciGit00/Scrapegraph-ai/commit/864aa91326c360992326e04811d272e55eac8355)) + +## [0.11.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.10.0...v0.11.0-beta.1) (2024-05-10) + + +### Features + +* Add support for passing pdf path as source ([f10f3b1](https://github.com/VinciGit00/Scrapegraph-ai/commit/f10f3b1438e0c625b7f2fa52faeb5a6c12116113)) +* update info ([4ed0fb8](https://github.com/VinciGit00/Scrapegraph-ai/commit/4ed0fb89c3e6068190a7775bedcb6ae65ba59d18)) + + +### Bug Fixes + +* add json integration ([0ab31c3](https://github.com/VinciGit00/Scrapegraph-ai/commit/0ab31c3fdbd56652ed306e60109301f60e8042d3)) +* Augment the information getting fetched from a webpage ([f8ce3d5](https://github.com/VinciGit00/Scrapegraph-ai/commit/f8ce3d5916eab926275d59d4d48b0d89ec9cd43f)) +* fixed bugs for csv and xml ([324e977](https://github.com/VinciGit00/Scrapegraph-ai/commit/324e977b853ecaa55bac4bf86e7cd927f7f43d0d)) +* limit python version to < 3.12 ([a37fbbc](https://github.com/VinciGit00/Scrapegraph-ai/commit/a37fbbcbcfc3ddd0cc66f586f279676b52c4abfe)) + + +### CI + +* **release:** 0.10.0-beta.3 [skip ci] ([ad32298](https://github.com/VinciGit00/Scrapegraph-ai/commit/ad32298e70fc626fd62c897e153b806f79dba9b9)) +* **release:** 0.10.0-beta.4 [skip ci] ([548bff9](https://github.com/VinciGit00/Scrapegraph-ai/commit/548bff9d77c8b4d2aadee40e966a06cc9d7fd4ab)) +* **release:** 0.10.0-beta.5 [skip ci] ([28c9dce](https://github.com/VinciGit00/Scrapegraph-ai/commit/28c9dce7cbda49750172bafd7767fa48a0c33859)) +* **release:** 0.10.0-beta.6 [skip ci] ([460d292](https://github.com/VinciGit00/Scrapegraph-ai/commit/460d292af21fabad3fdd2b66110913ccee22ba91)) + ### Bug Fixes * add json integration ([0ab31c3](https://github.com/VinciGit00/Scrapegraph-ai/commit/0ab31c3fdbd56652ed306e60109301f60e8042d3)) @@ -8,8 +36,10 @@ ## [0.10.0-beta.5](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.10.0-beta.4...v0.10.0-beta.5) (2024-05-09) + ### Bug Fixes + * fixed bugs for csv and xml ([324e977](https://github.com/VinciGit00/Scrapegraph-ai/commit/324e977b853ecaa55bac4bf86e7cd927f7f43d0d)) ## [0.10.0-beta.4](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.10.0-beta.3...v0.10.0-beta.4) (2024-05-09) diff --git a/docs/source/getting_started/examples.rst b/docs/source/getting_started/examples.rst index 11fb5a05..b6e2eb36 100644 --- a/docs/source/getting_started/examples.rst +++ b/docs/source/getting_started/examples.rst @@ -44,9 +44,12 @@ Local models Remember to have installed in your pc ollama `ollama ` Remember to pull the right model for LLM and for the embeddings, like: + .. code-block:: bash ollama pull llama3 + ollama pull nomic-embed-text + ollama pull mistral After that, you can run the following code, using only your machine resources brum brum brum: diff --git a/pyproject.toml b/pyproject.toml index 78febc6d..da6364e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "scrapegraphai" -version = "0.10.0b6" +version = "0.11.0b2" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 51a66518..73363917 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -129,4 +129,4 @@ class FetchNode(BaseNode): ] state.update({self.output[0]: compressed_document}) - return state + return state \ No newline at end of file diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py index 74c70f84..dde9971d 100644 --- a/scrapegraphai/utils/__init__.py +++ b/scrapegraphai/utils/__init__.py @@ -6,5 +6,6 @@ from .convert_to_csv import convert_to_csv from .convert_to_json import convert_to_json from .prettify_exec_info import prettify_exec_info from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers +from .remover import remover from .save_audio_from_bytes import save_audio_from_bytes from .sys_dynamic_import import dynamic_import, srcfile_import diff --git a/scrapegraphai/utils/remover.py b/scrapegraphai/utils/remover.py index 5e203249..c5a0507b 100644 --- a/scrapegraphai/utils/remover.py +++ b/scrapegraphai/utils/remover.py @@ -40,4 +40,4 @@ def remover(html_content: str) -> str: minimized_body = minify(str(body_content)) return "Title: " + title + ", Body: " + minimized_body - return "Title: " + title + ", Body: No body content found" + return "Title: " + title + ", Body: No body content found" \ No newline at end of file