diff --git a/CHANGELOG.md b/CHANGELOG.md index a69adb32..abfc555d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,17 @@ -## [1.8.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.8.0-beta.1) (2024-06-25) +## [1.8.1-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.8.0...v1.8.1-beta.1) (2024-07-04) + + +### Bug Fixes + +* add test ([3a537ee](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3a537eec6fef1743924a9aa5cef0ba2f8d44bf11)) + + +### Docs + +* **roadmap:** fix urls ([14faba4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/14faba4f00dd9f947f8dc5e0b51be49ea684179f)) +* **roadmap:** next steps ([3e644f4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3e644f498f05eb505fbd4e94b144c81567569aaa)) + +## [1.8.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.5...v1.8.0) (2024-06-30) ### Features @@ -16,6 +29,28 @@ * **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822)) * **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6)) +* **release:** 1.8.0-beta.1 [skip ci] ([bbfbbd9](https://github.com/VinciGit00/Scrapegraph-ai/commit/bbfbbd93be3c87c5f25e3c75ec7d677832d37467)) + +## [1.8.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.8.0-beta.1) (2024-06-25) + + +### Features + +* add new search engine avaiability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180)) +* add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14)) + + + +### Bug Fixes + +* updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f)) + + +### CI + +* **release:** 1.7.0-beta.13 [skip ci] 
([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822)) +* **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6)) + ## [1.7.4](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.3...v1.7.4) (2024-06-21) diff --git a/README.md b/README.md index 977243e3..11def085 100644 --- a/README.md +++ b/README.md @@ -191,9 +191,32 @@ Please see the [contributing guidelines](https://github.com/VinciGit00/Scrapegra [![My Skills](https://skillicons.dev/icons?i=twitter)](https://twitter.com/scrapegraphai) ## 📈 Roadmap -Check out the project roadmap [here](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/README.md)! 🚀 -Wanna visualize the roadmap in a more interactive way? Check out the [markmap](https://markmap.js.org/repl) visualization by copy pasting the markdown content in the editor! +We are working on the following features! If you are interested in collaborating, right-click on the feature and open it in a new tab to file a PR. If you have doubts and want to discuss them with us, just contact us on [Discord](https://discord.gg/uJN7TYcpNa) or open a [Discussion](https://github.com/VinciGit00/Scrapegraph-ai/discussions) here on GitHub! 
+ +```mermaid +%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#5C4B9B', 'edgeLabelBackground':'#ffffff', 'tertiaryColor': '#ffffff', 'primaryBorderColor': '#5C4B9B', 'fontFamily': 'Arial', 'fontSize': '16px', 'textColor': '#5C4B9B' }}}%% +graph LR + A[DeepSearch Graph] --> F[Use Existing Chromium Instances] + F --> B[Page Caching] + B --> C[Screenshot Scraping] + C --> D[Handle Dynamic Content] + D --> E[New Webdrivers] + + style A fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style F fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style B fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style C fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style D fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style E fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + + click A href "https://github.com/VinciGit00/Scrapegraph-ai/issues/260" "Open DeepSearch Graph Issue" + click F href "https://github.com/VinciGit00/Scrapegraph-ai/issues/329" "Open Chromium Instances Issue" + click B href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Page Caching Issue" + click C href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Screenshot Scraping Issue" + click D href "https://github.com/VinciGit00/Scrapegraph-ai/issues/279" "Open Handle Dynamic Content Issue" + click E href "https://github.com/VinciGit00/Scrapegraph-ai/issues/171" "Open New Webdrivers Issue" +``` ## ❤️ Contributors [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) diff --git a/pyproject.toml b/pyproject.toml index 607d988e..8bfda917 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,7 @@ name = "scrapegraphai" -version = "1.8.0b1" - +version = "1.8.1b1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index da159847..0ef10277 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -78,7 +78,11 @@ models_tokens = { "claude2": 9000, "claude2.1": 200000, "claude3": 200000, - "claude3.5": 200000 + "claude3.5": 200000, + "claude-3-opus-20240229": 200000, + "claude-3-sonnet-20240229": 200000, + "claude-3-haiku-20240307": 200000, + "claude-3-5-sonnet-20240620": 200000 }, "vertexai": { "gemini-1.5-flash": 128000, diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 59c56975..97fed67b 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -84,7 +84,7 @@ class SearchInternetNode(BaseNode): You should return only the query string without any additional sentences. \n For example, if the user prompt is "What is the capital of France?", you should return "capital of France". \n - If yuo return something else, you will get a really bad grade. \n + If you return something else, you will get a really bad grade. 
\n USER PROMPT: {user_prompt}""" search_prompt = PromptTemplate( diff --git a/scrapegraphai/utils/parse_state_keys.py b/scrapegraphai/utils/parse_state_keys.py index 6afc2ecb..85712ef6 100644 --- a/scrapegraphai/utils/parse_state_keys.py +++ b/scrapegraphai/utils/parse_state_keys.py @@ -101,18 +101,3 @@ def parse_expression(expression, state: dict) -> list: final_result.append(key) return final_result - - -EXPRESSION = "user_input & (relevant_chunks | parsed_document | document)" -state = { - "user_input": None, - "document": None, - "parsed_document": None, - "relevant_chunks": None, -} - -try: - result = parse_expression(EXPRESSION, state) - print("Matched keys:", result) -except ValueError as e: - print("Error:", e) diff --git a/tests/utils/parse_state_keys_test.py b/tests/utils/parse_state_keys_test.py new file mode 100644 index 00000000..d91355f1 --- /dev/null +++ b/tests/utils/parse_state_keys_test.py @@ -0,0 +1,21 @@ +""" +Parse_state_key test module +""" +import pytest +from scrapegraphai.utils.parse_state_keys import parse_expression + + +def test_parse_expression(): + """Test parse_expression function.""" + EXPRESSION = "user_input & (relevant_chunks | parsed_document | document)" + state = { + "user_input": None, + "document": None, + "parsed_document": None, + "relevant_chunks": None, + } + try: + result = parse_expression(EXPRESSION, state) + assert result != [] + except ValueError as e: + assert "Error" in str(e)