mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
This commit is contained in:
commit
104d8692d6
37
CHANGELOG.md
37
CHANGELOG.md
@ -1,4 +1,17 @@
|
||||
## [1.8.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.8.0-beta.1) (2024-06-25)
|
||||
## [1.8.1-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.8.0...v1.8.1-beta.1) (2024-07-04)
|
||||
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* add test ([3a537ee](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3a537eec6fef1743924a9aa5cef0ba2f8d44bf11))
|
||||
|
||||
|
||||
### Docs
|
||||
|
||||
* **roadmap:** fix urls ([14faba4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/14faba4f00dd9f947f8dc5e0b51be49ea684179f))
|
||||
* **roadmap:** next steps ([3e644f4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3e644f498f05eb505fbd4e94b144c81567569aaa))
|
||||
|
||||
## [1.8.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.5...v1.8.0) (2024-06-30)
|
||||
|
||||
|
||||
### Features
|
||||
@ -16,6 +29,28 @@
|
||||
|
||||
* **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822))
|
||||
* **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6))
|
||||
* **release:** 1.8.0-beta.1 [skip ci] ([bbfbbd9](https://github.com/VinciGit00/Scrapegraph-ai/commit/bbfbbd93be3c87c5f25e3c75ec7d677832d37467))
|
||||
|
||||
## [1.8.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.8.0-beta.1) (2024-06-25)
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
* add new search engine availability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180))
|
||||
* add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14))
|
||||
|
||||
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f))
|
||||
|
||||
|
||||
### CI
|
||||
|
||||
* **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822))
|
||||
* **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6))
|
||||
|
||||
|
||||
## [1.7.4](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.3...v1.7.4) (2024-06-21)
|
||||
|
||||
|
||||
27
README.md
27
README.md
@ -191,9 +191,32 @@ Please see the [contributing guidelines](https://github.com/VinciGit00/Scrapegra
|
||||
[](https://twitter.com/scrapegraphai)
|
||||
|
||||
## 📈 Roadmap
|
||||
Check out the project roadmap [here](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/README.md)! 🚀
|
||||
|
||||
Want to visualize the roadmap in a more interactive way? Check out the [markmap](https://markmap.js.org/repl) visualization by copy-pasting the markdown content into the editor!
|
||||
We are working on the following features! If you are interested in collaborating, right-click on a feature and open it in a new tab to file a PR. If you have doubts and want to discuss them with us, just contact us on [discord](https://discord.gg/uJN7TYcpNa) or open a [Discussion](https://github.com/VinciGit00/Scrapegraph-ai/discussions) here on GitHub!
|
||||
|
||||
```mermaid
|
||||
%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#5C4B9B', 'edgeLabelBackground':'#ffffff', 'tertiaryColor': '#ffffff', 'primaryBorderColor': '#5C4B9B', 'fontFamily': 'Arial', 'fontSize': '16px', 'textColor': '#5C4B9B' }}}%%
|
||||
graph LR
|
||||
A[DeepSearch Graph] --> F[Use Existing Chromium Instances]
|
||||
F --> B[Page Caching]
|
||||
B --> C[Screenshot Scraping]
|
||||
C --> D[Handle Dynamic Content]
|
||||
D --> E[New Webdrivers]
|
||||
|
||||
style A fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
|
||||
style F fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
|
||||
style B fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
|
||||
style C fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
|
||||
style D fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
|
||||
style E fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
|
||||
|
||||
click A href "https://github.com/VinciGit00/Scrapegraph-ai/issues/260" "Open DeepSearch Graph Issue"
|
||||
click F href "https://github.com/VinciGit00/Scrapegraph-ai/issues/329" "Open Chromium Instances Issue"
|
||||
click B href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Page Caching Issue"
|
||||
click C href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Screenshot Scraping Issue"
|
||||
click D href "https://github.com/VinciGit00/Scrapegraph-ai/issues/279" "Open Handle Dynamic Content Issue"
|
||||
click E href "https://github.com/VinciGit00/Scrapegraph-ai/issues/171" "Open New Webdrivers Issue"
|
||||
```
|
||||
|
||||
## ❤️ Contributors
|
||||
[](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors)
|
||||
|
||||
@ -2,8 +2,7 @@
|
||||
name = "scrapegraphai"
|
||||
|
||||
|
||||
version = "1.8.0b1"
|
||||
|
||||
version = "1.8.1b1"
|
||||
|
||||
|
||||
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
|
||||
|
||||
@ -78,7 +78,11 @@ models_tokens = {
|
||||
"claude2": 9000,
|
||||
"claude2.1": 200000,
|
||||
"claude3": 200000,
|
||||
"claude3.5": 200000
|
||||
"claude3.5": 200000,
|
||||
"claude-3-opus-20240229": 200000,
|
||||
"claude-3-sonnet-20240229": 200000,
|
||||
"claude-3-haiku-20240307": 200000,
|
||||
"claude-3-5-sonnet-20240620": 200000
|
||||
},
|
||||
"vertexai": {
|
||||
"gemini-1.5-flash": 128000,
|
||||
|
||||
@ -84,7 +84,7 @@ class SearchInternetNode(BaseNode):
|
||||
You should return only the query string without any additional sentences. \n
|
||||
For example, if the user prompt is "What is the capital of France?",
|
||||
you should return "capital of France". \n
|
||||
If yuo return something else, you will get a really bad grade. \n
|
||||
If you return something else, you will get a really bad grade. \n
|
||||
USER PROMPT: {user_prompt}"""
|
||||
|
||||
search_prompt = PromptTemplate(
|
||||
|
||||
@ -101,18 +101,3 @@ def parse_expression(expression, state: dict) -> list:
|
||||
final_result.append(key)
|
||||
|
||||
return final_result
|
||||
|
||||
|
||||
EXPRESSION = "user_input & (relevant_chunks | parsed_document | document)"
|
||||
state = {
|
||||
"user_input": None,
|
||||
"document": None,
|
||||
"parsed_document": None,
|
||||
"relevant_chunks": None,
|
||||
}
|
||||
|
||||
try:
|
||||
result = parse_expression(EXPRESSION, state)
|
||||
print("Matched keys:", result)
|
||||
except ValueError as e:
|
||||
print("Error:", e)
|
||||
|
||||
21
tests/utils/parse_state_keys_test.py
Normal file
21
tests/utils/parse_state_keys_test.py
Normal file
@ -0,0 +1,21 @@
|
||||
"""
|
||||
Parse_state_key test module
|
||||
"""
|
||||
import pytest
|
||||
from scrapegraphai.utils.parse_state_keys import parse_expression
|
||||
|
||||
|
||||
def test_parse_expression():
|
||||
"""Test parse_expression function."""
|
||||
EXPRESSION = "user_input & (relevant_chunks | parsed_document | document)"
|
||||
state = {
|
||||
"user_input": None,
|
||||
"document": None,
|
||||
"parsed_document": None,
|
||||
"relevant_chunks": None,
|
||||
}
|
||||
try:
|
||||
result = parse_expression(EXPRESSION, state)
|
||||
assert result != []
|
||||
except ValueError as e:
|
||||
assert "Error" in str(e)
|
||||
Loading…
Reference in New Issue
Block a user