mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
fix(kg): removed unused nodes and utils
This commit is contained in:
parent
edf221dcd9
commit
5684578fab
@ -1,704 +0,0 @@
|
||||
{
|
||||
"Job Postings":{
|
||||
"Netflix":[
|
||||
{
|
||||
"title":"Machine Learning Engineer (L4) - Infrastructure Algorithms and ML",
|
||||
"description":"NA",
|
||||
"location":"Los Gatos, CA",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer L4, Algorithms Engineering",
|
||||
"description":"NA",
|
||||
"location":"Los Gatos, CA",
|
||||
"date_posted":"18 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Rose AI":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Team Remotely Inc":[
|
||||
{
|
||||
"title":"Junior Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Wilmington, DE",
|
||||
"date_posted":"14 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Zuma":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"San Francisco Bay Area",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Tinder":[
|
||||
{
|
||||
"title":"Data Scientist I",
|
||||
"description":"NA",
|
||||
"location":"West Hollywood, CA",
|
||||
"date_posted":"23 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Moveworks":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern - NLU & ML Infra",
|
||||
"description":"NA",
|
||||
"location":"Mountain View, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Cognitiv":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"Berkeley, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"DoorDash":[
|
||||
{
|
||||
"title":"Machine Learning Engineer, Forecast Platform",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer, Forecast Platform",
|
||||
"description":"NA",
|
||||
"location":"Sunnyvale, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer - New Verticals",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"PipeIQ":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern (NLP)",
|
||||
"description":"NA",
|
||||
"location":"Palo Alto, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Fractal":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"California, United States",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Accroid Inc":[
|
||||
{
|
||||
"title":"Machine Learning Engineer/Python",
|
||||
"description":"NA",
|
||||
"location":"Austin, TX",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Notion":[
|
||||
{
|
||||
"title":"Software Engineer, Machine Learning",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Software Engineer, Machine Learning",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"PhysicsX":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York, United States",
|
||||
"date_posted":"1 week ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"HireIO, Inc.":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Dexian Inc":[
|
||||
{
|
||||
"title":"Junior Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Columbia, MD",
|
||||
"date_posted":"4 days ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Google":[
|
||||
{
|
||||
"title":"Software Engineer, Early Career",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Software Engineer, Early Career",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Software Engineer, Early Career",
|
||||
"description":"NA",
|
||||
"location":"Mountain View, CA",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Software Engineer, Early Career",
|
||||
"description":"NA",
|
||||
"location":"Sunnyvale, CA",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Customer Engineering, AI/ML (English, Italian)",
|
||||
"description":"Candidates will typically have 6 years of experience as a technical sales engineer in a cloud computing environment.",
|
||||
"location":"Milano, Lombardia",
|
||||
"date_posted":"15 giorni fa",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Unreal Staffing, Inc":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Reveal HealthTech":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Boston, MA",
|
||||
"date_posted":"3 days ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Replicate":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"4 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Truveta":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Greater Seattle Area",
|
||||
"date_posted":"3 days ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Atlassian":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Continua AI, Inc.":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Seattle, WA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Software Technology Inc.":[
|
||||
{
|
||||
"title":"Data Scientist/ ML Engineer | Remote | Long Term",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Data Scientist/ ML Engineer | Remote | Long Term",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Neptune Technologies LLC":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"1 day ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Zoom":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Jose, CA",
|
||||
"date_posted":"4 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"California, United States",
|
||||
"date_posted":"4 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"HP":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Palo Alto, CA",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Enterprise Minds, Inc":[
|
||||
{
|
||||
"title":"Machine Learning Software Engineer",
|
||||
"description":"NA",
|
||||
"location":"Mountain View, CA",
|
||||
"date_posted":"1 week ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Celonis":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"Palo Alto, CA",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Lockheed Martin":[
|
||||
{
|
||||
"title":"A/AI Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Littleton, CO",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Two Dots":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Los Angeles, CA",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Verneek":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"1 week ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Rivian":[
|
||||
{
|
||||
"title":"Machine Learning Software Engineer",
|
||||
"description":"NA",
|
||||
"location":"Palo Alto, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Impax Recruitment":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Stripe":[
|
||||
{
|
||||
"title":"Machine Learning Engineer, Risk",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Adobe":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Jose, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Javelin":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York City Metropolitan Area",
|
||||
"date_posted":"1 week ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Ultralytics":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Supernormal":[
|
||||
{
|
||||
"title":"Machine Learning Engineer (with a focus on modeling)",
|
||||
"description":"NA",
|
||||
"location":"Seattle, WA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Samsung Electronics America":[
|
||||
{
|
||||
"title":"Machine Learning Engineer – Data Science",
|
||||
"description":"NA",
|
||||
"location":"Mountain View, CA",
|
||||
"date_posted":"4 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Skale":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Steneral Consulting":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Movable Ink":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"LHH":[
|
||||
{
|
||||
"title":"DevOps Engineer",
|
||||
"description":"Per azienda cliente Fit2you, siamo alla ricerca di un DevOps Engineer presso la sede di Milano che possa operare all'intersezione di Fit2you Broker e Air, guidando l'innovazione tecnologica e l'efficienza operativa in entrambi i contesti. Questo ruolo unico offre l'opportunità di influenzare significativamente due diversi, ma complementari, settori dell'industria automotive, dal brokeraggio assicurativo ai big data e alle auto connesse.",
|
||||
"location":"Italy",
|
||||
"date_posted":"15d",
|
||||
"requirements":[
|
||||
"CI/CD",
|
||||
"DevOps",
|
||||
"AWS",
|
||||
"JavaScript",
|
||||
"Integrazione continua"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Deloitte":[
|
||||
{
|
||||
"title":"Experienced - Cloud Test Engineer - Cloud Native Development & Migration - NextHub Bari",
|
||||
"description":"Scopri di più sulle nostre strategie di Corporate Sustainability, tra cui Well-being, la strategia volta a migliorare il benessere fisico, mentale e sociale.",
|
||||
"location":"Bari",
|
||||
"date_posted":"14d",
|
||||
"requirements":[
|
||||
"ASP.NET",
|
||||
"Azure",
|
||||
"DevOps",
|
||||
"C#",
|
||||
"Automazione dei test"
|
||||
]
|
||||
}
|
||||
],
|
||||
"MACMARK":[
|
||||
{
|
||||
"title":"MID/SENIOR BACKEND DEVELOPER IN PRESENZA",
|
||||
"description":"Sarà possibile solo lavorare in presenza, pertanto sei disponibile a lavorare nella sede di Rende (CS)? Buona propensione nel lavorare in Team.",
|
||||
"location":"Rende",
|
||||
"date_posted":"7d",
|
||||
"requirements":[
|
||||
"Infrastrutture cloud",
|
||||
"Azure",
|
||||
"CSS",
|
||||
"Git",
|
||||
"Google Cloud Platform"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"MID/SENIOR FRONTEND DEVELOPER IN PRESENZA",
|
||||
"description":"Buona propensione nel lavorare in Team. O Laura in informativa ed almeno 1/2 anni di esperienza in un contesto di sviluppo software.",
|
||||
"location":"Rende",
|
||||
"date_posted":"7d",
|
||||
"requirements":[
|
||||
"Infrastrutture cloud",
|
||||
"CSS",
|
||||
"React",
|
||||
"Git",
|
||||
"Google Cloud Platform"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Assist Digital Spa":[
|
||||
{
|
||||
"title":"System & Networking Engineer",
|
||||
"description":"Eu. Il Trattamento è realizzato, con il suo consenso, per realizzare processi di ricerca, selezione e valutazione del personale svolti per conto proprio, per.",
|
||||
"location":"Roma",
|
||||
"date_posted":"30d+",
|
||||
"requirements":[
|
||||
"Inglese",
|
||||
"Windows",
|
||||
"Sistemi di sicurezza",
|
||||
"AWS",
|
||||
"Virtualizzazione"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Prompt Engineer",
|
||||
"description":"You, as data subject of the processing of personal data, may exercise at any time the rights expressly granted by the European Regulation, and in particular.",
|
||||
"location":"Roma",
|
||||
"date_posted":"30d+",
|
||||
"requirements":[
|
||||
"Strutture dati",
|
||||
"Inglese",
|
||||
"Google Cloud Platform",
|
||||
"AWS",
|
||||
"C"
|
||||
]
|
||||
}
|
||||
],
|
||||
"TOOLS FOR SMART MINDS S.r.l.":[
|
||||
{
|
||||
"title":"Sviluppatore software",
|
||||
"description":"predisposizione a lavorare in team. La nostra missione è creare valore per le aziende che vogliono intraprendere la trasformazione 4.0 con soluzioni su misura.",
|
||||
"location":"Castel Mella",
|
||||
"date_posted":"30d+",
|
||||
"requirements":[
|
||||
"Inglese",
|
||||
"Machine learning",
|
||||
"Intelligenza artificiale"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Sviluppatore software - linguaggio OWL e SPARQL",
|
||||
"description":"predisposizione a lavorare in team. La nostra missione è creare valore per le aziende che vogliono intraprendere la trasformazione 4.0 con soluzioni su misura."
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@ -1,134 +0,0 @@
|
||||
"""
|
||||
Example of custom graph for creating a knowledge graph
|
||||
"""
|
||||
|
||||
import os, json
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from scrapegraphai.models import OpenAI
|
||||
from scrapegraphai.graphs import BaseGraph, SmartScraperGraph
|
||||
from scrapegraphai.nodes import GraphIteratorNode, MergeAnswersNode, KnowledgeGraphNode
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema
|
||||
# ************************************************
|
||||
|
||||
schema= """{
|
||||
"Job Postings": {
|
||||
"Company x": [
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
"location": "...",
|
||||
"date_posted": "..",
|
||||
"requirements": ["...", "...", "..."]
|
||||
},
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
"location": "...",
|
||||
"date_posted": "..",
|
||||
"requirements": ["...", "...", "..."]
|
||||
}
|
||||
],
|
||||
"Company y": [
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
"location": "...",
|
||||
"date_posted": "..",
|
||||
"requirements": ["...", "...", "..."]
|
||||
}
|
||||
]
|
||||
}
|
||||
}"""
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
# ************************************************
# Define the configuration for the graph
# ************************************************

# The API key is read from the environment (populated by load_dotenv above).
openai_key = os.getenv("OPENAI_APIKEY")

llm_settings = {
    "api_key": openai_key,
    "model": "gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Instantiate the models and the per-URL scraper
# ************************************************

llm_model = OpenAI(graph_config["llm"])
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)

# Prompt and source are left empty here; this instance is handed to the
# GraphIteratorNode below as its "graph_instance".
smart_scraper_instance = SmartScraperGraph(
    prompt="",
    source="",
    config=graph_config,
)

# ************************************************
# Define the graph nodes
# ************************************************

# Reads "user_prompt" and "urls" from the state, writes "results".
graph_iterator_node = GraphIteratorNode(
    input="user_prompt & urls",
    output=["results"],
    node_config={"graph_instance": smart_scraper_instance},
)

# Reads "user_prompt" and "results", writes "answer" shaped by `schema`.
merge_answers_node = MergeAnswersNode(
    input="user_prompt & results",
    output=["answer"],
    node_config={"llm_model": llm_model, "schema": schema},
)

# Reads "user_prompt" and "answer", writes "kg".
knowledge_graph_node = KnowledgeGraphNode(
    input="user_prompt & answer",
    output=["kg"],
    node_config={"llm_model": llm_model},
)

# Linear pipeline: iterate over URLs -> merge answers -> build knowledge graph.
pipeline_nodes = [
    graph_iterator_node,
    merge_answers_node,
    knowledge_graph_node,
]
pipeline_edges = [
    (graph_iterator_node, merge_answers_node),
    (merge_answers_node, knowledge_graph_node),
]

graph = BaseGraph(
    nodes=pipeline_nodes,
    edges=pipeline_edges,
    entry_point=graph_iterator_node,
)

# ************************************************
# Execute the graph
# ************************************************

job_board_urls = [
    "https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
    "https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
    "https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa",
]
initial_state = {
    "user_prompt": "List me all the Machine Learning Engineer job postings",
    "urls": job_board_urls,
}

result, execution_info = graph.execute(initial_state)

# Pull the merged answer out of the final state and pretty-print it.
result = result.get("answer", "No answer found.")
print(json.dumps(result, indent=4))
|
||||
@ -1,44 +0,0 @@
|
||||
import os, json
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.utils import create_graph, create_interactive_graph_retrieval
|
||||
|
||||
load_dotenv()

# Load the OpenAI API key and the embeddings model
openai_key = os.getenv("OPENAI_APIKEY")
embeddings_model = OpenAIEmbeddings(api_key=openai_key)

# Paths (all resolved relative to this script's directory)
curr_dir = os.path.dirname(os.path.realpath(__file__))
json_file_path = os.path.join(curr_dir, 'input', 'job_postings.json')
vector_store_output_path = os.path.join(curr_dir, 'output', 'faiss_index')
retrieval_graph_output_path = os.path.join(curr_dir, 'output', 'job_postings_retrieval.html')

# Load the job postings JSON file.
# The encoding is pinned to UTF-8 because the postings contain non-ASCII text
# (accented Italian descriptions, en dashes); relying on the platform default
# would make decoding locale-dependent.
with open(json_file_path, 'r', encoding='utf-8') as f:
    job_postings = json.load(f)

# Load the vector store.
# SECURITY NOTE: allow_dangerous_deserialization=True lets FAISS.load_local
# unpickle the stored index. Only load an index that was produced locally by a
# trusted script; never point this at a file from an untrusted source.
db = FAISS.load_local(
    vector_store_output_path,
    embeddings_model,
    allow_dangerous_deserialization=True
)

# User prompt for similarity search
user_prompt = "Company based United States with job title Software Engineer"

# Similarity search on the vector store.
# NOTE(review): only fetch_k is set here; if the intent is to get 10 hits back,
# `k` may be the parameter that is wanted — confirm against the FAISS API.
result = db.similarity_search_with_score(user_prompt, fetch_k=10)

# Each hit is indexed as (document, score); keep only the document text.
found_companies = [hit[0].page_content for hit in result]

# Build the graph
graph = create_graph(job_postings)

# Create the interactive graph
create_interactive_graph_retrieval(graph, found_companies, output_file=retrieval_graph_output_path)
|
||||
Binary file not shown.
Binary file not shown.
@ -1,41 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()

# Load the OpenAI API key and the embeddings model
openai_key = os.getenv("OPENAI_APIKEY")
embeddings_model = OpenAIEmbeddings(api_key=openai_key)

# Paths (all resolved relative to this script's directory)
curr_dir = os.path.dirname(os.path.realpath(__file__))
json_file_path = os.path.join(curr_dir, 'input', 'job_postings.json')
vector_store_output_path = os.path.join(curr_dir, 'output', 'faiss_index')

# Load the job postings JSON file.
# The encoding is pinned to UTF-8 because the postings contain non-ASCII text
# (accented Italian descriptions, en dashes); relying on the platform default
# would make decoding locale-dependent.
with open(json_file_path, 'r', encoding='utf-8') as f:
    job_postings = json.load(f)

# `texts` and `metadata` are parallel lists: one entry per job posting.
texts = []
metadata = []

# Extract company names and job details.
# The embedded text is the company name only; the job fields travel as metadata.
for company, jobs in job_postings["Job Postings"].items():
    for job in jobs:
        texts.append(company)
        metadata.append({
            "title": job.get("title", "N/A"),
            "description": job.get("description", "N/A"),
            "location": job.get("location", "N/A"),
            "date_posted": job.get("date_posted", "N/A"),
            "requirements": job.get("requirements", [])
        })

# Create the vector store
db = FAISS.from_texts(texts=texts, embedding=embeddings_model, metadatas=metadata)

# Save the embeddings locally
db.save_local(vector_store_output_path)
|
||||
@ -11,8 +11,7 @@ from .smart_scraper_graph import SmartScraperGraph
|
||||
|
||||
from ..nodes import (
|
||||
GraphIteratorNode,
|
||||
MergeAnswersNode,
|
||||
KnowledgeGraphNode
|
||||
MergeAnswersNode
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -19,5 +19,4 @@ from .generate_answer_csv_node import GenerateAnswerCSVNode
|
||||
from .generate_answer_pdf_node import GenerateAnswerPDFNode
|
||||
from .graph_iterator_node import GraphIteratorNode
|
||||
from .merge_answers_node import MergeAnswersNode
|
||||
from .generate_answer_omni_node import GenerateAnswerOmniNode
|
||||
from .knowledge_graph_node import KnowledgeGraphNode
|
||||
from .generate_answer_omni_node import GenerateAnswerOmniNode
|
||||
@ -1,67 +0,0 @@
|
||||
"""
|
||||
BlocksIndentifier Module
|
||||
"""
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain_community.document_loaders import AsyncChromiumLoader
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from .base_node import BaseNode
|
||||
|
||||
|
||||
class BlocksIndentifier(BaseNode):
    """
    A node intended to identify the repeated blocks in the HTML content of a page,
    e.g. products on an e-commerce site, flights on a travel website, etc.

    NOTE(review): the block-identification step itself is not implemented yet;
    ``execute`` currently only resolves its inputs and returns the state unchanged.

    Attributes:
        headless (bool): A flag indicating whether the browser should run in headless mode.
        verbose (bool): A flag indicating whether to print verbose output during execution.

    Args:
        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (Optional[dict]): Additional configuration for the node.
        node_name (str): The unique identifier name for the node, defaulting to "BlocksIndentifier".
    """

    def __init__(
        self,
        input: str,
        output: List[str],
        node_config: Optional[dict] = None,
        node_name: str = "BlocksIndentifier",
    ):
        super().__init__(node_name, "node", input, output, 1)

        self.headless = (
            True if node_config is None else node_config.get("headless", True)
        )
        # NOTE(review): verbose is True when no config is given but False when a
        # config without a "verbose" key is given — confirm which default is intended.
        self.verbose = (
            True if node_config is None else node_config.get("verbose", False)
        )

    def execute(self, state):
        """
        Resolves the node's input keys against the state and returns the state.

        Args:
            state (dict): The current state of the graph. The input keys will be used
                to fetch the corresponding values from the state.

        Returns:
            dict: The state that was passed in. No output key is written yet because
                the identification logic is not implemented.

        Raises:
            KeyError: If an input key is not found in the state, indicating that the
                necessary information to perform the operation is missing.
        """
        self.logger.info(f"--- Executing {self.node_name} Node ---")

        # Interpret input keys based on the provided input expression
        input_keys = self.get_input_keys(state)

        # Fetching data from the state based on the input keys. The values are not
        # consumed yet, but the lookup still surfaces a missing key as a KeyError.
        input_data = [state[key] for key in input_keys]

        # Hand the state back (previously the method fell through and returned None,
        # contradicting its documented dict return).
        return state
|
||||
@ -1,101 +0,0 @@
|
||||
"""
|
||||
KnowledgeGraphNode Module
|
||||
"""
|
||||
|
||||
# Imports from standard library
|
||||
from typing import List, Optional
|
||||
from tqdm import tqdm
|
||||
|
||||
# Imports from Langchain
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain_core.output_parsers import JsonOutputParser
|
||||
|
||||
# Imports from the library
|
||||
from .base_node import BaseNode
|
||||
from ..utils import create_graph, create_interactive_graph
|
||||
|
||||
|
||||
class KnowledgeGraphNode(BaseNode):
    """
    A node responsible for generating a knowledge graph from a dictionary.

    Attributes:
        llm_model: The language model client taken from ``node_config["llm_model"]``,
            or None when no configuration is supplied. It is stored but not used by
            ``execute``.
        verbose (bool): A flag indicating whether to show print statements during execution.

    Args:
        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (dict): Additional configuration for the node.
        node_name (str): The unique identifier name for the node, defaulting to "KnowledgeGraph".
    """

    def __init__(self, input: str, output: List[str], node_config: Optional[dict] = None,
                 node_name: str = "KnowledgeGraph"):
        super().__init__(node_name, "node", input, output, 2, node_config)

        # node_config defaults to None, so guard before subscripting it; a supplied
        # config must still provide "llm_model" (KeyError otherwise, as before).
        self.llm_model = None if node_config is None else node_config["llm_model"]
        self.verbose = False if node_config is None else node_config.get(
            "verbose", False)

    def execute(self, state: dict) -> dict:
        """
        Executes the node's logic to create a knowledge graph from a dictionary.

        The second resolved input is passed to ``create_graph``; the resulting graph
        is handed to ``create_interactive_graph`` with the output file name
        'knowledge_graph.html' and is also stored in the state.

        Args:
            state (dict): The current state of the graph. The input keys will be used
                to fetch the correct data from the state.

        Returns:
            dict: The updated state with the first output key containing the generated graph.

        Raises:
            KeyError: If the input keys are not found in the state, indicating
                that the necessary information for building the graph is missing.
        """

        if self.verbose:
            print(f"--- Executing {self.node_name} Node ---")

        # Interpret input keys based on the provided input expression
        input_keys = self.get_input_keys(state)

        # Fetching data from the state based on the input keys
        input_data = [state[key] for key in input_keys]

        # input_data[0] is the user prompt (not needed here); input_data[1] is the
        # answer dictionary to turn into a graph.
        answer_dict = input_data[1]

        # Build the graph
        graph = create_graph(answer_dict)
        # Create the interactive graph
        create_interactive_graph(graph, output_file='knowledge_graph.html')

        # Update the state with the generated graph
        state.update({self.output[0]: graph})
        return state
|
||||
@ -9,5 +9,4 @@ from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers
|
||||
from .save_audio_from_bytes import save_audio_from_bytes
|
||||
from .sys_dynamic_import import dynamic_import, srcfile_import
|
||||
from .cleanup_html import cleanup_html
|
||||
from .knowledge_graph import create_graph, create_interactive_graph, create_interactive_graph_retrieval
|
||||
from .logging import *
|
||||
|
||||
@ -1,162 +0,0 @@
|
||||
import networkx as nx
|
||||
from pyvis.network import Network
|
||||
import webbrowser
|
||||
import os
|
||||
|
||||
# Create and visualize graph
|
||||
def create_graph(job_postings):
    """Build a directed graph describing a nested job-postings dictionary.

    The resulting hierarchy is: the root node ``"Job Postings"`` -> one node
    per company -> one node per job (``"<company>-Job<n>"``, numbered from 1)
    -> one node per job field. A field whose value is a list gets a node
    labelled with the field name plus one child node per list item.

    Args:
        job_postings: Mapping with a ``"Job Postings"`` key whose value maps
            each company name to a list of job dictionaries.

    Returns:
        The populated ``networkx.DiGraph``.
    """
    root = "Job Postings"
    graph = nx.DiGraph()
    graph.add_node(root)

    for company, postings in job_postings[root].items():
        # Company level, hanging directly off the root.
        graph.add_node(company)
        graph.add_edge(root, company)

        for position, posting in enumerate(postings, start=1):
            # Job level: the id is derived from the company name and a
            # 1-based counter.
            job_id = f"{company}-Job{position}"
            graph.add_node(job_id)
            graph.add_edge(company, job_id)

            for field, content in posting.items():
                field_id = f"{job_id}-{field}"

                if not isinstance(content, list):
                    # Scalar field: a single leaf whose tooltip carries the value.
                    graph.add_node(field_id, label=field, title=f"{field}: {content}")
                    graph.add_edge(job_id, field_id)
                    continue

                # List field: an intermediate node, then one leaf per item.
                graph.add_node(field_id, label=field)
                graph.add_edge(job_id, field_id)
                for entry in content:
                    entry_id = f"{field_id}-{entry}"
                    graph.add_node(entry_id, label=entry, title=entry)
                    graph.add_edge(field_id, entry_id)

    return graph
|
||||
|
||||
# Add customizations to the network
|
||||
def add_customizations(net, graph):
    """Copy ``graph`` into ``net``, styling every node according to its level.

    Levels are taken from the graph structure (distance from the
    ``"Job Postings"`` root), not from the number of ``-`` characters in the
    node id, so a company whose name contains a hyphen is still drawn as a
    company and keeps its full name as label.

    Styles (colour, size):
        * root ``"Job Postings"``: ``#8470FF``, 50
        * company (distance 1):    ``#3CB371``, 30
        * job (distance 2):        ``#FFA07A``, 20
        * job detail (deeper):     ``#B0C4DE``, 10

    Args:
        net: Network object exposing ``add_node`` and ``add_edge``
            (a pyvis ``Network`` in this module).
        graph: networkx graph whose nodes may carry ``label`` and ``title``
            attributes.

    Returns:
        The same ``net`` object, populated with the nodes and edges of
        ``graph``.
    """
    root = "Job Postings"
    level_styles = {
        0: ('#8470FF', 50),  # root
        1: ('#3CB371', 30),  # company
        2: ('#FFA07A', 20),  # job
    }
    detail_style = ('#B0C4DE', 10)

    # Distance of every reachable node from the root; dict() also accepts the
    # (node, length) iterator returned by some networkx releases.
    if root in graph:
        depths = dict(nx.single_source_shortest_path_length(graph, root))
    else:
        depths = {}

    for node in graph.nodes:
        level = depths.get(node)
        if level is None:
            # Not reachable from the root: fall back to the id-based
            # heuristic (no hyphen -> company, one -> job, more -> detail).
            level = node.count('-') + 1

        color, size = level_styles.get(level, detail_style)

        # Root and company ids are shown verbatim; deeper ids are shortened
        # to the part after the last hyphen when no explicit label exists.
        default_label = node if level <= 1 else node.rsplit('-', 1)[-1]

        attrs = graph.nodes[node]
        net.add_node(node,
                     label=attrs.get('label', default_label),
                     color=color,
                     size=size,
                     title=attrs.get('title', ''))

    for source, target in graph.edges:
        net.add_edge(source, target)

    return net
|
||||
|
||||
# Add customizations to the network
|
||||
def add_customizations_retrieval(net, graph, found_companies):
    """Copy ``graph`` into ``net``, highlighting the retrieved companies.

    Each company in ``found_companies`` is drawn in red together with its
    path up to the ``"Job Postings"`` root, its job nodes
    (``"<company>-Job<n>"``) and the edges leading to them and to their
    detail nodes. Everything else is drawn in a semi-transparent light grey.

    Names in ``found_companies`` that are not nodes of ``graph`` are ignored
    instead of raising a networkx error.

    Args:
        net: Network object exposing ``add_node`` and ``add_edge``
            (a pyvis ``Network`` in this module).
        graph: Directed networkx graph (``predecessors``, ``successors`` and
            ``out_degree`` are used).
        found_companies: Iterable of company node ids to highlight.

    Returns:
        The same ``net`` object, populated with the nodes and edges of
        ``graph``.
    """
    root = "Job Postings"
    highlight = 'red'
    faded = 'rgba(211, 211, 211, 0.5)'  # light grey with transparency

    # Default styling of the root; overwritten below when it lies on a
    # highlighted path.
    node_colors = {root: '#8470FF'}
    node_sizes = {root: 50}

    highlighted_nodes = set(found_companies)
    highlighted_edges = set()

    for company in found_companies:
        if company not in graph:
            # Unknown company: nothing to highlight, and querying its
            # predecessors would raise.
            continue

        node_colors[company] = highlight
        node_sizes[company] = 30

        # Walk up to the root, following the first predecessor at each step.
        node = company
        while node != root:
            parents = list(graph.predecessors(node))
            if not parents:
                break
            parent = parents[0]
            highlighted_nodes.add(parent)
            node_colors[parent] = highlight
            node_sizes[parent] = 30
            highlighted_edges.add((parent, node))
            node = parent

        # Job nodes are numbered from 1 up to the company's out-degree.
        for idx in range(1, graph.out_degree(company) + 1):
            job_node = f"{company}-Job{idx}"
            if job_node not in graph.nodes:
                continue

            highlighted_nodes.add(job_node)
            node_colors[job_node] = highlight
            node_sizes[job_node] = 20
            highlighted_edges.add((company, job_node))

            # Detail nodes stay faded, but the edges reaching them are
            # highlighted.
            for successor in graph.successors(job_node):
                if successor not in highlighted_nodes:
                    node_colors[successor] = faded
                    node_sizes[successor] = 10
                highlighted_edges.add((job_node, successor))

    # Every node not styled so far is faded; ids without a hyphen are drawn
    # slightly larger.
    for node in graph.nodes:
        if node not in node_colors:
            node_colors[node] = faded
            node_sizes[node] = 10 if '-' in node else 15

    for node in graph.nodes:
        attrs = graph.nodes[node]
        net.add_node(node,
                     label=attrs.get('label', node.split('-')[-1]),
                     color=node_colors[node],
                     size=node_sizes[node],
                     title=attrs.get('title', ''))

    for edge in graph.edges:
        edge_color = highlight if edge in highlighted_edges else faded
        net.add_edge(edge[0], edge[1], color=edge_color)

    return net
|
||||
|
||||
# Create interactive graph
|
||||
def create_interactive_graph(graph, output_file='interactive_graph.html'):
    """Render ``graph`` as an interactive HTML page and open it in a browser.

    Args:
        graph: networkx graph to visualise; it is styled by
            ``add_customizations``.
        output_file: Path of the HTML file to write.

    Returns:
        None. The HTML file is written to ``output_file`` and handed to the
        default web browser.
    """
    from pathlib import Path  # local import keeps this function self-contained

    net = Network(notebook=False, height='1000px', width='100%', bgcolor='white', font_color='black')
    net = add_customizations(net, graph)
    net.save_graph(output_file)

    # Automatically open the generated HTML file in the default web browser.
    # as_uri() produces a well-formed, percent-encoded file URI, which plain
    # "file://" + path concatenation does not for Windows paths or paths
    # containing spaces or '#'.
    webbrowser.open(Path(os.path.realpath(output_file)).as_uri())
|
||||
|
||||
# Create interactive graph
|
||||
def create_interactive_graph_retrieval(graph, found_companies, output_file='interactive_graph.html'):
    """Render ``graph`` with retrieved companies highlighted and open it.

    Args:
        graph: networkx graph to visualise; it is styled by
            ``add_customizations_retrieval``.
        found_companies: Iterable of company node ids to highlight.
        output_file: Path of the HTML file to write.

    Returns:
        None. The HTML file is written to ``output_file`` and handed to the
        default web browser.
    """
    from pathlib import Path  # local import keeps this function self-contained

    net = Network(notebook=False, height='1000px', width='100%', bgcolor='white', font_color='black')
    net = add_customizations_retrieval(net, graph, found_companies)
    net.save_graph(output_file)

    # Automatically open the generated HTML file in the default web browser.
    # as_uri() produces a well-formed, percent-encoded file URI, which plain
    # "file://" + path concatenation does not for Windows paths or paths
    # containing spaces or '#'.
    webbrowser.open(Path(os.path.realpath(output_file)).as_uri())
|
||||
Loading…
Reference in New Issue
Block a user