mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
feat(kg): working rag kg
This commit is contained in:
parent
58cc903d55
commit
c75e6a06b1
704
examples/knowledge_graph/input/job_postings.json
Normal file
704
examples/knowledge_graph/input/job_postings.json
Normal file
@ -0,0 +1,704 @@
|
||||
{
|
||||
"Job Postings":{
|
||||
"Netflix":[
|
||||
{
|
||||
"title":"Machine Learning Engineer (L4) - Infrastructure Algorithms and ML",
|
||||
"description":"NA",
|
||||
"location":"Los Gatos, CA",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer L4, Algorithms Engineering",
|
||||
"description":"NA",
|
||||
"location":"Los Gatos, CA",
|
||||
"date_posted":"18 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Rose AI":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Team Remotely Inc":[
|
||||
{
|
||||
"title":"Junior Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Wilmington, DE",
|
||||
"date_posted":"14 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Zuma":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"San Francisco Bay Area",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Tinder":[
|
||||
{
|
||||
"title":"Data Scientist I",
|
||||
"description":"NA",
|
||||
"location":"West Hollywood, CA",
|
||||
"date_posted":"23 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Moveworks":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern - NLU & ML Infra",
|
||||
"description":"NA",
|
||||
"location":"Mountain View, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Cognitiv":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"Berkeley, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"DoorDash":[
|
||||
{
|
||||
"title":"Machine Learning Engineer, Forecast Platform",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer, Forecast Platform",
|
||||
"description":"NA",
|
||||
"location":"Sunnyvale, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer - New Verticals",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"PipeIQ":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern (NLP)",
|
||||
"description":"NA",
|
||||
"location":"Palo Alto, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Fractal":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"California, United States",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Accroid Inc":[
|
||||
{
|
||||
"title":"Machine Learning Engineer/Python",
|
||||
"description":"NA",
|
||||
"location":"Austin, TX",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Notion":[
|
||||
{
|
||||
"title":"Software Engineer, Machine Learning",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Software Engineer, Machine Learning",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"PhysicsX":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York, United States",
|
||||
"date_posted":"1 week ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"HireIO, Inc.":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Dexian Inc":[
|
||||
{
|
||||
"title":"Junior Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Columbia, MD",
|
||||
"date_posted":"4 days ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Google":[
|
||||
{
|
||||
"title":"Software Engineer, Early Career",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Software Engineer, Early Career",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Software Engineer, Early Career",
|
||||
"description":"NA",
|
||||
"location":"Mountain View, CA",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Software Engineer, Early Career",
|
||||
"description":"NA",
|
||||
"location":"Sunnyvale, CA",
|
||||
"date_posted":"11 hours ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Customer Engineering, AI/ML (English, Italian)",
|
||||
"description":"Candidates will typically have 6 years of experience as a technical sales engineer in a cloud computing environment.",
|
||||
"location":"Milano, Lombardia",
|
||||
"date_posted":"15 giorni fa",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Unreal Staffing, Inc":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Reveal HealthTech":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Boston, MA",
|
||||
"date_posted":"3 days ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Replicate":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"4 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Truveta":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Greater Seattle Area",
|
||||
"date_posted":"3 days ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Atlassian":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Continua AI, Inc.":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Seattle, WA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Software Technology Inc.":[
|
||||
{
|
||||
"title":"Data Scientist/ ML Engineer | Remote | Long Term",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Data Scientist/ ML Engineer | Remote | Long Term",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Neptune Technologies LLC":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"1 day ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Zoom":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Jose, CA",
|
||||
"date_posted":"4 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"California, United States",
|
||||
"date_posted":"4 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"HP":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Palo Alto, CA",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Enterprise Minds, Inc":[
|
||||
{
|
||||
"title":"Machine Learning Software Engineer",
|
||||
"description":"NA",
|
||||
"location":"Mountain View, CA",
|
||||
"date_posted":"1 week ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Celonis":[
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Machine Learning Engineer Intern",
|
||||
"description":"NA",
|
||||
"location":"Palo Alto, CA",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Lockheed Martin":[
|
||||
{
|
||||
"title":"A/AI Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Littleton, CO",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Two Dots":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"Los Angeles, CA",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Verneek":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"1 week ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Rivian":[
|
||||
{
|
||||
"title":"Machine Learning Software Engineer",
|
||||
"description":"NA",
|
||||
"location":"Palo Alto, CA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Impax Recruitment":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Stripe":[
|
||||
{
|
||||
"title":"Machine Learning Engineer, Risk",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"3 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Adobe":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Jose, CA",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Javelin":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York City Metropolitan Area",
|
||||
"date_posted":"1 week ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Ultralytics":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"New York, NY",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Supernormal":[
|
||||
{
|
||||
"title":"Machine Learning Engineer (with a focus on modeling)",
|
||||
"description":"NA",
|
||||
"location":"Seattle, WA",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Samsung Electronics America":[
|
||||
{
|
||||
"title":"Machine Learning Engineer – Data Science",
|
||||
"description":"NA",
|
||||
"location":"Mountain View, CA",
|
||||
"date_posted":"4 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Skale":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"San Francisco, CA",
|
||||
"date_posted":"2 weeks ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Steneral Consulting":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"1 month ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Movable Ink":[
|
||||
{
|
||||
"title":"Machine Learning Engineer",
|
||||
"description":"NA",
|
||||
"location":"United States",
|
||||
"date_posted":"2 months ago",
|
||||
"requirements":[
|
||||
"NA"
|
||||
]
|
||||
}
|
||||
],
|
||||
"LHH":[
|
||||
{
|
||||
"title":"DevOps Engineer",
|
||||
"description":"Per azienda cliente Fit2you, siamo alla ricerca di un DevOps Engineer presso la sede di Milano che possa operare all'intersezione di Fit2you Broker e Air, guidando l'innovazione tecnologica e l'efficienza operativa in entrambi i contesti. Questo ruolo unico offre l'opportunità di influenzare significativamente due diversi, ma complementari, settori dell'industria automotive, dal brokeraggio assicurativo ai big data e alle auto connesse.",
|
||||
"location":"Italy",
|
||||
"date_posted":"15d",
|
||||
"requirements":[
|
||||
"CI/CD",
|
||||
"DevOps",
|
||||
"AWS",
|
||||
"JavaScript",
|
||||
"Integrazione continua"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Deloitte":[
|
||||
{
|
||||
"title":"Experienced - Cloud Test Engineer - Cloud Native Development & Migration - NextHub Bari",
|
||||
"description":"Scopri di più sulle nostre strategie di Corporate Sustainability, tra cui Well-being, la strategia volta a migliorare il benessere fisico, mentale e sociale.",
|
||||
"location":"Bari",
|
||||
"date_posted":"14d",
|
||||
"requirements":[
|
||||
"ASP.NET",
|
||||
"Azure",
|
||||
"DevOps",
|
||||
"C#",
|
||||
"Automazione dei test"
|
||||
]
|
||||
}
|
||||
],
|
||||
"MACMARK":[
|
||||
{
|
||||
"title":"MID/SENIOR BACKEND DEVELOPER IN PRESENZA",
|
||||
"description":"Sarà possibile solo lavorare in presenza, pertanto sei disponibile a lavorare nella sede di Rende (CS)? Buona propensione nel lavorare in Team.",
|
||||
"location":"Rende",
|
||||
"date_posted":"7d",
|
||||
"requirements":[
|
||||
"Infrastrutture cloud",
|
||||
"Azure",
|
||||
"CSS",
|
||||
"Git",
|
||||
"Google Cloud Platform"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"MID/SENIOR FRONTEND DEVELOPER IN PRESENZA",
|
||||
"description":"Buona propensione nel lavorare in Team. O Laura in informativa ed almeno 1/2 anni di esperienza in un contesto di sviluppo software.",
|
||||
"location":"Rende",
|
||||
"date_posted":"7d",
|
||||
"requirements":[
|
||||
"Infrastrutture cloud",
|
||||
"CSS",
|
||||
"React",
|
||||
"Git",
|
||||
"Google Cloud Platform"
|
||||
]
|
||||
}
|
||||
],
|
||||
"Assist Digital Spa":[
|
||||
{
|
||||
"title":"System & Networking Engineer",
|
||||
"description":"Eu. Il Trattamento è realizzato, con il suo consenso, per realizzare processi di ricerca, selezione e valutazione del personale svolti per conto proprio, per.",
|
||||
"location":"Roma",
|
||||
"date_posted":"30d+",
|
||||
"requirements":[
|
||||
"Inglese",
|
||||
"Windows",
|
||||
"Sistemi di sicurezza",
|
||||
"AWS",
|
||||
"Virtualizzazione"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Prompt Engineer",
|
||||
"description":"You, as data subject of the processing of personal data, may exercise at any time the rights expressly granted by the European Regulation, and in particular.",
|
||||
"location":"Roma",
|
||||
"date_posted":"30d+",
|
||||
"requirements":[
|
||||
"Strutture dati",
|
||||
"Inglese",
|
||||
"Google Cloud Platform",
|
||||
"AWS",
|
||||
"C"
|
||||
]
|
||||
}
|
||||
],
|
||||
"TOOLS FOR SMART MINDS S.r.l.":[
|
||||
{
|
||||
"title":"Sviluppatore software",
|
||||
"description":"predisposizione a lavorare in team. La nostra missione è creare valore per le aziende che vogliono intraprendere la trasformazione 4.0 con soluzioni su misura.",
|
||||
"location":"Castel Mella",
|
||||
"date_posted":"30d+",
|
||||
"requirements":[
|
||||
"Inglese",
|
||||
"Machine learning",
|
||||
"Intelligenza artificiale"
|
||||
]
|
||||
},
|
||||
{
|
||||
"title":"Sviluppatore software - linguaggio OWL e SPARQL",
|
||||
"description":"predisposizione a lavorare in team. La nostra missione è creare valore per le aziende che vogliono intraprendere la trasformazione 4.0 con soluzioni su misura."
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
44
examples/knowledge_graph/load_vector.py
Normal file
44
examples/knowledge_graph/load_vector.py
Normal file
@ -0,0 +1,44 @@
|
||||
import os, json
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.utils import create_graph, create_interactive_graph_retrieval
|
||||
|
||||
# Example script: load the FAISS index written by save_vector.py, run a
# similarity search for a free-text prompt, and render the job-postings graph
# with the retrieved companies highlighted.
load_dotenv()

# Load the OpenAI API key and the embeddings model
openai_key = os.getenv("OPENAI_APIKEY")
embeddings_model = OpenAIEmbeddings(api_key=openai_key)

# Paths, all resolved relative to this script so it can be run from any cwd
curr_dir = os.path.dirname(os.path.realpath(__file__))
json_file_path = os.path.join(curr_dir, 'input', 'job_postings.json')
vector_store_output_path = os.path.join(curr_dir, 'output', 'faiss_index')
retrieval_graph_output_path = os.path.join(curr_dir, 'output', 'job_postings_retrieval.html')

# Load the job postings JSON file.
# The postings contain non-ASCII text, so decode explicitly as UTF-8 instead
# of relying on the platform's default encoding.
with open(json_file_path, 'r', encoding='utf-8') as f:
    job_postings = json.load(f)

# Load the vector store.
# SECURITY: allow_dangerous_deserialization=True lets the loader unpickle the
# index.pkl stored next to index.faiss. Only use it on an index you produced
# yourself (this is the same path save_vector.py writes to); never point it
# at an untrusted file.
db = FAISS.load_local(
    vector_store_output_path,
    embeddings_model,
    allow_dangerous_deserialization=True
)

# User prompt for similarity search
user_prompt = "Company based United States with job title Software Engineer"

# Similarity search on the vector store; each hit is a (document, score) pair.
# NOTE(review): fetch_k presumably only sizes the candidate pool used when a
# metadata filter is given, so the number of hits is still governed by k
# (library default) -- confirm whether k=10 was intended here.
result = db.similarity_search_with_score(user_prompt, fetch_k=10)

# The text of each stored document is what gets highlighted in the graph
found_companies = [document.page_content for document, _score in result]

# Build the graph
graph = create_graph(job_postings)

# Create the interactive graph
create_interactive_graph_retrieval(graph, found_companies, output_file=retrieval_graph_output_path)
|
||||
BIN
examples/knowledge_graph/output/faiss_index/index.faiss
Normal file
BIN
examples/knowledge_graph/output/faiss_index/index.faiss
Normal file
Binary file not shown.
BIN
examples/knowledge_graph/output/faiss_index/index.pkl
Normal file
BIN
examples/knowledge_graph/output/faiss_index/index.pkl
Normal file
Binary file not shown.
41
examples/knowledge_graph/save_vector.py
Normal file
41
examples/knowledge_graph/save_vector.py
Normal file
@ -0,0 +1,41 @@
|
||||
import json
|
||||
import os
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Example script: embed the job postings with OpenAI embeddings and persist
# them as a local FAISS index.
load_dotenv()

# Load the OpenAI API key and the embeddings model
openai_key = os.getenv("OPENAI_APIKEY")
embeddings_model = OpenAIEmbeddings(api_key=openai_key)

# Paths, all resolved relative to this script so it can be run from any cwd
curr_dir = os.path.dirname(os.path.realpath(__file__))
json_file_path = os.path.join(curr_dir, 'input', 'job_postings.json')
vector_store_output_path = os.path.join(curr_dir, 'output', 'faiss_index')

# Load the job postings JSON file.
# The postings contain non-ASCII text, so decode explicitly as UTF-8 instead
# of relying on the platform's default encoding.
with open(json_file_path, 'r', encoding='utf-8') as f:
    job_postings = json.load(f)

texts = []
metadata = []

# Extract company names and job details.
# One entry is produced per job: the embedded text is the company name only,
# while the job fields ride along as metadata. A company with several
# postings therefore appears several times in the index.
for company, jobs in job_postings["Job Postings"].items():
    for job in jobs:
        texts.append(company)
        # Postings may omit fields, so fall back to placeholders
        metadata.append({
            "title": job.get("title", "N/A"),
            "description": job.get("description", "N/A"),
            "location": job.get("location", "N/A"),
            "date_posted": job.get("date_posted", "N/A"),
            "requirements": job.get("requirements", [])
        })

# Create the vector store
db = FAISS.from_texts(texts=texts, embedding=embeddings_model, metadatas=metadata)

# Save the embeddings locally
db.save_local(vector_store_output_path)
|
||||
@ -9,4 +9,4 @@ from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers
|
||||
from .save_audio_from_bytes import save_audio_from_bytes
|
||||
from .sys_dynamic_import import dynamic_import, srcfile_import
|
||||
from .cleanup_html import cleanup_html
|
||||
from .knowledge_graph import create_graph, add_customizations, create_interactive_graph
|
||||
from .knowledge_graph import create_graph, create_interactive_graph, create_interactive_graph_retrieval
|
||||
@ -70,6 +70,79 @@ def add_customizations(net, graph):
|
||||
net.add_edge(edge[0], edge[1])
|
||||
return net
|
||||
|
||||
# Add customizations to the network
|
||||
def add_customizations_retrieval(net, graph, found_companies):
    """Copy *graph* into *net*, highlighting the retrieved companies in red.

    For every name in ``found_companies`` that is a node of ``graph`` the
    following are drawn in red: the company node, the chain of first
    predecessors leading up to the ``"Job Postings"`` root (the root
    included), the company's ``"<company>-Job<i>"`` nodes, and the edges
    connecting them, down to the edges from each job node to its direct
    successors. Every other node and edge is drawn in translucent light grey.

    Args:
        net: Network object exposing ``add_node`` and ``add_edge``; it is
            populated in place.
        graph: Directed graph exposing ``nodes``, ``edges``, ``predecessors``,
            ``successors`` and ``out_degree``.
        found_companies: Company node names to highlight. Duplicates are
            processed once, and names that are not nodes of ``graph`` (for
            example hits from an index that is out of sync with the graph)
            are ignored.

    Returns:
        The same ``net`` object, holding every node and edge of ``graph``.
    """
    root = "Job Postings"
    highlight = 'red'
    faded = 'rgba(211, 211, 211, 0.5)'  # light grey with transparency

    # Default style of the root; it is overridden below as soon as a
    # highlighted path reaches it.
    node_colors = {root: '#8470FF'}
    node_sizes = {root: 50}

    # Nodes and edges to highlight in red
    highlighted_nodes = set(found_companies)
    highlighted_edges = set()

    # dict.fromkeys drops duplicate names while keeping the caller's order
    for company in dict.fromkeys(found_companies):
        if company not in graph.nodes:
            # Unknown name: nothing to highlight, and querying the graph for
            # it would raise.
            continue

        node_colors[company] = highlight
        node_sizes[company] = 30

        # Highlight the path to the root, following the first predecessor at
        # each step. `seen` stops the walk if the graph contains a cycle.
        seen = {company}
        node = company
        while node != root:
            predecessors = list(graph.predecessors(node))
            if not predecessors or predecessors[0] in seen:
                break
            predecessor = predecessors[0]
            seen.add(predecessor)
            highlighted_nodes.add(predecessor)
            node_colors[predecessor] = highlight
            node_sizes[predecessor] = 30
            highlighted_edges.add((predecessor, node))
            node = predecessor

        # Highlight job nodes and edges; job nodes are looked up by their
        # "<company>-Job<i>" identifier.
        for idx in range(1, graph.out_degree(company) + 1):
            job_node = f"{company}-Job{idx}"
            if job_node not in graph.nodes:
                continue
            highlighted_nodes.add(job_node)
            node_colors[job_node] = highlight
            node_sizes[job_node] = 20
            highlighted_edges.add((company, job_node))

            # Job detail nodes stay grey, but the edges to them are highlighted
            for successor in graph.successors(job_node):
                if successor not in highlighted_nodes:
                    node_colors[successor] = faded
                    node_sizes[successor] = 10
                highlighted_edges.add((job_node, successor))

    # Add nodes to the network; anything not styled above is faded out
    for node in graph.nodes:
        if node not in node_colors:
            node_colors[node] = faded
            node_sizes[node] = 10 if '-' in node else 15
        attrs = graph.nodes[node]
        net.add_node(node,
                     label=attrs.get('label', node.split('-')[-1]),
                     color=node_colors[node],
                     size=node_sizes[node],
                     title=attrs.get('title', ''))

    # Add edges to the network
    for edge in graph.edges:
        edge_color = highlight if edge in highlighted_edges else faded
        net.add_edge(edge[0], edge[1], color=edge_color)

    return net
|
||||
|
||||
# Create interactive graph
|
||||
def create_interactive_graph(graph, output_file='interactive_graph.html'):
|
||||
net = Network(notebook=False, height='1000px', width='100%', bgcolor='white', font_color='black')
|
||||
@ -79,3 +152,11 @@ def create_interactive_graph(graph, output_file='interactive_graph.html'):
|
||||
# Automatically open the generated HTML file in the default web browser
|
||||
webbrowser.open(f"file://{os.path.realpath(output_file)}")
|
||||
|
||||
# Create interactive graph
|
||||
def create_interactive_graph_retrieval(graph, found_companies, output_file='interactive_graph.html'):
    """Render *graph* to HTML with the retrieved companies highlighted, then open it.

    Args:
        graph: Directed graph to draw; passed through to
            ``add_customizations_retrieval``.
        found_companies: Company node names to highlight in the rendering.
        output_file: Path of the HTML file to write.

    Side effects:
        Writes ``output_file`` and opens it in the default web browser.
    """
    # Local import so this function does not depend on the module's import block
    from pathlib import Path

    net = Network(notebook=False, height='1000px', width='100%', bgcolor='white', font_color='black')
    net = add_customizations_retrieval(net, graph, found_companies)
    net.save_graph(output_file)

    # Automatically open the generated HTML file in the default web browser.
    # as_uri() builds a well-formed, percent-encoded file URL on every
    # platform; gluing "file://" onto the path breaks on Windows drive paths
    # and on paths containing spaces or '#'.
    webbrowser.open(Path(os.path.realpath(output_file)).as_uri())
|
||||
|
||||
Loading…
Reference in New Issue
Block a user