feat(kg): working rag kg

This commit is contained in:
Marco Perini 2024-05-18 10:26:25 +02:00
parent 58cc903d55
commit c75e6a06b1
7 changed files with 871 additions and 1 deletions

View File

@ -0,0 +1,704 @@
{
"Job Postings":{
"Netflix":[
{
"title":"Machine Learning Engineer (L4) - Infrastructure Algorithms and ML",
"description":"NA",
"location":"Los Gatos, CA",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer L4, Algorithms Engineering",
"description":"NA",
"location":"Los Gatos, CA",
"date_posted":"18 hours ago",
"requirements":[
"NA"
]
}
],
"Rose AI":[
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Team Remotely Inc":[
{
"title":"Junior Machine Learning Engineer",
"description":"NA",
"location":"Wilmington, DE",
"date_posted":"14 hours ago",
"requirements":[
"NA"
]
}
],
"Zuma":[
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"San Francisco Bay Area",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
}
],
"Tinder":[
{
"title":"Data Scientist I",
"description":"NA",
"location":"West Hollywood, CA",
"date_posted":"23 hours ago",
"requirements":[
"NA"
]
}
],
"Moveworks":[
{
"title":"Machine Learning Engineer Intern - NLU & ML Infra",
"description":"NA",
"location":"Mountain View, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Cognitiv":[
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"Berkeley, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"DoorDash":[
{
"title":"Machine Learning Engineer, Forecast Platform",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer, Forecast Platform",
"description":"NA",
"location":"Sunnyvale, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer - New Verticals",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"PipeIQ":[
{
"title":"Machine Learning Engineer Intern (NLP)",
"description":"NA",
"location":"Palo Alto, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Fractal":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"California, United States",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
}
],
"Accroid Inc":[
{
"title":"Machine Learning Engineer/Python",
"description":"NA",
"location":"Austin, TX",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
}
],
"Notion":[
{
"title":"Software Engineer, Machine Learning",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Software Engineer, Machine Learning",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"PhysicsX":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York, United States",
"date_posted":"1 week ago",
"requirements":[
"NA"
]
}
],
"HireIO, Inc.":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Dexian Inc":[
{
"title":"Junior Machine Learning Engineer",
"description":"NA",
"location":"Columbia, MD",
"date_posted":"4 days ago",
"requirements":[
"NA"
]
}
],
"Google":[
{
"title":"Software Engineer, Early Career",
"description":"NA",
"location":"New York, NY",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
},
{
"title":"Software Engineer, Early Career",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
},
{
"title":"Software Engineer, Early Career",
"description":"NA",
"location":"Mountain View, CA",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
},
{
"title":"Software Engineer, Early Career",
"description":"NA",
"location":"Sunnyvale, CA",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
},
{
"title":"Customer Engineering, AI/ML (English, Italian)",
"description":"Candidates will typically have 6 years of experience as a technical sales engineer in a cloud computing environment.",
"location":"Milano, Lombardia",
"date_posted":"15 giorni fa",
"requirements":[
"NA"
]
}
],
"Unreal Staffing, Inc":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Reveal HealthTech":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Boston, MA",
"date_posted":"3 days ago",
"requirements":[
"NA"
]
}
],
"Replicate":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"4 weeks ago",
"requirements":[
"NA"
]
}
],
"Truveta":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Greater Seattle Area",
"date_posted":"3 days ago",
"requirements":[
"NA"
]
}
],
"Atlassian":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"Continua AI, Inc.":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Seattle, WA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"Software Technology Inc.":[
{
"title":"Data Scientist/ ML Engineer | Remote | Long Term",
"description":"NA",
"location":"United States",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
},
{
"title":"Data Scientist/ ML Engineer | Remote | Long Term",
"description":"NA",
"location":"United States",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Neptune Technologies LLC":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"1 day ago",
"requirements":[
"NA"
]
}
],
"Zoom":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Jose, CA",
"date_posted":"4 weeks ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"California, United States",
"date_posted":"4 weeks ago",
"requirements":[
"NA"
]
}
],
"HP":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Palo Alto, CA",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Enterprise Minds, Inc":[
{
"title":"Machine Learning Software Engineer",
"description":"NA",
"location":"Mountain View, CA",
"date_posted":"1 week ago",
"requirements":[
"NA"
]
}
],
"Celonis":[
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"New York, NY",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"Palo Alto, CA",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
}
],
"Lockheed Martin":[
{
"title":"A/AI Machine Learning Engineer",
"description":"NA",
"location":"Littleton, CO",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Two Dots":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Los Angeles, CA",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Verneek":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York, NY",
"date_posted":"1 week ago",
"requirements":[
"NA"
]
}
],
"Rivian":[
{
"title":"Machine Learning Software Engineer",
"description":"NA",
"location":"Palo Alto, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Impax Recruitment":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Stripe":[
{
"title":"Machine Learning Engineer, Risk",
"description":"NA",
"location":"United States",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
}
],
"Adobe":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Jose, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"Javelin":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York City Metropolitan Area",
"date_posted":"1 week ago",
"requirements":[
"NA"
]
}
],
"Ultralytics":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Supernormal":[
{
"title":"Machine Learning Engineer (with a focus on modeling)",
"description":"NA",
"location":"Seattle, WA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Samsung Electronics America":[
{
"title":"Machine Learning Engineer Data Science",
"description":"NA",
"location":"Mountain View, CA",
"date_posted":"4 weeks ago",
"requirements":[
"NA"
]
}
],
"Skale":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Steneral Consulting":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Movable Ink":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"LHH":[
{
"title":"DevOps Engineer",
"description":"Per azienda cliente Fit2you, siamo alla ricerca di un DevOps Engineer presso la sede di Milano che possa operare all'intersezione di Fit2you Broker e Air, guidando l'innovazione tecnologica e l'efficienza operativa in entrambi i contesti. Questo ruolo unico offre l'opportunità di influenzare significativamente due diversi, ma complementari, settori dell'industria automotive, dal brokeraggio assicurativo ai big data e alle auto connesse.",
"location":"Italy",
"date_posted":"15d",
"requirements":[
"CI/CD",
"DevOps",
"AWS",
"JavaScript",
"Integrazione continua"
]
}
],
"Deloitte":[
{
"title":"Experienced - Cloud Test Engineer - Cloud Native Development & Migration - NextHub Bari",
"description":"Scopri di più sulle nostre strategie di Corporate Sustainability, tra cui Well-being, la strategia volta a migliorare il benessere fisico, mentale e sociale.",
"location":"Bari",
"date_posted":"14d",
"requirements":[
"ASP.NET",
"Azure",
"DevOps",
"C#",
"Automazione dei test"
]
}
],
"MACMARK":[
{
"title":"MID/SENIOR BACKEND DEVELOPER IN PRESENZA",
"description":"Sarà possibile solo lavorare in presenza, pertanto sei disponibile a lavorare nella sede di Rende (CS)? Buona propensione nel lavorare in Team.",
"location":"Rende",
"date_posted":"7d",
"requirements":[
"Infrastrutture cloud",
"Azure",
"CSS",
"Git",
"Google Cloud Platform"
]
},
{
"title":"MID/SENIOR FRONTEND DEVELOPER IN PRESENZA",
"description":"Buona propensione nel lavorare in Team. O Laurea in informatica ed almeno 1/2 anni di esperienza in un contesto di sviluppo software.",
"location":"Rende",
"date_posted":"7d",
"requirements":[
"Infrastrutture cloud",
"CSS",
"React",
"Git",
"Google Cloud Platform"
]
}
],
"Assist Digital Spa":[
{
"title":"System & Networking Engineer",
"description":"Eu. Il Trattamento è realizzato, con il suo consenso, per realizzare processi di ricerca, selezione e valutazione del personale svolti per conto proprio, per.",
"location":"Roma",
"date_posted":"30d+",
"requirements":[
"Inglese",
"Windows",
"Sistemi di sicurezza",
"AWS",
"Virtualizzazione"
]
},
{
"title":"Prompt Engineer",
"description":"You, as data subject of the processing of personal data, may exercise at any time the rights expressly granted by the European Regulation, and in particular.",
"location":"Roma",
"date_posted":"30d+",
"requirements":[
"Strutture dati",
"Inglese",
"Google Cloud Platform",
"AWS",
"C"
]
}
],
"TOOLS FOR SMART MINDS S.r.l.":[
{
"title":"Sviluppatore software",
"description":"predisposizione a lavorare in team. La nostra missione è creare valore per le aziende che vogliono intraprendere la trasformazione 4.0 con soluzioni su misura.",
"location":"Castel Mella",
"date_posted":"30d+",
"requirements":[
"Inglese",
"Machine learning",
"Intelligenza artificiale"
]
},
{
"title":"Sviluppatore software - linguaggio OWL e SPARQL",
"description":"predisposizione a lavorare in team. La nostra missione è creare valore per le aziende che vogliono intraprendere la trasformazione 4.0 con soluzioni su misura."
}
]
}
}

View File

@ -0,0 +1,44 @@
import os, json
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
from scrapegraphai.utils import create_graph, create_interactive_graph_retrieval
load_dotenv()

# Load the OpenAI API key and the embeddings model
openai_key = os.getenv("OPENAI_APIKEY")
embeddings_model = OpenAIEmbeddings(api_key=openai_key)

# Paths are resolved relative to this script so it can be run from any
# working directory
curr_dir = os.path.dirname(os.path.realpath(__file__))
json_file_path = os.path.join(curr_dir, 'input', 'job_postings.json')
vector_store_output_path = os.path.join(curr_dir, 'output', 'faiss_index')
retrieval_graph_output_path = os.path.join(curr_dir, 'output', 'job_postings_retrieval.html')

# Load the job postings JSON file.
# The postings contain non-ASCII text (Italian descriptions), so the file is
# decoded explicitly as UTF-8 instead of relying on the platform default.
with open(json_file_path, 'r', encoding='utf-8') as f:
    job_postings = json.load(f)

# Load the vector store
# NOTE(review): allow_dangerous_deserialization=True opts into loading
# serialized Python objects from disk; only point this at an index that was
# produced locally by the companion build script.
db = FAISS.load_local(
    vector_store_output_path,
    embeddings_model,
    allow_dangerous_deserialization=True
)

# User prompt for similarity search
user_prompt = "Company based United States with job title Software Engineer"

# Similarity search on the vector store
# NOTE(review): fetch_k presumably only sizes the candidate pool used when a
# filter is applied; the number of returned hits is governed by `k` -- confirm
# whether k=10 was intended here.
result = db.similarity_search_with_score(user_prompt, fetch_k=10)

# Each hit is a (document, score) pair; the document text is the company name.
# The index stores one entry per job, so the same company can be returned
# several times -- keep the first occurrence of each, preserving rank order.
found_companies = list(dict.fromkeys(doc.page_content for doc, _score in result))

# Build the graph
graph = create_graph(job_postings)

# Create the interactive graph
create_interactive_graph_retrieval(graph, found_companies, output_file=retrieval_graph_output_path)

Binary file not shown.

View File

@ -0,0 +1,41 @@
import json
import os
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
load_dotenv()

# Load the OpenAI API key and the embeddings model
openai_key = os.getenv("OPENAI_APIKEY")
embeddings_model = OpenAIEmbeddings(api_key=openai_key)

# Paths are resolved relative to this script so it can be run from any
# working directory
curr_dir = os.path.dirname(os.path.realpath(__file__))
json_file_path = os.path.join(curr_dir, 'input', 'job_postings.json')
vector_store_output_path = os.path.join(curr_dir, 'output', 'faiss_index')

# Load the job postings JSON file.
# The postings contain non-ASCII text (Italian descriptions), so the file is
# decoded explicitly as UTF-8 instead of relying on the platform default.
with open(json_file_path, 'r', encoding='utf-8') as f:
    job_postings = json.load(f)

# One entry per job: the embedded text is the company name, while the job
# details travel alongside it as metadata. `texts` and `metadata` are kept
# index-aligned.
texts = []
metadata = []

# Extract company names and job details
for company, jobs in job_postings["Job Postings"].items():
    for job in jobs:
        texts.append(company)
        # Some postings omit fields, so fall back to placeholders
        metadata.append({
            "title": job.get("title", "N/A"),
            "description": job.get("description", "N/A"),
            "location": job.get("location", "N/A"),
            "date_posted": job.get("date_posted", "N/A"),
            "requirements": job.get("requirements", [])
        })

# Create the vector store
db = FAISS.from_texts(texts=texts, embedding=embeddings_model, metadatas=metadata)

# Save the embeddings locally
db.save_local(vector_store_output_path)

View File

@ -9,4 +9,4 @@ from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers
from .save_audio_from_bytes import save_audio_from_bytes
from .sys_dynamic_import import dynamic_import, srcfile_import
from .cleanup_html import cleanup_html
from .knowledge_graph import create_graph, add_customizations, create_interactive_graph
from .knowledge_graph import create_graph, create_interactive_graph, create_interactive_graph_retrieval

View File

@ -70,6 +70,79 @@ def add_customizations(net, graph):
net.add_edge(edge[0], edge[1])
return net
# Add customizations to the network
def add_customizations_retrieval(net, graph, found_companies):
    """
    Populate ``net`` from ``graph``, highlighting the retrieved companies.

    Every company in ``found_companies`` is drawn in red together with the
    chain of first predecessors leading up to the "Job Postings" root and
    its ``"<company>-Job<idx>"`` job nodes. Edges from a highlighted job to
    its detail nodes are red as well, while the detail nodes themselves and
    everything else in the graph are drawn in a faded light grey.

    Args:
        net: Network object exposing ``add_node`` and ``add_edge``.
        graph: Directed graph exposing ``nodes``, ``edges``,
            ``predecessors``, ``successors`` and ``out_degree``.
        found_companies: Iterable of company node names to highlight. Names
            that are not nodes of ``graph`` are ignored, and duplicates are
            processed once.

    Returns:
        The same ``net`` object, with all nodes and edges of ``graph`` added.
    """
    root = "Job Postings"
    highlight = 'red'
    faded = 'rgba(211, 211, 211, 0.5)'  # light grey with transparency

    # Custom colour and size for the root node. Note that the root is
    # re-styled as highlighted below as soon as a found company links to it.
    node_colors = {root: '#8470FF'}
    node_sizes = {root: 50}

    # De-duplicate while preserving order: retrieval can return the same
    # company once per job posting.
    companies = list(dict.fromkeys(found_companies))

    # Nodes and edges to highlight in red
    highlighted_nodes = set(companies)
    highlighted_edges = set()

    for company in companies:
        # Retrieval results may come from an index that is out of sync with
        # the graph; querying an unknown node would raise, so skip it.
        if company not in graph.nodes:
            continue

        node_colors[company] = highlight
        node_sizes[company] = 30

        # Highlight the path to the root, following the first predecessor
        node = company
        while node != root:
            predecessors = list(graph.predecessors(node))
            if not predecessors:
                break
            predecessor = predecessors[0]
            highlighted_nodes.add(predecessor)
            node_colors[predecessor] = highlight
            node_sizes[predecessor] = 30
            highlighted_edges.add((predecessor, node))
            node = predecessor

        # Highlight job nodes and edges
        for idx in range(1, graph.out_degree(company) + 1):
            job_node = f"{company}-Job{idx}"
            if job_node not in graph.nodes:
                continue
            highlighted_nodes.add(job_node)
            node_colors[job_node] = highlight
            node_sizes[job_node] = 20
            highlighted_edges.add((company, job_node))

            # Job detail nodes stay faded, but the edges leading to them
            # are highlighted
            for successor in graph.successors(job_node):
                if successor not in highlighted_nodes:
                    node_colors[successor] = faded
                    node_sizes[successor] = 10
                highlighted_edges.add((job_node, successor))

    # Fade every node that received no explicit style; names containing '-'
    # (job and job-detail nodes) are drawn smaller than company nodes
    for node in graph.nodes:
        if node not in node_colors:
            node_colors[node] = faded
            node_sizes[node] = 10 if '-' in node else 15

    # Add nodes and edges to the network with customized styles
    for node in graph.nodes:
        attrs = graph.nodes[node]
        net.add_node(node,
                     label=attrs.get('label', node.split('-')[-1]),
                     color=node_colors.get(node, 'lightgray'),
                     size=node_sizes.get(node, 15),
                     title=attrs.get('title', ''))

    for edge in graph.edges:
        edge_color = highlight if edge in highlighted_edges else faded
        net.add_edge(edge[0], edge[1], color=edge_color)

    return net
# Create interactive graph
def create_interactive_graph(graph, output_file='interactive_graph.html'):
net = Network(notebook=False, height='1000px', width='100%', bgcolor='white', font_color='black')
@ -79,3 +152,11 @@ def create_interactive_graph(graph, output_file='interactive_graph.html'):
# Automatically open the generated HTML file in the default web browser
webbrowser.open(f"file://{os.path.realpath(output_file)}")
# Create interactive graph
def create_interactive_graph_retrieval(graph, found_companies, output_file='interactive_graph.html'):
    """
    Render the graph with the retrieved companies highlighted and open it.

    Args:
        graph: Graph to render.
        found_companies: Company node names to highlight.
        output_file: Path of the HTML file to write.
    """
    # Local import keeps this function self-contained with respect to the
    # module's existing import block
    from pathlib import Path

    net = Network(notebook=False, height='1000px', width='100%', bgcolor='white', font_color='black')
    net = add_customizations_retrieval(net, graph, found_companies)
    net.save_graph(output_file)

    # Automatically open the generated HTML file in the default web browser.
    # as_uri() yields a well-formed, percent-encoded file URI on every
    # platform (hand-built "file://" + path breaks on Windows drive paths
    # and on paths containing spaces or other special characters).
    webbrowser.open(Path(os.path.realpath(output_file)).as_uri())