fix(kg): removed unused nodes and utils

This commit is contained in:
Marco Perini 2024-05-25 00:44:14 +02:00
parent edf221dcd9
commit 5684578fab
12 changed files with 2 additions and 1258 deletions

View File

@ -1,704 +0,0 @@
{
"Job Postings":{
"Netflix":[
{
"title":"Machine Learning Engineer (L4) - Infrastructure Algorithms and ML",
"description":"NA",
"location":"Los Gatos, CA",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer L4, Algorithms Engineering",
"description":"NA",
"location":"Los Gatos, CA",
"date_posted":"18 hours ago",
"requirements":[
"NA"
]
}
],
"Rose AI":[
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Team Remotely Inc":[
{
"title":"Junior Machine Learning Engineer",
"description":"NA",
"location":"Wilmington, DE",
"date_posted":"14 hours ago",
"requirements":[
"NA"
]
}
],
"Zuma":[
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"San Francisco Bay Area",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
}
],
"Tinder":[
{
"title":"Data Scientist I",
"description":"NA",
"location":"West Hollywood, CA",
"date_posted":"23 hours ago",
"requirements":[
"NA"
]
}
],
"Moveworks":[
{
"title":"Machine Learning Engineer Intern - NLU & ML Infra",
"description":"NA",
"location":"Mountain View, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Cognitiv":[
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"Berkeley, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"DoorDash":[
{
"title":"Machine Learning Engineer, Forecast Platform",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer, Forecast Platform",
"description":"NA",
"location":"Sunnyvale, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer - New Verticals",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"PipeIQ":[
{
"title":"Machine Learning Engineer Intern (NLP)",
"description":"NA",
"location":"Palo Alto, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Fractal":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"California, United States",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
}
],
"Accroid Inc":[
{
"title":"Machine Learning Engineer/Python",
"description":"NA",
"location":"Austin, TX",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
}
],
"Notion":[
{
"title":"Software Engineer, Machine Learning",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Software Engineer, Machine Learning",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"PhysicsX":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York, United States",
"date_posted":"1 week ago",
"requirements":[
"NA"
]
}
],
"HireIO, Inc.":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Dexian Inc":[
{
"title":"Junior Machine Learning Engineer",
"description":"NA",
"location":"Columbia, MD",
"date_posted":"4 days ago",
"requirements":[
"NA"
]
}
],
"Google":[
{
"title":"Software Engineer, Early Career",
"description":"NA",
"location":"New York, NY",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
},
{
"title":"Software Engineer, Early Career",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
},
{
"title":"Software Engineer, Early Career",
"description":"NA",
"location":"Mountain View, CA",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
},
{
"title":"Software Engineer, Early Career",
"description":"NA",
"location":"Sunnyvale, CA",
"date_posted":"11 hours ago",
"requirements":[
"NA"
]
},
{
"title":"Customer Engineering, AI/ML (English, Italian)",
"description":"Candidates will typically have 6 years of experience as a technical sales engineer in a cloud computing environment.",
"location":"Milano, Lombardia",
"date_posted":"15 giorni fa",
"requirements":[
"NA"
]
}
],
"Unreal Staffing, Inc":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Reveal HealthTech":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Boston, MA",
"date_posted":"3 days ago",
"requirements":[
"NA"
]
}
],
"Replicate":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"4 weeks ago",
"requirements":[
"NA"
]
}
],
"Truveta":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Greater Seattle Area",
"date_posted":"3 days ago",
"requirements":[
"NA"
]
}
],
"Atlassian":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"Continua AI, Inc.":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Seattle, WA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"Software Technology Inc.":[
{
"title":"Data Scientist/ ML Engineer | Remote | Long Term",
"description":"NA",
"location":"United States",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
},
{
"title":"Data Scientist/ ML Engineer | Remote | Long Term",
"description":"NA",
"location":"United States",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Neptune Technologies LLC":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"1 day ago",
"requirements":[
"NA"
]
}
],
"Zoom":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Jose, CA",
"date_posted":"4 weeks ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"California, United States",
"date_posted":"4 weeks ago",
"requirements":[
"NA"
]
}
],
"HP":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Palo Alto, CA",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Enterprise Minds, Inc":[
{
"title":"Machine Learning Software Engineer",
"description":"NA",
"location":"Mountain View, CA",
"date_posted":"1 week ago",
"requirements":[
"NA"
]
}
],
"Celonis":[
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"New York, NY",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
},
{
"title":"Machine Learning Engineer Intern",
"description":"NA",
"location":"Palo Alto, CA",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
}
],
"Lockheed Martin":[
{
"title":"A/AI Machine Learning Engineer",
"description":"NA",
"location":"Littleton, CO",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Two Dots":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"Los Angeles, CA",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Verneek":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York, NY",
"date_posted":"1 week ago",
"requirements":[
"NA"
]
}
],
"Rivian":[
{
"title":"Machine Learning Software Engineer",
"description":"NA",
"location":"Palo Alto, CA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Impax Recruitment":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Stripe":[
{
"title":"Machine Learning Engineer, Risk",
"description":"NA",
"location":"United States",
"date_posted":"3 weeks ago",
"requirements":[
"NA"
]
}
],
"Adobe":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Jose, CA",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"Javelin":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York City Metropolitan Area",
"date_posted":"1 week ago",
"requirements":[
"NA"
]
}
],
"Ultralytics":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"New York, NY",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Supernormal":[
{
"title":"Machine Learning Engineer (with a focus on modeling)",
"description":"NA",
"location":"Seattle, WA",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Samsung Electronics America":[
{
"title":"Machine Learning Engineer Data Science",
"description":"NA",
"location":"Mountain View, CA",
"date_posted":"4 weeks ago",
"requirements":[
"NA"
]
}
],
"Skale":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"San Francisco, CA",
"date_posted":"2 weeks ago",
"requirements":[
"NA"
]
}
],
"Steneral Consulting":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"1 month ago",
"requirements":[
"NA"
]
}
],
"Movable Ink":[
{
"title":"Machine Learning Engineer",
"description":"NA",
"location":"United States",
"date_posted":"2 months ago",
"requirements":[
"NA"
]
}
],
"LHH":[
{
"title":"DevOps Engineer",
"description":"Per azienda cliente Fit2you, siamo alla ricerca di un DevOps Engineer presso la sede di Milano che possa operare all'intersezione di Fit2you Broker e Air, guidando l'innovazione tecnologica e l'efficienza operativa in entrambi i contesti. Questo ruolo unico offre l'opportunità di influenzare significativamente due diversi, ma complementari, settori dell'industria automotive, dal brokeraggio assicurativo ai big data e alle auto connesse.",
"location":"Italy",
"date_posted":"15d",
"requirements":[
"CI/CD",
"DevOps",
"AWS",
"JavaScript",
"Integrazione continua"
]
}
],
"Deloitte":[
{
"title":"Experienced - Cloud Test Engineer - Cloud Native Development & Migration - NextHub Bari",
"description":"Scopri di più sulle nostre strategie di Corporate Sustainability, tra cui Well-being, la strategia volta a migliorare il benessere fisico, mentale e sociale.",
"location":"Bari",
"date_posted":"14d",
"requirements":[
"ASP.NET",
"Azure",
"DevOps",
"C#",
"Automazione dei test"
]
}
],
"MACMARK":[
{
"title":"MID/SENIOR BACKEND DEVELOPER IN PRESENZA",
"description":"Sarà possibile solo lavorare in presenza, pertanto sei disponibile a lavorare nella sede di Rende (CS)? Buona propensione nel lavorare in Team.",
"location":"Rende",
"date_posted":"7d",
"requirements":[
"Infrastrutture cloud",
"Azure",
"CSS",
"Git",
"Google Cloud Platform"
]
},
{
"title":"MID/SENIOR FRONTEND DEVELOPER IN PRESENZA",
"description":"Buona propensione nel lavorare in Team. O Laura in informativa ed almeno 1/2 anni di esperienza in un contesto di sviluppo software.",
"location":"Rende",
"date_posted":"7d",
"requirements":[
"Infrastrutture cloud",
"CSS",
"React",
"Git",
"Google Cloud Platform"
]
}
],
"Assist Digital Spa":[
{
"title":"System & Networking Engineer",
"description":"Eu. Il Trattamento è realizzato, con il suo consenso, per realizzare processi di ricerca, selezione e valutazione del personale svolti per conto proprio, per.",
"location":"Roma",
"date_posted":"30d+",
"requirements":[
"Inglese",
"Windows",
"Sistemi di sicurezza",
"AWS",
"Virtualizzazione"
]
},
{
"title":"Prompt Engineer",
"description":"You, as data subject of the processing of personal data, may exercise at any time the rights expressly granted by the European Regulation, and in particular.",
"location":"Roma",
"date_posted":"30d+",
"requirements":[
"Strutture dati",
"Inglese",
"Google Cloud Platform",
"AWS",
"C"
]
}
],
"TOOLS FOR SMART MINDS S.r.l.":[
{
"title":"Sviluppatore software",
"description":"predisposizione a lavorare in team. La nostra missione è creare valore per le aziende che vogliono intraprendere la trasformazione 4.0 con soluzioni su misura.",
"location":"Castel Mella",
"date_posted":"30d+",
"requirements":[
"Inglese",
"Machine learning",
"Intelligenza artificiale"
]
},
{
"title":"Sviluppatore software - linguaggio OWL e SPARQL",
"description":"predisposizione a lavorare in team. La nostra missione è creare valore per le aziende che vogliono intraprendere la trasformazione 4.0 con soluzioni su misura."
}
]
}
}

View File

@ -1,134 +0,0 @@
"""
Example of custom graph for creating a knowledge graph
"""
import os, json
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph, SmartScraperGraph
from scrapegraphai.nodes import GraphIteratorNode, MergeAnswersNode, KnowledgeGraphNode
load_dotenv()
# ************************************************
# Define the output schema
# ************************************************

schema = """{
"Job Postings": {
"Company x": [
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
},
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
}
],
"Company y": [
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
}
]
}
}"""

# ************************************************
# Define the configuration for the graph
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": False,
}

# ************************************************
# Instantiate the LLM, the embedder and the scraper graph
# that is handed to the iterator node
# ************************************************

llm_model = OpenAI(graph_config["llm"])
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
smart_scraper_instance = SmartScraperGraph(prompt="", source="", config=graph_config)

# ************************************************
# Define the graph nodes
# ************************************************

graph_iterator_node = GraphIteratorNode(
    input="user_prompt & urls",
    output=["results"],
    node_config={"graph_instance": smart_scraper_instance},
)

merge_answers_node = MergeAnswersNode(
    input="user_prompt & results",
    output=["answer"],
    node_config={"llm_model": llm_model, "schema": schema},
)

knowledge_graph_node = KnowledgeGraphNode(
    input="user_prompt & answer",
    output=["kg"],
    node_config={"llm_model": llm_model},
)

# The pipeline is linear, so each node is simply connected to the next one.
pipeline = [graph_iterator_node, merge_answers_node, knowledge_graph_node]

graph = BaseGraph(
    nodes=pipeline,
    edges=list(zip(pipeline, pipeline[1:])),
    entry_point=pipeline[0],
)

# ************************************************
# Execute the graph
# ************************************************

initial_state = {
    "user_prompt": "List me all the Machine Learning Engineer job postings",
    "urls": [
        "https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
        "https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
        "https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa",
    ],
}
result, execution_info = graph.execute(initial_state)

# Pull the merged answer out of the final state and pretty-print it
result = result.get("answer", "No answer found.")
print(json.dumps(result, indent=4))

View File

@ -1,44 +0,0 @@
import os, json
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
from scrapegraphai.utils import create_graph, create_interactive_graph_retrieval
load_dotenv()
# Load the OpenAI API key and the embeddings model
openai_key = os.getenv("OPENAI_APIKEY")
embeddings_model = OpenAIEmbeddings(api_key=openai_key)

# Paths, all resolved relative to the directory containing this script
curr_dir = os.path.dirname(os.path.realpath(__file__))
json_file_path = os.path.join(curr_dir, 'input', 'job_postings.json')
vector_store_output_path = os.path.join(curr_dir, 'output', 'faiss_index')
retrieval_graph_output_path = os.path.join(curr_dir, 'output', 'job_postings_retrieval.html')

# Load the job postings JSON file.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform default encoding (which breaks on non-ASCII text on some
# systems, e.g. Windows).
with open(json_file_path, 'r', encoding='utf-8') as f:
    job_postings = json.load(f)

# Load the vector store.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading a
# serialized index from disk; only point this at an index you built yourself.
db = FAISS.load_local(
    vector_store_output_path,
    embeddings_model,
    allow_dangerous_deserialization=True
)

# User prompt for similarity search
user_prompt = "Company based United States with job title Software Engineer"

# Similarity search on the vector store
result = db.similarity_search_with_score(user_prompt, fetch_k=10)

# Each hit is indexed at position 0 to reach the matched document; its
# page_content is what gets highlighted in the graph below
# (presumably the company name -- verify against the index-building script).
found_companies = [hit[0].page_content for hit in result]

# Build the graph
graph = create_graph(job_postings)

# Create the interactive graph
create_interactive_graph_retrieval(graph, found_companies, output_file=retrieval_graph_output_path)

View File

@ -1,41 +0,0 @@
import json
import os
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
load_dotenv()
# Load the OpenAI API key and the embeddings model
openai_key = os.getenv("OPENAI_APIKEY")
embeddings_model = OpenAIEmbeddings(api_key=openai_key)

# Paths, all resolved relative to the directory containing this script
curr_dir = os.path.dirname(os.path.realpath(__file__))
json_file_path = os.path.join(curr_dir, 'input', 'job_postings.json')
vector_store_output_path = os.path.join(curr_dir, 'output', 'faiss_index')

# Load the job postings JSON file.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform default encoding (which breaks on non-ASCII text on some
# systems, e.g. Windows).
with open(json_file_path, 'r', encoding='utf-8') as f:
    job_postings = json.load(f)

texts = []
metadata = []

# One vector-store entry per job: the embedded text is the company name,
# while the job details travel alongside it as metadata.
for company, jobs in job_postings["Job Postings"].items():
    for job in jobs:
        texts.append(company)
        metadata.append({
            "title": job.get("title", "N/A"),
            "description": job.get("description", "N/A"),
            "location": job.get("location", "N/A"),
            "date_posted": job.get("date_posted", "N/A"),
            "requirements": job.get("requirements", [])
        })

# Create the vector store
db = FAISS.from_texts(texts=texts, embedding=embeddings_model, metadatas=metadata)

# Save the embeddings locally
db.save_local(vector_store_output_path)

View File

@ -11,8 +11,7 @@ from .smart_scraper_graph import SmartScraperGraph
from ..nodes import (
GraphIteratorNode,
MergeAnswersNode,
KnowledgeGraphNode
MergeAnswersNode
)

View File

@ -19,5 +19,4 @@ from .generate_answer_csv_node import GenerateAnswerCSVNode
from .generate_answer_pdf_node import GenerateAnswerPDFNode
from .graph_iterator_node import GraphIteratorNode
from .merge_answers_node import MergeAnswersNode
from .generate_answer_omni_node import GenerateAnswerOmniNode
from .knowledge_graph_node import KnowledgeGraphNode
from .generate_answer_omni_node import GenerateAnswerOmniNode

View File

@ -1,67 +0,0 @@
"""
BlocksIndentifier Module
"""
from typing import List, Optional
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_core.documents import Document
from .base_node import BaseNode
class BlocksIndentifier(BaseNode):
    """
    A node meant to identify the repeated blocks in the HTML content of a page,
    e.g. products on an e-commerce site, flights on a travel website, etc.

    NOTE(review): the identification step itself is not implemented yet;
    ``execute`` only resolves its inputs and passes the state through.

    Attributes:
        headless (bool): A flag indicating whether the browser should run in headless mode.
        verbose (bool): A flag indicating whether to print verbose output during execution.

    Args:
        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (Optional[dict]): Additional configuration for the node. May be
            omitted or None, in which case the defaults below apply.
        node_name (str): The unique identifier name for the node, defaulting to "BlocksIndentifier".
    """

    def __init__(
        self,
        input: str,
        output: List[str],
        node_config: Optional[dict] = None,
        node_name: str = "BlocksIndentifier",
    ):
        super().__init__(node_name, "node", input, output, 1)

        # Use the same defaults whether the config is missing entirely or just
        # lacks the key (previously verbose flipped to True for a None config).
        config = node_config or {}
        self.headless = config.get("headless", True)
        self.verbose = config.get("verbose", False)

    def execute(self, state):
        """
        Executes the node's logic, characterized by a pre-processing of the HTML content
        and a subsequent identification of the blocks in the HTML content.

        Args:
            state (dict): The current state of the graph. The input keys will be used
                to fetch the correct data types from the state.

        Returns:
            dict: The state that was passed in. No output key is written yet because
                the block identification is not implemented.

        Raises:
            KeyError: If the input key is not found in the state, indicating that the
                necessary information to perform the operation is missing.
        """
        self.logger.info(f"--- Executing {self.node_name} Node ---")

        # Interpret input keys based on the provided input expression
        input_keys = self.get_input_keys(state)

        # Fetching data from the state based on the input keys; not consumed yet,
        # but still surfaces a KeyError early when a required key is absent.
        input_data = [state[key] for key in input_keys]

        # Hand the state back so the node does not return None to its caller.
        return state

View File

@ -1,101 +0,0 @@
"""
KnowledgeGraphNode Module
"""
# Imports from standard library
from typing import List, Optional
from tqdm import tqdm
# Imports from Langchain
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
# Imports from the library
from .base_node import BaseNode
from ..utils import create_graph, create_interactive_graph
class KnowledgeGraphNode(BaseNode):
    """
    A node responsible for generating a knowledge graph from a dictionary.

    Attributes:
        llm_model: The language model client taken from ``node_config["llm_model"]``,
            or None when it is not configured. ``execute`` does not use it.
        verbose (bool): A flag indicating whether to show print statements during execution.

    Args:
        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (dict): Additional configuration for the node.
        node_name (str): The unique identifier name for the node, defaulting to "KnowledgeGraph".
    """

    def __init__(self, input: str, output: List[str], node_config: Optional[dict] = None,
                 node_name: str = "KnowledgeGraph"):
        super().__init__(node_name, "node", input, output, 2, node_config)

        # node_config defaults to None, so never subscript it directly.
        config = node_config or {}
        self.llm_model = config.get("llm_model")
        self.verbose = config.get("verbose", False)

    def execute(self, state: dict) -> dict:
        """
        Executes the node's logic to create a knowledge graph from a dictionary.

        The graph is built with ``create_graph`` and rendered with
        ``create_interactive_graph`` into 'knowledge_graph.html'.

        Args:
            state (dict): The current state of the graph. The input keys will be used
                to fetch the correct data from the state. The second resolved input
                is the dictionary the graph is built from.

        Returns:
            dict: The updated state, with the first output key holding the graph
                returned by ``create_graph``.

        Raises:
            KeyError: If the input keys are not found in the state, indicating
                that the necessary information for building the graph is missing.
        """
        if self.verbose:
            print(f"--- Executing {self.node_name} Node ---")

        # Interpret input keys based on the provided input expression
        input_keys = self.get_input_keys(state)

        # Fetching data from the state based on the input keys
        input_data = [state[key] for key in input_keys]

        # The first input (the user prompt) is not needed to build the graph.
        answer_dict = input_data[1]

        # Build the graph
        graph = create_graph(answer_dict)

        # Create the interactive graph
        create_interactive_graph(graph, output_file='knowledge_graph.html')

        # Update the state with the generated graph
        state.update({self.output[0]: graph})
        return state

View File

@ -9,5 +9,4 @@ from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers
from .save_audio_from_bytes import save_audio_from_bytes
from .sys_dynamic_import import dynamic_import, srcfile_import
from .cleanup_html import cleanup_html
from .knowledge_graph import create_graph, create_interactive_graph, create_interactive_graph_retrieval
from .logging import *

View File

@ -1,162 +0,0 @@
import networkx as nx
from pyvis.network import Network
import webbrowser
import os
# Create and visualize graph
def create_graph(job_postings):
    """
    Build a directed graph from a job-postings dictionary.

    The graph is a tree rooted at "Job Postings": root -> company -> job ->
    field. A field whose value is a list gets one extra level with one node
    per list item.

    Args:
        job_postings (dict): Mapping with a "Job Postings" key whose value maps
            each company name to a list of job dictionaries.

    Returns:
        networkx.DiGraph: The graph; field and item nodes carry ``label`` and
            ``title`` attributes.
    """
    root = "Job Postings"
    dg = nx.DiGraph()
    dg.add_node(root)

    for company, postings in job_postings[root].items():
        dg.add_node(company)
        dg.add_edge(root, company)

        # Jobs are numbered from 1 within each company
        for number, posting in enumerate(postings, start=1):
            job_node = f"{company}-Job{number}"
            dg.add_node(job_node)
            dg.add_edge(company, job_node)

            for field, content in posting.items():
                field_node = f"{job_node}-{field}"

                # Scalar field: a single leaf showing "key: value" as its title
                if not isinstance(content, list):
                    dg.add_node(field_node, label=field, title=f"{field}: {content}")
                    dg.add_edge(job_node, field_node)
                    continue

                # List field: an intermediate node with one leaf per item
                dg.add_node(field_node, label=field)
                dg.add_edge(job_node, field_node)
                for entry in content:
                    entry_node = f"{field_node}-{entry}"
                    dg.add_node(entry_node, label=entry, title=entry)
                    dg.add_edge(field_node, entry_node)

    return dg
# Add customizations to the network
def add_customizations(net, graph):
    """
    Copy every node and edge of ``graph`` into ``net`` with per-tier styling.

    Tiers are: the "Job Postings" root, company nodes, job nodes and job-detail
    nodes. Each tier gets its own colour and size.

    Args:
        net: Network object exposing ``add_node`` and ``add_edge``.
        graph: Directed graph exposing ``nodes``, ``edges`` and ``successors``.

    Returns:
        The same ``net`` object, populated.
    """
    root = "Job Postings"

    # Classify nodes from the graph structure rather than by counting '-' in
    # the node id: a company name that itself contains a hyphen would otherwise
    # be styled as a job or detail node.
    if root in graph.nodes:
        company_nodes = set(graph.successors(root))
    else:
        company_nodes = set()
    job_nodes = {
        job
        for company in company_nodes
        for job in graph.successors(company)
    }

    for node in graph.nodes:
        if node == root:
            color, size = '#8470FF', 50
        elif node in company_nodes or '-' not in node:  # Company nodes
            color, size = '#3CB371', 30
        elif node in job_nodes or node.count('-') == 1:  # Job nodes
            color, size = '#FFA07A', 20
        else:  # Job detail nodes
            color, size = '#B0C4DE', 10

        # Root and company nodes are labelled with their full name; other nodes
        # fall back to the last '-'-separated segment of their id.
        if node == root or node in company_nodes:
            default_label = node
        else:
            default_label = node.split('-')[-1]

        attrs = graph.nodes[node]
        net.add_node(node,
                     label=attrs.get('label', default_label),
                     color=color,
                     size=size,
                     title=attrs.get('title', ''))

    # Edges are added only after all nodes exist in the network
    for source, target in graph.edges:
        net.add_edge(source, target)

    return net
# Add customizations to the network, highlighting the retrieved companies
def add_customizations_retrieval(net, graph, found_companies):
    """
    Copy ``graph`` into ``net``, highlighting the retrieved companies in red.

    For every found company the company node, its path up to the
    "Job Postings" root and its job nodes are drawn in red, together with the
    edges between them and the edges to the job details. Everything else is
    drawn in a translucent light grey.

    Args:
        net: Network object exposing ``add_node`` and ``add_edge``.
        graph: Directed graph exposing ``nodes``, ``edges``, ``predecessors``,
            ``successors`` and ``out_degree``.
        found_companies: Iterable of company node names to highlight. Names
            that are not nodes of ``graph`` are ignored.

    Returns:
        The same ``net`` object, populated.
    """
    root = "Job Postings"
    faded = 'rgba(211, 211, 211, 0.5)'  # light grey with transparency

    # Custom colors and sizes for nodes
    node_colors = {root: '#8470FF'}
    node_sizes = {root: 50}

    # Nodes and edges to highlight in red
    highlighted_nodes = set(found_companies)
    highlighted_edges = set()

    # dict.fromkeys drops duplicate hits while keeping their order
    for company in dict.fromkeys(found_companies):
        # A retrieved name may be missing from this graph (e.g. a stale index);
        # skip it instead of letting graph.predecessors raise.
        if company not in graph.nodes:
            continue

        node_colors[company] = 'red'
        node_sizes[company] = 30

        # Highlight the path to the root
        node = company
        while node != root:
            predecessors = list(graph.predecessors(node))
            if not predecessors:
                break
            predecessor = predecessors[0]
            highlighted_nodes.add(predecessor)
            node_colors[predecessor] = 'red'
            node_sizes[predecessor] = 30
            highlighted_edges.add((predecessor, node))
            node = predecessor

        # Highlight job nodes and edges
        for idx in range(1, graph.out_degree(company) + 1):
            job_node = f"{company}-Job{idx}"
            if job_node not in graph.nodes:
                continue
            highlighted_nodes.add(job_node)
            node_colors[job_node] = 'red'
            node_sizes[job_node] = 20
            highlighted_edges.add((company, job_node))

            # Job detail nodes stay faded; only the edge leading to them is red
            for successor in graph.successors(job_node):
                if successor not in highlighted_nodes:
                    node_colors[successor] = faded
                    node_sizes[successor] = 10
                highlighted_edges.add((job_node, successor))

    # Fade every node that has not been styled so far
    for node in graph.nodes:
        if node not in node_colors:
            node_colors[node] = faded
            node_sizes[node] = 10 if '-' in node else 15

    # Company nodes are labelled with their full name, so a hyphen inside a
    # company name does not truncate the label.
    if root in graph.nodes:
        company_nodes = set(graph.successors(root))
    else:
        company_nodes = set()

    # Add nodes and edges to the network with customized styles
    for node in graph.nodes:
        if node == root or node in company_nodes:
            default_label = node
        else:
            default_label = node.split('-')[-1]
        attrs = graph.nodes[node]
        net.add_node(node,
                     label=attrs.get('label', default_label),
                     color=node_colors[node],
                     size=node_sizes[node],
                     title=attrs.get('title', ''))

    for edge in graph.edges:
        edge_color = 'red' if edge in highlighted_edges else faded
        net.add_edge(edge[0], edge[1], color=edge_color)

    return net
# Create interactive graph
def create_interactive_graph(graph, output_file='interactive_graph.html'):
    """
    Render ``graph`` as an interactive HTML page and open it in the browser.

    Args:
        graph: Graph passed to ``add_customizations`` for styling.
        output_file (str): Path of the HTML file to write.
    """
    # Local import keeps this function self-contained
    from pathlib import Path

    net = Network(notebook=False, height='1000px', width='100%', bgcolor='white', font_color='black')
    net = add_customizations(net, graph)
    net.save_graph(output_file)

    # Automatically open the generated HTML file in the default web browser.
    # Path.as_uri() builds a well-formed, percent-encoded file:// URL on every
    # platform, unlike concatenating "file://" with a native path.
    webbrowser.open(Path(os.path.realpath(output_file)).as_uri())
# Create interactive graph with the retrieved companies highlighted
def create_interactive_graph_retrieval(graph, found_companies, output_file='interactive_graph.html'):
    """
    Render ``graph`` as an interactive HTML page with the retrieved companies
    highlighted, then open it in the browser.

    Args:
        graph: Graph passed to ``add_customizations_retrieval`` for styling.
        found_companies: Company names to highlight.
        output_file (str): Path of the HTML file to write.
    """
    # Local import keeps this function self-contained
    from pathlib import Path

    net = Network(notebook=False, height='1000px', width='100%', bgcolor='white', font_color='black')
    net = add_customizations_retrieval(net, graph, found_companies)
    net.save_graph(output_file)

    # Automatically open the generated HTML file in the default web browser.
    # Path.as_uri() builds a well-formed, percent-encoded file:// URL on every
    # platform, unlike concatenating "file://" with a native path.
    webbrowser.open(Path(os.path.realpath(output_file)).as_uri())