Spaces:

MCP-1st-Birthday
/

Jobly

Running

App Files Files Community

Jobly / app.py

Valentina9502

Update app.py

490576f verified 12 days ago

raw

history blame

25.5 kB

	import gradio as gr
	import anthropic
	import json
	import os
	from typing import Dict, List, Any
	from mcp.server import Server
	from mcp.types import Tool, TextContent
	import asyncio

	# LlamaIndex imports for RAG
	from llama_index.core import VectorStoreIndex, Document, Settings
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from llama_index.vector_stores.chroma import ChromaVectorStore
	import chromadb

	# Shared Anthropic client, created once at import time. The API key is read
	# from the ANTHROPIC_API_KEY environment variable (None if it is unset).
	client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))

	# ============== VECTOR DATABASE SETUP ==============

	# Load the sentence-transformers embedding model and register it on the
	# LlamaIndex global Settings object so the indices built later use it.
	print("🔄 Loading embedding model...")
	embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
	Settings.embed_model = embed_model
	Settings.llm = None # Disable LLM for LlamaIndex (we use Claude directly via MCP)
	Settings.chunk_size = 512

	# Create the ChromaDB client.
	# NOTE(review): chromadb.Client() is presumably the in-memory, non-persistent
	# client, so the collections start empty on every launch — confirm.
	chroma_client = chromadb.Client()

	# One collection per side of the marketplace: worker profiles and gig posts.
	workers_collection = chroma_client.get_or_create_collection("gig_workers")
	gigs_collection = chroma_client.get_or_create_collection("gig_posts")

	print("✅ Vector database ready!")

	# ============== LOAD AND INDEX DATA ==============

	def _load_json_records(path: str) -> List[Dict[str, Any]]:
	    """Return the parsed contents of a JSON seed file, or [] if it is missing.

	    Args:
	        path: Location of the JSON file to read.

	    Returns:
	        Whatever ``json.load`` produces for the file, or an empty list (after
	        printing a warning) when the file does not exist.
	    """
	    try:
	        # Decode explicitly as UTF-8 so non-ASCII values (such as the "€" this
	        # app uses for rates and budgets) do not depend on the host locale.
	        with open(path, "r", encoding="utf-8") as f:
	            return json.load(f)
	    except FileNotFoundError:
	        print(f"⚠️ {path} not found, using empty list")
	        return []


	def load_and_index_data():
	    """Load the worker and gig JSON files and build one vector index for each.

	    Every record becomes a LlamaIndex ``Document`` whose text is a labelled,
	    human-readable summary of the record and whose metadata is the record
	    itself, so search hits can be mapped straight back to the original dict.

	    Returns:
	        A 4-tuple ``(workers_index, gigs_index, workers_data, gigs_data)``.

	    Raises:
	        KeyError: If a record lacks one of the fields used in the summary text.
	    """
	    workers_data = _load_json_records("workers_data.json")
	    gigs_data = _load_json_records("gigs_data.json")

	    # Create documents for workers
	    worker_documents = []
	    for worker in workers_data:
	        # Create rich text representation for better semantic search
	        text = f"""
	Name: {worker['name']}
	Title: {worker['title']}
	Skills: {', '.join(worker['skills'])}
	Experience: {worker['experience']}
	Location: {worker['location']}
	Rate: {worker['hourly_rate']}
	Availability: {worker['availability']}
	Bio: {worker['bio']}
	"""
	        worker_documents.append(Document(text=text, metadata=worker))

	    # Create documents for gigs
	    gig_documents = []
	    for gig in gigs_data:
	        text = f"""
	Title: {gig['title']}
	Company: {gig['company']}
	Required Skills: {', '.join(gig['required_skills'])}
	Experience Level: {gig['experience_level']}
	Location: {gig['location']}
	Budget: {gig['budget']}
	Duration: {gig['duration']}
	Description: {gig['description']}
	"""
	        gig_documents.append(Document(text=text, metadata=gig))

	    # NOTE(review): `vector_store=` is forwarded to from_documents() as an
	    # extra keyword. LlamaIndex normally receives an external store through
	    # `storage_context=StorageContext.from_defaults(vector_store=...)`, so
	    # these Chroma collections may not actually be populated — confirm before
	    # relying on them. Left unchanged on purpose: routing data into Chroma
	    # could reject the list-valued metadata (e.g. "skills").
	    workers_vector_store = ChromaVectorStore(chroma_collection=workers_collection)
	    workers_index = VectorStoreIndex.from_documents(
	        worker_documents,
	        vector_store=workers_vector_store
	    )

	    gigs_vector_store = ChromaVectorStore(chroma_collection=gigs_collection)
	    gigs_index = VectorStoreIndex.from_documents(
	        gig_documents,
	        vector_store=gigs_vector_store
	    )

	    print(f"✅ Indexed {len(worker_documents)} workers and {len(gig_documents)} gigs")

	    return workers_index, gigs_index, workers_data, gigs_data

	# Build both vector indices once, at import time. The raw record lists are
	# kept as workers_db / gigs_db (the UI footer reads their lengths).
	print("🔄 Loading and indexing data...")
	workers_index, gigs_index, workers_db, gigs_db = load_and_index_data()
	print("✅ Data loaded and indexed!")

	# ============== MCP SERVER IMPLEMENTATION ==============

	# MCP server instance on which the list_tools / call_tool handlers below are
	# registered via its decorators.
	mcp_server = Server("gig-market-mcp-rag")

	@mcp_server.list_tools()
	async def list_tools() -> List[Tool]:
	    """Return the four MCP tool definitions exposed by this server.

	    Two tools turn free text into structured JSON (worker profile, gig post)
	    and two run semantic matching (gigs for a worker, workers for a gig).
	    """

	    def object_schema(properties, required):
	        # Common JSON-schema envelope shared by every tool input.
	        return {
	            "type": "object",
	            "properties": properties,
	            "required": required,
	        }

	    def free_text_schema(text_description):
	        # Input of the two "create_*" tools: a single required string.
	        return object_schema(
	            {"raw_text": {"type": "string", "description": text_description}},
	            ["raw_text"],
	        )

	    def matcher_schema(subject_key, subject_description):
	        # Input of the two "find_*" tools: a required object plus optional top_n.
	        return object_schema(
	            {
	                subject_key: {
	                    "type": "object",
	                    "description": subject_description,
	                },
	                "top_n": {
	                    "type": "integer",
	                    "description": "Number of top matches to return",
	                    "default": 5,
	                },
	            },
	            [subject_key],
	        )

	    profile_tool = Tool(
	        name="create_worker_profile",
	        description="Transform user's unstructured text into a professional, structured gig worker profile using AI",
	        inputSchema=free_text_schema(
	            "User's description of their skills, experience, and preferences"
	        ),
	    )
	    post_tool = Tool(
	        name="create_gig_post",
	        description="Transform user's unstructured text into a clear, structured gig job post using AI",
	        inputSchema=free_text_schema(
	            "User's description of the job requirements and project details"
	        ),
	    )
	    gig_search_tool = Tool(
	        name="find_matching_gigs_rag",
	        description="Find the best matching gig posts using SEMANTIC SEARCH with vector embeddings and RAG. Returns top matches based on skills, experience, and location similarity.",
	        inputSchema=matcher_schema(
	            "worker_profile", "The structured worker profile to match"
	        ),
	    )
	    worker_search_tool = Tool(
	        name="find_matching_workers_rag",
	        description="Find the best matching workers using SEMANTIC SEARCH with vector embeddings and RAG. Returns top matches based on required skills, experience, and location similarity.",
	        inputSchema=matcher_schema(
	            "gig_post", "The structured gig post to match"
	        ),
	    )

	    return [profile_tool, post_tool, gig_search_tool, worker_search_tool]

	def _parse_model_json(response_text: str) -> Any:
	    """Parse the JSON object contained in a model reply.

	    The model is asked for bare JSON but may still wrap it in a markdown
	    fence or add a sentence before/after it. Slicing from the first "{" to
	    the last "}" handles all of those shapes.

	    Raises:
	        json.JSONDecodeError: If no valid JSON can be parsed from the reply.
	    """
	    candidate = response_text.strip()
	    start = candidate.find("{")
	    end = candidate.rfind("}")
	    if start != -1 and end > start:
	        candidate = candidate[start:end + 1]
	    return json.loads(candidate)


	def _generate_structured_json(prompt: str) -> str:
	    """Send one prompt to Claude and return its JSON answer re-serialized."""
	    message = client.messages.create(
	        model="claude-sonnet-4-20250514",
	        max_tokens=1500,
	        messages=[{"role": "user", "content": prompt}]
	    )
	    return json.dumps(_parse_model_json(message.content[0].text))


	def _skill_names(value: Any) -> List[str]:
	    """Normalize a skills field to a list of strings.

	    A bare string is treated as one skill (iterating it would yield single
	    characters); anything that is not a list/tuple/set yields no skills.
	    """
	    if isinstance(value, str):
	        return [value]
	    if isinstance(value, (list, tuple, set)):
	        return [str(item) for item in value]
	    return []


	def _coerce_top_n(value: Any, default: int = 5) -> int:
	    """Return a usable positive result count, falling back to *default*."""
	    try:
	        count = int(value)
	    except (TypeError, ValueError):
	        return default
	    return max(1, count)


	def _semantic_matches(index, query: str, top_n: int, result_key: str,
	                      query_skills: List[str], record_skills_field: str) -> List[Dict[str, Any]]:
	    """Query *index* and describe each hit with its score and skill overlap.

	    Args:
	        index: Index to search (gigs_index or workers_index).
	        query: Natural-language search text.
	        top_n: Number of hits to request.
	        result_key: Key under which each hit's metadata is returned
	            ("gig" or "worker").
	        query_skills: Skills of the searching side.
	        record_skills_field: Metadata field holding the hit's skills.
	    """
	    query_engine = index.as_query_engine(similarity_top_k=top_n)
	    response = query_engine.query(query)

	    wanted = {skill.lower() for skill in query_skills}
	    matches = []
	    for node in response.source_nodes:
	        record = node.metadata
	        # The score is optional on a retrieved node; treat a missing score as
	        # 0 instead of failing on `None * 100`.
	        similarity = float(node.score) if node.score is not None else 0.0
	        offered = {skill.lower() for skill in _skill_names(record.get(record_skills_field))}

	        matches.append({
	            result_key: record,
	            "score": int(similarity * 100),  # Convert to 0-100 scale
	            # Sorted so the output is deterministic across runs.
	            "matched_skills": sorted(wanted & offered),
	            "semantic_similarity": similarity
	        })
	    return matches


	@mcp_server.call_tool()
	async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
	    """Dispatch an MCP tool call and return its result as JSON text.

	    Args:
	        name: One of the tool names advertised by ``list_tools``.
	        arguments: Tool input matching that tool's input schema.

	    Returns:
	        A single-element list whose ``text`` is a JSON document: the generated
	        profile/post, the list of matches, or ``{"error": "Tool not found"}``
	        for an unknown tool name.

	    Raises:
	        KeyError: If a required argument is missing.
	        json.JSONDecodeError: If the model reply contains no valid JSON.
	    """

	    if name == "create_worker_profile":
	        raw_text = arguments["raw_text"]
	        prompt = f"""You are a professional career consultant. Transform this person's description into an attractive gig worker profile.

	USER INPUT:
	{raw_text}

	Create a professional profile with these fields. Return ONLY valid JSON (no markdown, no explanation):

	{{
	"name": "full name",
	"title": "professional title/role",
	"skills": ["skill1", "skill2", "skill3", ...],
	"experience": "X years",
	"location": "city, country",
	"hourly_rate": "€X/hour or price range",
	"availability": "full-time/part-time/freelance/weekends/flexible",
	"bio": "compelling 1-2 sentence professional summary"
	}}

	Make it professional and appealing. If information is missing, infer reasonable values."""
	        return [TextContent(type="text", text=_generate_structured_json(prompt))]

	    elif name == "create_gig_post":
	        raw_text = arguments["raw_text"]
	        prompt = f"""You are a hiring manager. Transform this job description into a clear gig post.

	USER INPUT:
	{raw_text}

	Create a professional gig post with these fields. Return ONLY valid JSON (no markdown, no explanation):

	{{
	"title": "clear job title",
	"company": "company name or 'Private Client'",
	"required_skills": ["skill1", "skill2", "skill3", ...],
	"experience_level": "Junior/Mid-level/Senior (X years) or X+ years",
	"location": "location or Remote",
	"budget": "€X-Y or budget range",
	"duration": "time period",
	"description": "clear 1-2 sentence project description"
	}}

	Make it clear and professional. If information is missing, insert Unknown."""
	        return [TextContent(type="text", text=_generate_structured_json(prompt))]

	    elif name == "find_matching_gigs_rag":
	        worker_profile = arguments["worker_profile"]
	        top_n = _coerce_top_n(arguments.get("top_n", 5))
	        worker_skills = _skill_names(worker_profile.get('skills'))

	        # Create semantic search query from worker profile
	        query = f"""
	Looking for gig opportunities for:
	Skills: {', '.join(worker_skills)}
	Experience: {worker_profile.get('experience', '')}
	Location: {worker_profile.get('location', '')}
	Availability: {worker_profile.get('availability', '')}
	"""

	        matches = _semantic_matches(
	            gigs_index, query, top_n, "gig", worker_skills, "required_skills"
	        )
	        return [TextContent(type="text", text=json.dumps(matches))]

	    elif name == "find_matching_workers_rag":
	        gig_post = arguments["gig_post"]
	        top_n = _coerce_top_n(arguments.get("top_n", 5))
	        required_skills = _skill_names(gig_post.get('required_skills'))

	        # Create semantic search query from gig post
	        query = f"""
	Looking for workers for this gig:
	Required Skills: {', '.join(required_skills)}
	Experience Level: {gig_post.get('experience_level', '')}
	Location: {gig_post.get('location', '')}
	Project: {gig_post.get('description', '')}
	"""

	        matches = _semantic_matches(
	            workers_index, query, top_n, "worker", required_skills, "skills"
	        )
	        return [TextContent(type="text", text=json.dumps(matches))]

	    return [TextContent(type="text", text=json.dumps({"error": "Tool not found"}))]

	# ============== AGENTIC WORKFLOW ==============

	def format_tools_for_claude(tools: List[Tool]) -> List[Dict]:
	    """Translate MCP tool definitions into the dict layout the Anthropic API takes.

	    Each tool contributes its name, its description, and its ``inputSchema``
	    re-keyed as ``input_schema``; input order is preserved.
	    """
	    api_tools = []
	    for mcp_tool in tools:
	        api_tools.append(
	            dict(
	                name=mcp_tool.name,
	                description=mcp_tool.description,
	                input_schema=mcp_tool.inputSchema,
	            )
	        )
	    return api_tools

	async def _run_agent_loop(user_message: str, system_prompt: str,
	                          creator_tool_name: str, max_turns: int = 5) -> tuple[str, str]:
	    """Drive the Claude tool-use loop shared by both agent workflows.

	    Args:
	        user_message: Opening user turn of the conversation.
	        system_prompt: System prompt for every request.
	        creator_tool_name: Tool whose latest result is captured and returned
	            as the first tuple element.
	        max_turns: Upper bound on model requests.

	    Returns:
	        ``(created_json, final_text)``. ``created_json`` is the captured tool
	        result, or ``"{}"`` when that tool never ran, so callers can always
	        pass it to ``json.loads``. ``final_text`` is the model's closing text,
	        or ``"Agent completed"`` when the turn budget ran out.
	    """
	    tools_for_api = format_tools_for_claude(await list_tools())
	    conversation_history = [{"role": "user", "content": user_message}]
	    created_json = None

	    for _ in range(max_turns):
	        response = client.messages.create(
	            model="claude-sonnet-4-20250514",
	            max_tokens=4000,
	            system=system_prompt,
	            tools=tools_for_api,
	            messages=conversation_history
	        )

	        if response.stop_reason == "tool_use":
	            tool_results = []

	            for content in response.content:
	                if content.type != "tool_use":
	                    continue
	                result = await call_tool(content.name, content.input)
	                result_text = result[0].text

	                if content.name == creator_tool_name:
	                    created_json = result_text

	                tool_results.append({
	                    "type": "tool_result",
	                    "tool_use_id": content.id,
	                    "content": result_text
	                })

	            conversation_history.append({"role": "assistant", "content": response.content})
	            conversation_history.append({"role": "user", "content": tool_results})
	            continue

	        # "end_turn" or any other stop reason (e.g. "max_tokens"): the history
	        # would not change, so asking again would only repeat the same
	        # request. Return whatever text the model produced.
	        final_text = "".join(
	            content.text for content in response.content if content.type == "text"
	        )
	        return created_json or "{}", final_text

	    return created_json or "{}", "Agent completed"


	async def worker_agent_workflow(user_description: str) -> tuple[str, str]:
	    """Agent workflow: Create worker profile → Find matching gigs with RAG.

	    Args:
	        user_description: The worker's free-text self description.

	    Returns:
	        ``(profile_json, analysis)``: the JSON text produced by the
	        ``create_worker_profile`` tool (``"{}"`` if it was never called) and
	        the model's final explanation.
	    """
	    user_message = f"""I need help with my gig worker profile and finding opportunities.

	Here's my background:
	{user_description}

	Please:
	1. Create a professional profile for me
	2. Find the top 5 matching gig opportunities using semantic search
	3. Explain why each match is good, highlighting semantic similarity and matched skills

	Use the available tools to help me."""

	    system_prompt = """You are a career advisor with access to a RAG system.
	The find_matching_gigs_rag tool uses VECTOR EMBEDDINGS and SEMANTIC SEARCH to find the best matches.
	Explain that matches are found using advanced AI semantic matching, not just keyword matching.
	Be enthusiastic about the semantic similarity scores!"""

	    return await _run_agent_loop(user_message, system_prompt, "create_worker_profile")


	async def employer_agent_workflow(job_description: str) -> tuple[str, str]:
	    """Agent workflow: Create gig post → Find matching workers with RAG.

	    Args:
	        job_description: The employer's free-text description of the job.

	    Returns:
	        ``(gig_json, analysis)``: the JSON text produced by the
	        ``create_gig_post`` tool (``"{}"`` if it was never called) and the
	        model's final explanation.
	    """
	    user_message = f"""I need to create a gig post and find qualified workers.

	Here's what I'm looking for:
	{job_description}

	Please:
	1. Create a clear gig post
	2. Find the top 5 best matching workers using semantic search
	3. Explain why each candidate is a good fit, highlighting semantic similarity

	Use the available tools to help me."""

	    system_prompt = """You are a hiring consultant with access to a RAG system.
	The find_matching_workers_rag tool uses VECTOR EMBEDDINGS and SEMANTIC SEARCH to find the best matches.
	Explain that matches are found using advanced AI semantic matching powered by HuggingFace embeddings.
	Be enthusiastic about the semantic similarity scores!"""

	    return await _run_agent_loop(user_message, system_prompt, "create_gig_post")

	# ============== GRADIO UI ==============

	def run_worker_flow(description: str) -> tuple[str, str]:
	    """Gradio handler for the worker tab.

	    Runs the worker agent workflow and renders the generated profile as
	    markdown next to the agent's match analysis.

	    Args:
	        description: Free text entered by the worker.

	    Returns:
	        ``(profile_markdown, analysis_markdown)``. Any failure is reported as
	        an error message in the first element with an empty second element.
	    """
	    # Nothing to work with: skip the (slow, billed) agent run entirely.
	    if not (description or "").strip():
	        return "⚠️ Please describe yourself first.", ""

	    try:
	        profile_json, analysis = asyncio.run(worker_agent_workflow(description))

	        profile = json.loads(profile_json)

	        # The profile is model-generated: tolerate a missing/None skills
	        # field, a single string, or non-string items.
	        skills = profile.get('skills') or []
	        if isinstance(skills, str):
	            skills = [skills]
	        skills_text = ', '.join(str(skill) for skill in skills)

	        profile_display = f"""## ✅ Your Professional Profile

	{profile.get('name', 'N/A')}
	{profile.get('title', 'N/A')}

	📍 Location: {profile.get('location', 'N/A')}
	💼 Experience: {profile.get('experience', 'N/A')}
	💰 Rate: {profile.get('hourly_rate', 'N/A')}
	⏰ Availability: {profile.get('availability', 'N/A')}

	🎯 Skills:
	{skills_text}

	📝 Bio:
	{profile.get('bio', 'N/A')}
	"""

	        return profile_display, analysis
	    except Exception as e:
	        # UI boundary: show the failure to the user instead of crashing.
	        return f"❌ Error: {str(e)}", ""

	def run_employer_flow(description: str) -> tuple[str, str]:
	    """Gradio handler for the employer tab.

	    Runs the employer agent workflow and renders the generated gig post as
	    markdown next to the agent's candidate analysis.

	    Args:
	        description: Free text entered by the employer.

	    Returns:
	        ``(gig_markdown, analysis_markdown)``. Any failure is reported as an
	        error message in the first element with an empty second element.
	    """
	    # Nothing to work with: skip the (slow, billed) agent run entirely.
	    if not (description or "").strip():
	        return "⚠️ Please describe the job first.", ""

	    try:
	        gig_json, analysis = asyncio.run(employer_agent_workflow(description))

	        gig = json.loads(gig_json)

	        # The post is model-generated: tolerate a missing/None skills field,
	        # a single string, or non-string items.
	        required_skills = gig.get('required_skills') or []
	        if isinstance(required_skills, str):
	            required_skills = [required_skills]
	        skills_text = ', '.join(str(skill) for skill in required_skills)

	        gig_display = f"""## ✅ Your Gig Post

	{gig.get('title', 'N/A')}
	{gig.get('company', 'N/A')}

	📍 Location: {gig.get('location', 'N/A')}
	👔 Experience Level: {gig.get('experience_level', 'N/A')}
	💰 Budget: {gig.get('budget', 'N/A')}
	⏱️ Duration: {gig.get('duration', 'N/A')}

	🎯 Required Skills:
	{skills_text}

	📝 Description:
	{gig.get('description', 'N/A')}
	"""

	        return gig_display, analysis
	    except Exception as e:
	        # UI boundary: show the failure to the user instead of crashing.
	        return f"❌ Error: {str(e)}", ""

	# ============== GRADIO INTERFACE ==============

	# Top-level Gradio layout, bound to `app`: a banner, two tabs (worker and
	# employer), and a footer with project facts.
	with gr.Blocks(title="🤖 Jobly - Transforming Gig Market with AI") as app:

	# BANNER
	# NOTE(review): the image alt text says "GigMatch AI" while the app is titled
	# "Jobly" — confirm which name is intended.
	gr.HTML("""
	<div style="text-align: center; margin-bottom: 20px;">
	<img src="https://huggingface.co/spaces/MCP-1st-Birthday/Jobly/resolve/main/banner_jobly.png"
	style="width: 100%; max-width: 1200px; border-radius: 10px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);"
	alt="GigMatch AI Banner"/>
	</div>
	""")

	with gr.Tabs():

	# BOARD 1: WORKER SEEKING GIGS
	# NOTE(review): the intro copy hard-codes "50 gig posts", whereas the footer
	# reports len(gigs_db) — confirm the two agree.
	with gr.Tab("👤 Find Gigs for Me", elem_id="worker-board"):
	gr.Markdown("""
	## 🎯 I'm a Gig Worker looking for opportunities

	Tell me about yourself, and our AI + RAG system will:
	1. ✨ Create your professional profile
	2. 🔍 Search through 50 gig posts
	3. 💡 Find the top 5 matches - AI-powered feature

	Example: "I'm Eddy Stone, an experienced handyman with 10 years doing plumbing,
	electrical work, and carpentry. Based in Rome, available weekdays and weekends,
	charge around €25/hour"
	""")

	# Input area: free-text description plus the button that starts the flow.
	with gr.Row():
	with gr.Column(scale=2):
	worker_input = gr.Textbox(
	label="📝 Tell me about yourself",
	placeholder="Tell me your full name, describe your skills, experience, location, rate, and what you're looking for...",
	lines=6
	)
	worker_btn = gr.Button("🚀 Create profile & find your dream gig", variant="primary", size="lg")

	# Output area: generated profile on one side, match analysis on the other.
	with gr.Row():
	with gr.Column():
	worker_profile_output = gr.Markdown(label="Your Profile")
	with gr.Column():
	worker_matches_output = gr.Markdown(label="🔍 Semantic Search Results")

	# run_worker_flow returns (profile, analysis), mapped to the two outputs in order.
	worker_btn.click(
	fn=run_worker_flow,
	inputs=worker_input,
	outputs=[worker_profile_output, worker_matches_output]
	)

	# BOARD 2: EMPLOYER SEEKING WORKERS
	# NOTE(review): "50 worker profiles" is hard-coded here as well; the footer
	# reports len(workers_db).
	with gr.Tab("💼 Find Workers for My Gig", elem_id="employer-board"):
	gr.Markdown("""
	## 🎯 I'm looking for help with a task

	Describe your needs, and our AI Agent will:
	1. ✨ Create a clear gig post
	2. 🔍 Search through 50 worker profiles
	3. 💡 Find the top 5 matches - AI-powered feature


	Example: "I need someone to move my apartment furniture and boxes
	to a new place about 10km away. It's a 2-bedroom apartment. Need someone
	with a van and experience with heavy lifting. Budget around €300, can do it
	this weekend in Barcelona"
	""")

	# Input area: job description plus the button that starts the flow.
	with gr.Column(scale=2):
	employer_input = gr.Textbox(
	label="📝 Describe your needs",
	placeholder="What skills do you need? Job details? Budget? Timeline?",
	lines=6
	)
	employer_btn = gr.Button("🚀 Create Post & Find Workers", variant="primary", size="lg")

	# Output area: generated gig post on one side, candidate analysis on the other.
	with gr.Row():
	with gr.Column():
	employer_post_output = gr.Markdown(label="Your Gig Post")
	with gr.Column():
	employer_matches_output = gr.Markdown(label="🔍 Semantic Search Results")

	# run_employer_flow returns (post, analysis), mapped to the two outputs in order.
	employer_btn.click(
	fn=run_employer_flow,
	inputs=employer_input,
	outputs=[employer_post_output, employer_matches_output]
	)

	# Footer. This is an f-string, so the database stats are interpolated once,
	# when the UI is built, from the lists loaded at startup.
	gr.Markdown(f"""
	---

	### 🧠 Some amazing nerd facts

	🦙 LlamaIndex RAG Pipeline: to keep it short
	```
	Your Query → Vector Embedding → Semantic Search → Top K Results → AI Analysis
	```

	🔧 MCP Tools:
	1. `create_worker_profile` - AI profile generation
	2. `create_gig_post` - AI post generation
	3. `find_matching_gigs_rag` - Semantic search with vector embeddings
	4. `find_matching_workers_rag` - Semantic search with vector embeddings

	📊 Database Stats:
	- Workers indexed: {len(workers_db)}
	- Gigs indexed: {len(gigs_db)}
	- Total potential matches: {len(workers_db) * len(gigs_db)}
	- Embedding model: sentence-transformers/all-MiniLM-L6-v2 (HuggingFace 🤗)
	- Vector DB: ChromaDB

	🎯 Matching Features:
	- ✅ Semantic similarity (not just keyword matching!)
	- ✅ Vector embeddings for deep understanding
	- ✅ Skills matching
	- ✅ Location awareness
	- ✅ Experience level matching

	### 🛠️ Tech Stack
	- AI Agent: Claude Sonnet 4 (Anthropic)
	- RAG Framework: LlamaIndex 🦙
	- Embeddings: HuggingFace sentence-transformers 🤗
	- Vector Store: ChromaDB
	- Protocol: Model Context Protocol (MCP)

	Built for Hugging Face Winter Hackathon 2025 by Jobly Team (Valentina, Giacomo & Elisa <3) 🎉
	""")

	# Start the Gradio server only when this file is executed as a script.
	# NOTE(review): share=True asks Gradio for a public share link; this is
	# reportedly unsupported (ignored with a warning) on Hugging Face Spaces —
	# confirm whether it is still wanted.
	if __name__ == "__main__":
	app.launch(share=True)