# Spaces: Running
import asyncio
import json
import os
from typing import Any, Dict, List, Optional

import anthropic
import chromadb
import gradio as gr
# LlamaIndex imports for RAG
from llama_index.core import Document, Settings, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from mcp.server import Server
from mcp.types import TextContent, Tool
# Anthropic client; the API key is read from the ANTHROPIC_API_KEY environment variable.
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# ============== VECTOR DATABASE SETUP ==============

# Embedding model (HuggingFace sentence-transformers) registered as the LlamaIndex default.
print("🔄 Loading embedding model...")
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
Settings.embed_model = embed_model
Settings.llm = None  # Disable LLM for LlamaIndex (we use Claude directly via MCP)
Settings.chunk_size = 512

# ChromaDB client with one collection for workers and one for gigs.
chroma_client = chromadb.Client()
workers_collection, gigs_collection = (
    chroma_client.get_or_create_collection(collection_name)
    for collection_name in ("gig_workers", "gig_posts")
)
print("✅ Vector database ready!")
# ============== LOAD AND INDEX DATA ==============

# (label, record key) pairs rendered, in order, into the text embedded for each worker.
_WORKER_TEXT_FIELDS = (
    ("Name", "name"),
    ("Title", "title"),
    ("Skills", "skills"),
    ("Experience", "experience"),
    ("Location", "location"),
    ("Rate", "hourly_rate"),
    ("Availability", "availability"),
    ("Bio", "bio"),
)

# (label, record key) pairs rendered, in order, into the text embedded for each gig.
_GIG_TEXT_FIELDS = (
    ("Title", "title"),
    ("Company", "company"),
    ("Required Skills", "required_skills"),
    ("Experience Level", "experience_level"),
    ("Location", "location"),
    ("Budget", "budget"),
    ("Duration", "duration"),
    ("Description", "description"),
)


def _read_json_records(path):
    """Return the parsed JSON content of *path*, or an empty list if the file is missing."""
    try:
        # Explicit UTF-8: the platform default encoding is not guaranteed to
        # decode non-ASCII text such as currency symbols.
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"⚠️ {path} not found, using empty list")
        return []


def _record_to_text(record, fields):
    """Render *record* as "Label: value" lines; missing keys render as empty values."""
    lines = []
    for label, key in fields:
        value = record.get(key, "")
        if isinstance(value, (list, tuple)):
            value = ", ".join(str(item) for item in value)
        lines.append(f"{label}: {value}")
    return "\n" + "\n".join(lines) + "\n"


def load_and_index_data():
    """Load the worker and gig JSON files and build one vector index for each.

    Reads ``workers_data.json`` and ``gigs_data.json`` from the current working
    directory. A missing file is reported on stdout and treated as an empty
    list. A record that lacks one of the expected fields is still indexed, with
    that field left empty, instead of aborting the whole load.

    Returns:
        A tuple ``(workers_index, gigs_index, workers_data, gigs_data)``: the
        two ``VectorStoreIndex`` objects followed by the raw record lists.
    """
    workers_data = _read_json_records("workers_data.json")
    gigs_data = _read_json_records("gigs_data.json")

    # Each record becomes one Document: the rendered text is what gets
    # embedded, the raw record travels along as metadata.
    worker_documents = [
        Document(text=_record_to_text(worker, _WORKER_TEXT_FIELDS), metadata=worker)
        for worker in workers_data
    ]
    gig_documents = [
        Document(text=_record_to_text(gig, _GIG_TEXT_FIELDS), metadata=gig)
        for gig in gigs_data
    ]

    # NOTE(review): LlamaIndex documents attaching a vector store through
    # storage_context=StorageContext.from_defaults(vector_store=...). Confirm
    # that the vector_store= keyword used here actually routes embeddings into
    # the Chroma collections; the call is kept as written.
    workers_vector_store = ChromaVectorStore(chroma_collection=workers_collection)
    workers_index = VectorStoreIndex.from_documents(
        worker_documents,
        vector_store=workers_vector_store,
    )

    gigs_vector_store = ChromaVectorStore(chroma_collection=gigs_collection)
    gigs_index = VectorStoreIndex.from_documents(
        gig_documents,
        vector_store=gigs_vector_store,
    )

    print(f"✅ Indexed {len(worker_documents)} workers and {len(gig_documents)} gigs")
    return workers_index, gigs_index, workers_data, gigs_data
# Load the data and build both indices once, at import time.
print("🔄 Loading and indexing data...")
(workers_index, gigs_index, workers_db, gigs_db) = load_and_index_data()
print("✅ Data loaded and indexed!")

# ============== MCP SERVER IMPLEMENTATION ==============

# NOTE(review): no handler registration on this server object is visible in
# this file; confirm whether list_tools / call_tool are meant to be registered.
mcp_server = Server("gig-market-mcp-rag")
async def list_tools() -> List[Tool]:
    """Return the four tool definitions offered to the model.

    Two tools take the user's free text (``raw_text``) and produce structured
    JSON; two take a structured object plus an optional ``top_n`` and run a
    semantic search.
    """

    def free_text_schema(field_help: str) -> Dict[str, Any]:
        # Input schema for tools whose only argument is the raw description.
        return {
            "type": "object",
            "properties": {
                "raw_text": {
                    "type": "string",
                    "description": field_help,
                },
            },
            "required": ["raw_text"],
        }

    def matcher_schema(subject_key: str, subject_help: str) -> Dict[str, Any]:
        # Input schema for the search tools: one required object plus top_n.
        return {
            "type": "object",
            "properties": {
                subject_key: {
                    "type": "object",
                    "description": subject_help,
                },
                "top_n": {
                    "type": "integer",
                    "description": "Number of top matches to return",
                    "default": 5,
                },
            },
            "required": [subject_key],
        }

    profile_tool = Tool(
        name="create_worker_profile",
        description="Transform user's unstructured text into a professional, structured gig worker profile using AI",
        inputSchema=free_text_schema(
            "User's description of their skills, experience, and preferences"
        ),
    )
    gig_post_tool = Tool(
        name="create_gig_post",
        description="Transform user's unstructured text into a clear, structured gig job post using AI",
        inputSchema=free_text_schema(
            "User's description of the job requirements and project details"
        ),
    )
    gig_search_tool = Tool(
        name="find_matching_gigs_rag",
        description="Find the best matching gig posts using SEMANTIC SEARCH with vector embeddings and RAG. Returns top matches based on skills, experience, and location similarity.",
        inputSchema=matcher_schema(
            "worker_profile", "The structured worker profile to match"
        ),
    )
    worker_search_tool = Tool(
        name="find_matching_workers_rag",
        description="Find the best matching workers using SEMANTIC SEARCH with vector embeddings and RAG. Returns top matches based on required skills, experience, and location similarity.",
        inputSchema=matcher_schema(
            "gig_post", "The structured gig post to match"
        ),
    )
    return [profile_tool, gig_post_tool, gig_search_tool, worker_search_tool]
# Prompt templates for the two generation tools. Doubled braces are literal
# braces; {raw_text} is filled in with str.format.
_WORKER_PROFILE_PROMPT = """You are a professional career consultant. Transform this person's description into an attractive gig worker profile.
USER INPUT:
{raw_text}
Create a professional profile with these fields. Return ONLY valid JSON (no markdown, no explanation):
{{
"name": "full name",
"title": "professional title/role",
"skills": ["skill1", "skill2", "skill3", ...],
"experience": "X years",
"location": "city, country",
"hourly_rate": "€X/hour or price range",
"availability": "full-time/part-time/freelance/weekends/flexible",
"bio": "compelling 1-2 sentence professional summary"
}}
Make it professional and appealing. If information is missing, infer reasonable values."""

_GIG_POST_PROMPT = """You are a hiring manager. Transform this job description into a clear gig post.
USER INPUT:
{raw_text}
Create a professional gig post with these fields. Return ONLY valid JSON (no markdown, no explanation):
{{
"title": "clear job title",
"company": "company name or 'Private Client'",
"required_skills": ["skill1", "skill2", "skill3", ...],
"experience_level": "Junior/Mid-level/Senior (X years) or X+ years",
"location": "location or Remote",
"budget": "€X-Y or budget range",
"duration": "time period",
"description": "clear 1-2 sentence project description"
}}
Make it clear and professional. If information is missing, insert Unknown."""


def _parse_json_reply(reply_text: str) -> Any:
    """Decode the JSON in a model reply, tolerating code fences and surrounding prose."""
    cleaned = reply_text.strip()
    if cleaned.startswith("```"):
        # Fenced reply: keep what sits between the first pair of fences and
        # drop an optional "json" language tag.
        cleaned = cleaned.split("```")[1]
        if cleaned.startswith("json"):
            cleaned = cleaned[4:]
        cleaned = cleaned.strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Fallback for replies that wrap the object in explanatory text:
        # retry on the outermost {...} span, re-raising if there is none.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(cleaned[start:end + 1])


def _generate_json(prompt: str) -> Any:
    """Send *prompt* to Claude as a single user message and decode the JSON reply."""
    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1500,
        messages=[{"role": "user", "content": prompt}],
    )
    reply_text = "".join(
        part.text for part in message.content if part.type == "text"
    )
    return _parse_json_reply(reply_text)


def _skill_list(value: Any) -> List[str]:
    """Normalize a skills field to a list of strings (None -> [], "x" -> ["x"])."""
    if not value:
        return []
    if isinstance(value, str):
        return [value]
    return [str(item) for item in value]


def _semantic_matches(index, query: str, top_n: Any, result_key: str,
                      wanted_skills: List[str],
                      candidate_skills_key: str) -> List[Dict[str, Any]]:
    """Query *index* and describe each hit with its score and overlapping skills."""
    try:
        limit = max(1, int(top_n))
    except (TypeError, ValueError):
        limit = 5  # same default the tool schema advertises

    wanted = {skill.lower() for skill in wanted_skills}
    response = index.as_query_engine(similarity_top_k=limit).query(query)

    matches = []
    for node in response.source_nodes:
        candidate = node.metadata
        # A hit may carry no score; float() also keeps the value JSON-serializable.
        similarity = float(node.score) if node.score is not None else None
        offered = {
            skill.lower()
            for skill in _skill_list(candidate.get(candidate_skills_key))
        }
        matches.append({
            result_key: candidate,
            "score": int(similarity * 100) if similarity is not None else 0,  # 0-100 scale
            "matched_skills": sorted(wanted & offered),  # sorted for stable output
            "semantic_similarity": similarity,
        })
    return matches


async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
    """Execute one of the tools declared by ``list_tools``.

    Args:
        name: Tool name. ``create_worker_profile`` and ``create_gig_post`` ask
            Claude to turn ``arguments["raw_text"]`` into structured JSON;
            ``find_matching_gigs_rag`` and ``find_matching_workers_rag`` query
            the module-level ``gigs_index`` / ``workers_index``.
        arguments: Tool input as described by the tool's input schema.

    Returns:
        A single-element list holding a ``TextContent`` whose text is JSON: the
        generated object, the list of matches, or ``{"error": "Tool not found"}``
        for an unknown tool name.

    Raises:
        KeyError: A required argument is missing.
        json.JSONDecodeError: The model reply contained no decodable JSON.

    The Anthropic client calls made here are synchronous, so this coroutine
    does not yield while waiting for the model.
    """
    if name == "create_worker_profile":
        profile_data = _generate_json(
            _WORKER_PROFILE_PROMPT.format(raw_text=arguments["raw_text"])
        )
        return [TextContent(type="text", text=json.dumps(profile_data))]

    if name == "create_gig_post":
        gig_data = _generate_json(
            _GIG_POST_PROMPT.format(raw_text=arguments["raw_text"])
        )
        return [TextContent(type="text", text=json.dumps(gig_data))]

    if name == "find_matching_gigs_rag":
        worker_profile = arguments["worker_profile"]
        worker_skills = _skill_list(worker_profile.get('skills'))
        # Semantic search query built from the worker profile.
        query = "\n".join([
            "",
            "Looking for gig opportunities for:",
            f"Skills: {', '.join(worker_skills)}",
            f"Experience: {worker_profile.get('experience', '')}",
            f"Location: {worker_profile.get('location', '')}",
            f"Availability: {worker_profile.get('availability', '')}",
            "",
        ])
        matches = _semantic_matches(
            gigs_index, query, arguments.get("top_n", 5),
            result_key="gig",
            wanted_skills=worker_skills,
            candidate_skills_key="required_skills",
        )
        return [TextContent(type="text", text=json.dumps(matches))]

    if name == "find_matching_workers_rag":
        gig_post = arguments["gig_post"]
        required_skills = _skill_list(gig_post.get('required_skills'))
        # Semantic search query built from the gig post.
        query = "\n".join([
            "",
            "Looking for workers for this gig:",
            f"Required Skills: {', '.join(required_skills)}",
            f"Experience Level: {gig_post.get('experience_level', '')}",
            f"Location: {gig_post.get('location', '')}",
            f"Project: {gig_post.get('description', '')}",
            "",
        ])
        matches = _semantic_matches(
            workers_index, query, arguments.get("top_n", 5),
            result_key="worker",
            wanted_skills=required_skills,
            candidate_skills_key="skills",
        )
        return [TextContent(type="text", text=json.dumps(matches))]

    return [TextContent(type="text", text=json.dumps({"error": "Tool not found"}))]
# ============== AGENTIC WORKFLOW ==============

def format_tools_for_claude(tools: List[Tool]) -> List[Dict]:
    """Translate tool definitions into the dict shape passed as ``tools=`` to the Anthropic API.

    Each entry keeps the tool's name and description and exposes its
    ``inputSchema`` under the ``input_schema`` key.
    """
    api_tools: List[Dict] = []
    for spec in tools:
        api_tools.append(
            {
                "name": spec.name,
                "description": spec.description,
                "input_schema": spec.inputSchema,
            }
        )
    return api_tools
# Upper bound on model round-trips per workflow run.
_MAX_AGENT_TURNS = 5

_WORKER_REQUEST_TEMPLATE = """I need help with my gig worker profile and finding opportunities.
Here's my background:
{user_description}
Please:
1. Create a professional profile for me
2. Find the top 5 matching gig opportunities using semantic search
3. Explain why each match is good, highlighting semantic similarity and matched skills
Use the available tools to help me."""

_WORKER_SYSTEM_PROMPT = """You are a career advisor with access to a RAG system.
The find_matching_gigs_rag tool uses VECTOR EMBEDDINGS and SEMANTIC SEARCH to find the best matches.
Explain that matches are found using advanced AI semantic matching, not just keyword matching.
Be enthusiastic about the semantic similarity scores!"""

_EMPLOYER_REQUEST_TEMPLATE = """I need to create a gig post and find qualified workers.
Here's what I'm looking for:
{job_description}
Please:
1. Create a clear gig post
2. Find the top 5 best matching workers using semantic search
3. Explain why each candidate is a good fit, highlighting semantic similarity
Use the available tools to help me."""

_EMPLOYER_SYSTEM_PROMPT = """You are a hiring consultant with access to a RAG system.
The find_matching_workers_rag tool uses VECTOR EMBEDDINGS and SEMANTIC SEARCH to find the best matches.
Explain that matches are found using advanced AI semantic matching powered by HuggingFace embeddings.
Be enthusiastic about the semantic similarity scores!"""


async def _run_agent(opening_message: str, system_prompt: str,
                     tracked_tool: str) -> tuple[str, str]:
    """Drive the Claude tool-use loop shared by both workflows.

    Args:
        opening_message: First user message of the conversation.
        system_prompt: System prompt sent with every request.
        tracked_tool: Name of the tool whose latest result is returned as the
            first element of the result tuple.

    Returns:
        ``(tracked_json, final_text)``. ``tracked_json`` is the text returned by
        the last call to *tracked_tool*, or ``"{}"`` if it was never called, so
        the value can always be passed to ``json.loads``. ``final_text`` is the
        model's closing text, or ``"Agent completed"`` when the turn budget runs
        out while the model is still requesting tools.
    """
    tools_for_api = format_tools_for_claude(await list_tools())
    conversation_history = [{"role": "user", "content": opening_message}]
    tracked_output: Optional[str] = None

    for _ in range(_MAX_AGENT_TURNS):
        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=4000,
            system=system_prompt,
            tools=tools_for_api,
            messages=conversation_history,
        )

        # Any stop reason other than a tool request ends the run. Looping on,
        # say, "max_tokens" would resend the identical conversation.
        if response.stop_reason != "tool_use":
            final_text = "".join(
                part.text for part in response.content if part.type == "text"
            )
            return tracked_output or "{}", final_text

        tool_results = []
        for part in response.content:
            if part.type != "tool_use":
                continue
            result = await call_tool(part.name, part.input)
            result_text = result[0].text
            if part.name == tracked_tool:
                tracked_output = result_text
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": part.id,
                "content": result_text,
            })

        # Echo the assistant turn, then answer it with the tool results.
        conversation_history.append({"role": "assistant", "content": response.content})
        conversation_history.append({"role": "user", "content": tool_results})

    return tracked_output or "{}", "Agent completed"


async def worker_agent_workflow(user_description: str) -> tuple[str, str]:
    """Agent workflow: create a worker profile, then find matching gigs with RAG.

    Returns:
        ``(profile_json, analysis)``: the JSON text produced by the
        ``create_worker_profile`` tool (``"{}"`` if the model never called it)
        and the model's final explanation.
    """
    return await _run_agent(
        _WORKER_REQUEST_TEMPLATE.format(user_description=user_description),
        _WORKER_SYSTEM_PROMPT,
        "create_worker_profile",
    )


async def employer_agent_workflow(job_description: str) -> tuple[str, str]:
    """Agent workflow: create a gig post, then find matching workers with RAG.

    Returns:
        ``(gig_json, analysis)``: the JSON text produced by the
        ``create_gig_post`` tool (``"{}"`` if the model never called it) and the
        model's final explanation.
    """
    return await _run_agent(
        _EMPLOYER_REQUEST_TEMPLATE.format(job_description=job_description),
        _EMPLOYER_SYSTEM_PROMPT,
        "create_gig_post",
    )
# ============== GRADIO UI ==============

def run_worker_flow(description: str) -> tuple[str, str]:
    """Run the worker workflow and format its result for the two Markdown panels.

    Args:
        description: The worker's free-text self-description.

    Returns:
        ``(profile_markdown, analysis)``. If the workflow raises, the first
        element is an error message and the second is empty. If the workflow
        succeeds but its profile text is not a JSON object, the profile is
        rendered with ``N/A`` placeholders and the analysis is still returned.
    """
    try:
        profile_json, analysis = asyncio.run(worker_agent_workflow(description))
    except Exception as e:  # UI boundary: report the failure instead of raising into Gradio
        return f"❌ Error: {str(e)}", ""

    try:
        profile = json.loads(profile_json)
    except (TypeError, ValueError):
        profile = {}
    if not isinstance(profile, dict):
        profile = {}

    skills = profile.get('skills') or []
    if isinstance(skills, str):
        skills = [skills]
    elif not isinstance(skills, (list, tuple)):
        skills = []
    skills_line = ', '.join(str(skill) for skill in skills)

    # Paragraphs are separated by blank lines: gr.Markdown ignores single
    # newlines by default, which would merge every field into one line.
    sections = [
        "## ✅ Your Professional Profile",
        f"**{profile.get('name', 'N/A')}**",
        f"*{profile.get('title', 'N/A')}*",
        f"📍 **Location:** {profile.get('location', 'N/A')}",
        f"💼 **Experience:** {profile.get('experience', 'N/A')}",
        f"💰 **Rate:** {profile.get('hourly_rate', 'N/A')}",
        f"⏰ **Availability:** {profile.get('availability', 'N/A')}",
        f"**🎯 Skills:**\n{skills_line}",
        f"**📝 Bio:**\n{profile.get('bio', 'N/A')}",
    ]
    profile_display = "\n\n".join(sections) + "\n"
    return profile_display, analysis
def run_employer_flow(description: str) -> tuple[str, str]:
    """Run the employer workflow and format its result for the two Markdown panels.

    Args:
        description: The employer's free-text job description.

    Returns:
        ``(gig_markdown, analysis)``. If the workflow raises, the first element
        is an error message and the second is empty. If the workflow succeeds
        but its gig text is not a JSON object, the post is rendered with ``N/A``
        placeholders and the analysis is still returned.
    """
    try:
        gig_json, analysis = asyncio.run(employer_agent_workflow(description))
    except Exception as e:  # UI boundary: report the failure instead of raising into Gradio
        return f"❌ Error: {str(e)}", ""

    try:
        gig = json.loads(gig_json)
    except (TypeError, ValueError):
        gig = {}
    if not isinstance(gig, dict):
        gig = {}

    required_skills = gig.get('required_skills') or []
    if isinstance(required_skills, str):
        required_skills = [required_skills]
    elif not isinstance(required_skills, (list, tuple)):
        required_skills = []
    skills_line = ', '.join(str(skill) for skill in required_skills)

    # Paragraphs are separated by blank lines: gr.Markdown ignores single
    # newlines by default, which would merge every field into one line.
    sections = [
        "## ✅ Your Gig Post",
        f"**{gig.get('title', 'N/A')}**",
        f"*{gig.get('company', 'N/A')}*",
        f"📍 **Location:** {gig.get('location', 'N/A')}",
        f"👔 **Experience Level:** {gig.get('experience_level', 'N/A')}",
        f"💰 **Budget:** {gig.get('budget', 'N/A')}",
        f"⏱️ **Duration:** {gig.get('duration', 'N/A')}",
        f"**🎯 Required Skills:**\n{skills_line}",
        f"**📝 Description:**\n{gig.get('description', 'N/A')}",
    ]
    gig_display = "\n\n".join(sections) + "\n"
    return gig_display, analysis
# ============== GRADIO INTERFACE ==============

# Static page fragments are kept flush-left at module level so that no source
# indentation leaks into the rendered HTML/Markdown. Blank lines separate
# Markdown paragraphs because single newlines are ignored by gr.Markdown's
# default settings.
_BANNER_HTML = """
<div style="text-align: center; margin-bottom: 20px;">
<img src="https://huggingface.co/spaces/MCP-1st-Birthday/Jobly/resolve/main/banner_jobly.png"
style="width: 100%; max-width: 1200px; border-radius: 10px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);"
alt="GigMatch AI Banner"/>
</div>
"""

# The counts come from the loaded data so the intro text cannot disagree with
# the "Database Stats" section further down the page.
_WORKER_TAB_INTRO = f"""
## 🎯 I'm a Gig Worker looking for opportunities

Tell me about yourself, and our **AI + RAG system** will:

1. ✨ Create your professional profile
2. 🔍 Search through **{len(gigs_db)} gig posts**
3. 💡 Find the top 5 matches - **AI-powered feature**

**Example:** "I'm Eddy Stone, an experienced handyman with 10 years doing plumbing,
electrical work, and carpentry. Based in Rome, available weekdays and weekends,
charge around €25/hour"
"""

_EMPLOYER_TAB_INTRO = f"""
## 🎯 I'm looking for help with a task

Describe your needs, and our **AI Agent** will:

1. ✨ Create a clear gig post
2. 🔍 Search through **{len(workers_db)} worker profiles**
3. 💡 Find the top 5 matches - **AI-powered feature**

**Example:** "I need someone to move my apartment furniture and boxes
to a new place about 10km away. It's a 2-bedroom apartment. Need someone
with a van and experience with heavy lifting. Budget around €300, can do it
this weekend in Barcelona"
"""

_FOOTER_MARKDOWN = f"""
---

### 🧠 Some amazing nerd facts

**🦙 LlamaIndex RAG Pipeline: to keep it short**

```
Your Query → Vector Embedding → Semantic Search → Top K Results → AI Analysis
```

**🔧 MCP Tools:**

1. `create_worker_profile` - AI profile generation
2. `create_gig_post` - AI post generation
3. `find_matching_gigs_rag` - **Semantic search** with vector embeddings
4. `find_matching_workers_rag` - **Semantic search** with vector embeddings

**📊 Database Stats:**

- **Workers indexed:** {len(workers_db)}
- **Gigs indexed:** {len(gigs_db)}
- **Total potential matches:** {len(workers_db) * len(gigs_db)}
- **Embedding model:** sentence-transformers/all-MiniLM-L6-v2 (HuggingFace 🤗)
- **Vector DB:** ChromaDB

**🎯 Matching Features:**

- ✅ Semantic similarity (not just keyword matching!)
- ✅ Vector embeddings for deep understanding
- ✅ Skills matching
- ✅ Location awareness
- ✅ Experience level matching

### 🛠️ Tech Stack

- **AI Agent:** Claude Sonnet 4 (Anthropic)
- **RAG Framework:** LlamaIndex 🦙
- **Embeddings:** HuggingFace sentence-transformers 🤗
- **Vector Store:** ChromaDB
- **Protocol:** Model Context Protocol (MCP)

*Built for Hugging Face Winter Hackathon 2025 by Jobly Team (Valentina, Giacomo & Elisa <3) 🎉*
"""

# NOTE(review): the nesting of the layout blocks below was reconstructed from
# the statement order; confirm it against the intended page layout.
with gr.Blocks(title="🤖 Jobly - Transforming Gig Market with AI") as app:
    # BANNER
    gr.HTML(_BANNER_HTML)

    with gr.Tabs():
        # BOARD 1: WORKER SEEKING GIGS
        with gr.Tab("👤 Find Gigs for Me", elem_id="worker-board"):
            gr.Markdown(_WORKER_TAB_INTRO)
            with gr.Row():
                with gr.Column(scale=2):
                    worker_input = gr.Textbox(
                        label="📝 Tell me about yourself",
                        placeholder="Tell me your full name, describe your skills, experience, location, rate, and what you're looking for...",
                        lines=6,
                    )
                    worker_btn = gr.Button(
                        "🚀 Create profile & find your dream gig",
                        variant="primary",
                        size="lg",
                    )
            with gr.Row():
                with gr.Column():
                    worker_profile_output = gr.Markdown(label="Your Profile")
                with gr.Column():
                    worker_matches_output = gr.Markdown(label="🔍 Semantic Search Results")
            # One click fills both panels: profile on the left, matches on the right.
            worker_btn.click(
                fn=run_worker_flow,
                inputs=worker_input,
                outputs=[worker_profile_output, worker_matches_output],
            )

        # BOARD 2: EMPLOYER SEEKING WORKERS
        with gr.Tab("💼 Find Workers for My Gig", elem_id="employer-board"):
            gr.Markdown(_EMPLOYER_TAB_INTRO)
            with gr.Column(scale=2):
                employer_input = gr.Textbox(
                    label="📝 Describe your needs",
                    placeholder="What skills do you need? Job details? Budget? Timeline?",
                    lines=6,
                )
                employer_btn = gr.Button(
                    "🚀 Create Post & Find Workers",
                    variant="primary",
                    size="lg",
                )
            with gr.Row():
                with gr.Column():
                    employer_post_output = gr.Markdown(label="Your Gig Post")
                with gr.Column():
                    employer_matches_output = gr.Markdown(label="🔍 Semantic Search Results")
            employer_btn.click(
                fn=run_employer_flow,
                inputs=employer_input,
                outputs=[employer_post_output, employer_matches_output],
            )

    gr.Markdown(_FOOTER_MARKDOWN)


if __name__ == "__main__":
    app.launch(share=True)