# Jobly / generate_data.py
# Valentina9502's picture
# First commit
# fdf5af0 verified
# raw
# history blame
# 11.3 kB
"""
Generate synthetic gig economy data for testing
Creates 50 worker profiles and 50 gig posts with realistic variety
"""
import json
import random
# Skill pools, one list per service category. generate_workers() pairs each
# pool with a category title and samples worker skills from it.
HANDYMAN_SKILLS = ["Plumbing", "Electrical Work", "Carpentry", "Painting", "Drywall", "Tile Work", "Door Installation", "Window Repair"]
GARDENING_SKILLS = ["Lawn Mowing", "Tree Pruning", "Garden Design", "Plant Care", "Landscaping", "Hedge Trimming", "Weeding", "Irrigation"]
PHOTO_SKILLS = ["Event Photography", "Portrait Photography", "Photo Editing", "Lighting", "Drone Photography", "Wedding Photography", "Product Photography"]
PET_SKILLS = ["Dog Walking", "Pet Sitting", "Cat Care", "Basic Pet Training", "Pet First Aid", "Grooming", "Bird Care"]
MOVING_SKILLS = ["Furniture Moving", "Packing", "Heavy Lifting", "Van Transport", "Assembly", "Disassembly", "Storage"]
CLEANING_SKILLS = ["Deep Cleaning", "Regular Cleaning", "Organization", "Ironing", "Window Cleaning", "Carpet Cleaning", "Eco-friendly Products"]
FURNITURE_SKILLS = ["Custom Furniture", "Furniture Repair", "Wood Working", "Furniture Refinishing", "Upholstery", "Cabinet Making"]
ART_SKILLS = ["Mural Painting", "Portrait Art", "Interior Painting", "Canvas Art", "Custom Artwork", "Decorative Painting", "Restoration"]
TECH_SKILLS = ["Computer Repair", "TV Installation", "Smart Home Setup", "Network Setup", "Printer Repair", "Data Recovery"]
TUTORING_SKILLS = ["Math Tutoring", "Language Teaching", "Music Lessons", "Art Classes", "Homework Help", "Test Prep"]
# European cities in "City, Country" form; used as the random location for
# both worker profiles and gig posts.
CITIES = [
"Rome, Italy", "Milan, Italy", "Florence, Italy", "Venice, Italy", "Naples, Italy",
"Paris, France", "Lyon, France", "Marseille, France", "Nice, France",
"Madrid, Spain", "Barcelona, Spain", "Valencia, Spain", "Seville, Spain",
"Berlin, Germany", "Munich, Germany", "Hamburg, Germany", "Frankfurt, Germany",
"Amsterdam, Netherlands", "Vienna, Austria", "Brussels, Belgium", "Lisbon, Portugal"
]
# First names, laid out in rows roughly grouped by origin. A worker's name is
# one random first name joined with one random last name, so first and last
# names from different rows can be combined.
FIRST_NAMES = [
"Marco", "Sofia", "Luca", "Giulia", "Alessandro", "Francesca", "Lorenzo", "Elena",
"Pierre", "Marie", "Jean", "Sophie", "Antoine", "Camille", "Lucas", "Emma",
"Carlos", "Maria", "Diego", "Ana", "Pablo", "Carmen", "Miguel", "Laura",
"Hans", "Anna", "Klaus", "Petra", "Lukas", "Julia", "Felix", "Nina",
"Ahmed", "Fatima", "Omar", "Leila", "Hassan", "Aisha", "Thomas", "Isabella"
]
# Last names, laid out in rows the same way as FIRST_NAMES.
LAST_NAMES = [
"Rossi", "Ferrari", "Russo", "Bianchi", "Romano", "Conti", "Ricci", "Marino",
"Dupont", "Martin", "Bernard", "Dubois", "Laurent", "Moreau", "Simon", "Michel",
"Garcia", "Rodriguez", "Martinez", "Sanchez", "Lopez", "Gonzalez", "Perez", "Torres",
"Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner", "Becker",
"Hassan", "Ali", "Ibrahim", "Ahmed", "Khan", "Patel", "Chen", "Wang"
]
def generate_workers(n=50):
    """Generate ``n`` synthetic worker profiles.

    Each worker gets one random category (which supplies the job title),
    3-6 distinct skills sampled from that category's pool and, with roughly
    30% probability, one extra skill taken from a *different* category.
    The skills list of a worker never contains duplicates.

    Args:
        n: Number of profiles to create. ``n <= 0`` yields an empty list.

    Returns:
        A list of dicts with the keys ``id`` ("w1", "w2", ...), ``name``,
        ``title``, ``skills``, ``experience``, ``location``, ``hourly_rate``,
        ``availability`` and ``bio``.
    """
    categories = [
        ("Handyman & Home Repairs", HANDYMAN_SKILLS),
        ("Gardener & Landscaper", GARDENING_SKILLS),
        ("Photographer", PHOTO_SKILLS),
        ("Pet Care Specialist", PET_SKILLS),
        ("Moving & Delivery", MOVING_SKILLS),
        ("House Cleaner", CLEANING_SKILLS),
        ("Furniture Specialist", FURNITURE_SKILLS),
        ("Artist & Painter", ART_SKILLS),
        ("Tech Support", TECH_SKILLS),
        ("Tutor & Teacher", TUTORING_SKILLS),
    ]
    # Loop-invariant, so built once instead of once per worker.
    availability_options = [
        "Full-time", "Part-time", "Weekends only", "Flexible", "Evenings & Weekends",
    ]

    workers = []
    for i in range(n):
        title, skill_pool = random.choice(categories)

        # Select 3-6 random skills from the category; the min() guards pools
        # that hold fewer entries than the requested sample size.
        num_skills = random.randint(3, 6)
        skills = random.sample(skill_pool, min(num_skills, len(skill_pool)))

        # Sometimes add a skill from another category (versatile workers).
        # The worker's own pool is excluded and already-held skills are
        # filtered out, so the extra skill can never be a duplicate.
        if random.random() > 0.7:
            other_pools = [pool for _, pool in categories if pool is not skill_pool]
            extra_candidates = [
                skill for skill in random.choice(other_pools) if skill not in skills
            ]
            if extra_candidates:
                skills.append(random.choice(extra_candidates))

        experience_years = random.randint(2, 20)
        hourly_rate = random.randint(15, 50)

        workers.append({
            "id": f"w{i+1}",
            "name": f"{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}",
            "title": title,
            "skills": skills,
            "experience": f"{experience_years} years",
            "location": random.choice(CITIES),
            "hourly_rate": f"€{hourly_rate}/hour",
            "availability": random.choice(availability_options),
            "bio": f"Experienced {title.lower()} with {experience_years} years in the field",
        })
    return workers
def generate_gigs(n=50):
    """Generate ``n`` synthetic gig posts.

    Every gig is derived from one randomly chosen template. The title gets a
    random suffix (possibly empty), the location is a random entry of
    ``CITIES``, and all other fields are taken from the template unchanged.
    The description is built from the template title without the suffix.

    Args:
        n: Number of gig posts to create. ``n <= 0`` yields an empty list.

    Returns:
        A list of dicts with the keys ``id`` ("j1", "j2", ...), ``title``,
        ``company``, ``required_skills``, ``experience_level``, ``location``,
        ``budget``, ``duration`` and ``description``. Each gig owns its own
        ``required_skills`` list, so mutating one gig does not affect others.
    """
    # Template layout:
    # (title, company, required skills, experience, min budget, max budget, duration)
    # NOTE(review): some required skills below (e.g. "Pipe Repair", "Moving",
    # "Eco-friendly", "Cable Management") do not appear verbatim in the worker
    # skill pools defined in this file - confirm whether that is intentional.
    gig_templates = [
        # Handyman jobs
        ("Bathroom Plumbing Repair", "Private Homeowner", ["Plumbing", "Pipe Repair"], "3+ years", 100, 250, "Half day"),
        ("Kitchen Renovation Help", "Apartment Owner", ["Carpentry", "Tile Work", "Painting"], "5+ years", 400, 800, "3-5 days"),
        ("Electrical Outlet Installation", "Home Owner", ["Electrical Work", "Installation"], "4+ years", 80, 150, "2-3 hours"),
        ("Fence Repair & Painting", "Property Owner", ["Carpentry", "Painting"], "3+ years", 150, 300, "1 day"),
        # Gardening jobs
        ("Weekly Lawn Maintenance", "Residential Property", ["Lawn Mowing", "Weeding"], "2+ years", 60, 100, "Ongoing"),
        ("Garden Redesign Project", "Villa Owner", ["Garden Design", "Landscaping", "Plant Care"], "5+ years", 500, 1000, "1-2 weeks"),
        ("Tree Removal & Stump Grinding", "Property Manager", ["Tree Pruning", "Heavy Equipment"], "6+ years", 300, 500, "1 day"),
        ("Spring Garden Cleanup", "Homeowner", ["Weeding", "Plant Care", "Cleanup"], "2+ years", 80, 150, "Half day"),
        # Photography jobs
        ("Birthday Party Photography", "Private Family", ["Event Photography", "Photo Editing"], "3+ years", 200, 350, "3-4 hours"),
        ("Real Estate Property Photos", "Real Estate Agent", ["Product Photography", "Photo Editing"], "3+ years", 150, 300, "Half day"),
        ("Family Portrait Session", "Family", ["Portrait Photography", "Lighting"], "4+ years", 180, 300, "2 hours"),
        ("Corporate Event Coverage", "Company", ["Event Photography", "Lighting"], "5+ years", 400, 700, "Full day"),
        # Pet care jobs
        ("Weekend Dog Sitting", "Pet Owner", ["Pet Sitting", "Dog Walking"], "2+ years", 80, 150, "2 days"),
        ("Daily Cat Feeding - 1 Week", "Traveling Owner", ["Cat Care", "Pet Sitting"], "1+ years", 100, 150, "1 week"),
        ("Puppy Training Sessions", "New Dog Owner", ["Basic Pet Training", "Dog Walking"], "4+ years", 200, 350, "4 sessions"),
        ("Multiple Pet Care", "Pet Owner", ["Dog Walking", "Cat Care", "Pet Sitting"], "3+ years", 120, 200, "10 days"),
        # Moving jobs
        ("Studio Apartment Move", "Student", ["Furniture Moving", "Packing"], "2+ years", 150, 250, "Half day"),
        ("Piano Moving Service", "Homeowner", ["Heavy Lifting", "Special Equipment"], "5+ years", 200, 400, "2-3 hours"),
        ("Office Furniture Relocation", "Small Business", ["Furniture Moving", "Assembly"], "4+ years", 300, 500, "1 day"),
        ("Storage Unit to Apartment", "Individual", ["Moving", "Heavy Lifting"], "2+ years", 180, 300, "Half day"),
        # Cleaning jobs
        ("Post-Party Cleaning", "Event Host", ["Deep Cleaning", "Organization"], "2+ years", 80, 150, "3-4 hours"),
        ("Move-Out Deep Clean", "Apartment Tenant", ["Deep Cleaning", "Window Cleaning"], "3+ years", 150, 250, "Full day"),
        ("Weekly House Cleaning", "Busy Family", ["Regular Cleaning", "Organization"], "2+ years", 70, 120, "Ongoing"),
        ("Commercial Office Cleaning", "Office Manager", ["Regular Cleaning", "Eco-friendly"], "3+ years", 200, 350, "Evening shift"),
        # Furniture jobs
        ("Custom Dining Table", "Homeowner", ["Custom Furniture", "Wood Working"], "6+ years", 600, 1200, "2 weeks"),
        ("Antique Chair Restoration", "Collector", ["Furniture Repair", "Upholstery"], "8+ years", 250, 500, "1 week"),
        ("Built-in Closet System", "Apartment Owner", ["Custom Furniture", "Cabinet Making"], "5+ years", 800, 1500, "1 week"),
        ("Furniture Refinishing", "Homeowner", ["Furniture Refinishing", "Wood Working"], "4+ years", 200, 400, "3-5 days"),
        # Art jobs
        ("Living Room Feature Wall", "Homeowner", ["Mural Painting", "Interior Painting"], "4+ years", 400, 700, "2-3 days"),
        ("Restaurant Interior Mural", "Restaurant Owner", ["Mural Painting", "Custom Artwork"], "6+ years", 1000, 2000, "1-2 weeks"),
        ("Portrait Commission", "Private Client", ["Portrait Art", "Canvas Art"], "5+ years", 300, 600, "2 weeks"),
        ("Kid's Playroom Decoration", "Parents", ["Mural Painting", "Decorative Painting"], "3+ years", 250, 450, "2 days"),
        # Tech jobs
        ("Home Network Setup", "Homeowner", ["Network Setup", "Smart Home Setup"], "3+ years", 100, 200, "2-3 hours"),
        ("Computer Virus Removal", "Individual", ["Computer Repair", "Data Recovery"], "4+ years", 60, 120, "1-2 hours"),
        ("TV Wall Mounting & Setup", "Apartment Owner", ["TV Installation", "Cable Management"], "2+ years", 80, 150, "2 hours"),
        ("Smart Home Integration", "Tech Enthusiast", ["Smart Home Setup", "Network Setup"], "5+ years", 200, 400, "Half day"),
        # Tutoring jobs
        ("High School Math Tutoring", "Student Parent", ["Math Tutoring", "Homework Help"], "3+ years", 150, 300, "4 weeks"),
        ("Piano Lessons for Beginner", "Adult Learner", ["Music Lessons"], "4+ years", 200, 350, "8 sessions"),
        ("Italian Language Teaching", "Expat", ["Language Teaching"], "3+ years", 180, 300, "6 weeks"),
        ("SAT Test Preparation", "High School Senior", ["Test Prep", "Math Tutoring"], "5+ years", 300, 500, "6 weeks"),
    ]
    # Title suffixes; loop-invariant, so defined once outside the loop.
    variations = ["", " Needed", " Required", " - Urgent", " - Flexible Schedule"]

    gigs = []
    for i in range(n):
        title, company, skills, exp, min_budget, max_budget, duration = random.choice(gig_templates)

        # Add some variation to titles
        title_variation = title + random.choice(variations)

        gigs.append({
            "id": f"j{i+1}",
            "title": title_variation,
            "company": company,
            # Copy the list: otherwise every gig built from the same template
            # would share (and could mutate) one list object.
            "required_skills": list(skills),
            "experience_level": exp,
            "location": random.choice(CITIES),
            "budget": f"€{min_budget}-{max_budget}",
            "duration": duration,
            "description": f"{title} - {', '.join(skills)} expertise needed",
        })
    return gigs
if __name__ == "__main__":
    # Build both datasets up front, then write each one to its own JSON file.
    workers = generate_workers(50)
    gigs = generate_gigs(50)

    outputs = (
        ("workers_data.json", workers),
        ("gigs_data.json", gigs),
    )
    for path, records in outputs:
        with open(path, "w") as f:
            json.dump(records, f, indent=2)

    # Summary for whoever runs the script.
    print(f"✅ Generated {len(workers)} workers and {len(gigs)} gigs")
    print("📁 Saved to workers_data.json and gigs_data.json")