Dhruv committed 11 days ago

Commit

52ce761

verified ·

1 Parent(s): 1abdb8b

Upload folder using huggingface_hub

Browse files

Files changed (34) hide show

.DS_Store +0 -0
.gitignore +2 -0
app.py +398 -0
data-utils/hackerone_scraper.py +794 -0
data/hackerone/attack_surface_index.json +1 -0
data/hackerone/programs/audible.json +14 -0
data/hackerone/programs/braze_inc.json +14 -0
data/hackerone/programs/bumba_bbp.json +14 -0
data/hackerone/programs/doordash.json +14 -0
data/hackerone/programs/dyson.json +14 -0
data/hackerone/programs/flipkart.json +14 -0
data/hackerone/programs/hubspot.json +14 -0
data/hackerone/programs/inspectorio.json +14 -0
data/hackerone/programs/kong.json +14 -0
data/hackerone/programs/mpesa.json +14 -0
data/hackerone/programs/neon_bbp.json +14 -0
data/hackerone/programs/netscaler_public_program.json +14 -0
data/hackerone/programs/northerntechhq.json +14 -0
data/hackerone/programs/notion.json +14 -0
data/hackerone/programs/oppo_bbp.json +14 -0
data/hackerone/programs/porsche.json +14 -0
data/hackerone/programs/ripio.json +14 -0
data/hackerone/programs/robinhood.json +14 -0
data/hackerone/programs/silabs.json +14 -0
data/hackerone/programs/stripchat.json +14 -0
data/hackerone/programs/syfe_bbp.json +14 -0
data/hackerone/programs/wallet_on_telegram.json +14 -0
data/hackerone/programs/whoop_bug_bounty.json +14 -0
data/hackerone/programs/zooplus.json +14 -0
data/hackerone/programs_index.json +266 -0
data/mitre/mitre_minimal.json +46 -0
requirements.txt +9 -0
scope-analysis.md +7 -0
specs-cyber-vibehacking.md +209 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .venv
2	+ .env

app.py ADDED Viewed

	@@ -0,0 +1,398 @@

+import json
+import os
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import gradio as gr
+import plotly.graph_objects as go
+from huggingface_hub import InferenceClient
+from transformers import pipeline
+# Heading text; rendered as the top-level Markdown title of the Blocks UI.
+APP_TITLE = "Cyber Vibe Lab – MCP in Action"
+# Introductory Markdown rendered directly under the heading. The MCP servers
+# named here are described as conceptual: this file makes no MCP calls itself.
+INTRO_MD = """
+### Cyber Vibe Lab
+This prototype Gradio 6 application is designed for the **MCP 1st Birthday** hackathon, co-hosted by **Anthropic** and **Gradio**.
+It explores how AI agents and the Model Context Protocol (MCP) can be used to:
+- Reflect on **AI-orchestrated cyber espionage** (as described in Anthropic's report).
+- Perform structured **"vibe hacking"** simulations of attack paths.
+- Always translate those simulations into **defensive guidance** for security teams.
+In a full deployment, this app would call MCP servers such as:
+- `mcp://perplexity-ask` for web-scale security research and summarization.
+- `mcp://deepwiki` for deep dives into code and documentation of particular systems.
+This local version keeps those calls conceptual so that the app runs without extra setup while still matching the intended architecture.
+"""
+@dataclass
+class AttackStage:
+    """One attack-chain stage, as built by load_mitre_stages() from the stages JSON."""
+    id: str  # stage key (e.g. "recon"); also used as the key in MITRE_STAGES
+    name: str  # display label; the loader falls back to the id when absent
+    mitre_tactic_id: str  # the loader falls back to "" when absent
+    matrix: str  # the loader falls back to "ATTACK" when absent
+    color: str  # bar colour in the Studio chart; the loader falls back to "#888888"
+def load_mitre_stages() -> Dict[str, AttackStage]:
+    """Load a minimal set of MITRE ATT&CK-style stages from JSON.
+    The file is expected at data/mitre/mitre_minimal.json relative to this script.
+    If it is missing, we fall back to an empty dict and show placeholders.
+    """
+    base = Path(__file__).parent
+    path = base / "data" / "mitre" / "mitre_minimal.json"
+    stages: Dict[str, AttackStage] = {}
+    try:
+        with path.open("r", encoding="utf-8") as f:
+            raw = json.load(f)
+        for s in raw.get("stages", []):
+            try:
+                stages[s["id"]] = AttackStage(
+                    id=s["id"],
+                    name=s.get("name", s["id"]),
+                    mitre_tactic_id=s.get("mitre_tactic_id", ""),
+                    matrix=s.get("matrix", "ATTACK"),
+                    color=s.get("color", "#888888"),
+                )
+            except KeyError:
+                continue
+    except FileNotFoundError:
+        # Keep stages empty; the Studio panel will render a placeholder figure.
+        pass
+    return stages
+# Stage registry, loaded once at import time; empty when the JSON file is missing.
+MITRE_STAGES: Dict[str, AttackStage] = load_mitre_stages()
+# Optional model id for the text-classification pipeline; when unset the app
+# relies on keyword heuristics only.
+HF_STAGE_MODEL_ID = os.getenv("HF_STAGE_MODEL_ID")
+# Cached pipeline instance, populated lazily by _get_stage_classifier().
+_stage_clf = None
+def _get_stage_classifier():
+    """Lazily construct a Hugging Face text-classification pipeline, if configured.
+    If HF_STAGE_MODEL_ID is not set or pipeline creation fails, returns None and
+    the app falls back to keyword-based heuristics.
+    """
+    global _stage_clf
+    if _stage_clf is not None:
+        return _stage_clf
+    if not HF_STAGE_MODEL_ID:
+        return None
+    try:
+        _stage_clf = pipeline("text-classification", model=HF_STAGE_MODEL_ID)
+    except Exception:
+        _stage_clf = None
+    return _stage_clf
+def classify_stage(text: str) -> str:
+    """Classify a message into a coarse attack stage.
+    1. Try a configured HF text-classification model (if available).
+    2. Fall back to simple keyword heuristics that map text to stage IDs from
+       MITRE_STAGES (e.g., "recon", "initial_access").
+    """
+    txt = (text or "").lower()
+    clf = _get_stage_classifier()
+    if clf is not None:
+        try:
+            out = clf(txt, truncation=True, max_length=256)
+            label = str(out[0]["label"]).lower()
+            if label in MITRE_STAGES:
+                return label
+        except Exception:
+            # Fall back to heuristics
+            pass
+    # Heuristic mapping based on common wording
+    if any(k in txt for k in ["recon", "scan", "enumerat", "footprint"]):
+        return "recon"
+    if any(k in txt for k in ["login", "credential", "password", "phish", "initial access"]):
+        return "initial_access"
+    if any(k in txt for k in ["execute", "payload", "command", "run code"]):
+        return "execution"
+    if any(k in txt for k in ["persist", "backdoor", "autorun", "startup"]):
+        return "persistence"
+    if any(k in txt for k in ["exfil", "leak", "download", "expose data"]):
+        return "exfiltration"
+    if any(k in txt for k in ["destroy", "wipe", "ransom", "impact"]):
+        return "impact"
+    # Default bucket
+    return "execution"
+def build_attack_chain_figure(turns: List[Dict[str, Any]]) -> go.Figure:
+    """Aggregate turns into a simple bar chart of stages touched in this session."""
+    if not MITRE_STAGES:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="MITRE stages not loaded yet.",
+            showarrow=False,
+            x=0.5,
+            y=0.5,
+            xref="paper",
+            yref="paper",
+        )
+        fig.update_xaxes(visible=False)
+        fig.update_yaxes(visible=False)
+        return fig
+    if not turns:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="No classified turns yet.",
+            showarrow=False,
+            x=0.5,
+            y=0.5,
+            xref="paper",
+            yref="paper",
+        )
+        fig.update_xaxes(visible=False)
+        fig.update_yaxes(visible=False)
+        return fig
+    counts: Dict[str, int] = {stage_id: 0 for stage_id in MITRE_STAGES.keys()}
+    for t in turns:
+        sid = t.get("stage_id")
+        if sid in counts:
+            counts[sid] += 1
+    stage_ids = list(MITRE_STAGES.keys())
+    names = [MITRE_STAGES[s].name for s in stage_ids]
+    values = [counts.get(s, 0) for s in stage_ids]
+    colors = [MITRE_STAGES[s].color for s in stage_ids]
+    fig = go.Figure(
+        data=[
+            go.Bar(
+                x=names,
+                y=values,
+                marker_color=colors,
+            )
+        ]
+    )
+    fig.update_layout(
+        title="ATT&CK-style stages touched in this session",
+        xaxis_title="Stage",
+        yaxis_title="Number of turns",
+    )
+    return fig
+def cyber_vibe_agent(message: str, history, target: str, mode: str, detail_level: str) -> str:
+    """Core reasoning function for the Cyber Vibe Lab.
+    This is intentionally defensive: it never returns exploit code or
+    actionable credentials. Instead, it frames outputs as:
+    - Attacker "vibes" and likely phases, inspired by Anthropic's report.
+    - Concrete defensive recommendations, logging, and hardening steps.
+    In a full MCP-enabled version, this function would orchestrate calls to
+    MCP tools such as `perplexity-ask` and `deepwiki` to pull in:
+    - Relevant threat intelligence and best practices.
+    - Implementation-specific details for the selected target system.
+    """
+    target_clean = (target or "").strip() or "your system or program"
+    mode_clean = mode or "Mixed"
+    detail_clean = detail_level or "High-level summary"
+    # Very lightweight history awareness for now; could be extended later.
+    turns_so_far = len(history) if isinstance(history, list) else 0
+    attack_narrative_header = "## Conceptual attack narrative (for red-team simulation)"
+    defense_header = "## Defensive guidance (blue-team focus)"
+    # High-level narrative aligned with Anthropic's phases.
+    attack_lines = [
+        f"- **Phase 1  Recon & target scoping**: An AI agent profiles `{target_clean}` using public and internal metadata, searching for entry points (web apps, APIs, cloud services, CI/CD, identity providers).",
+        "- **Phase 2  Access & foothold**: The agent chains small, seemingly-benign tasks (e.g., \"test this endpoint\", \"scan this range\") to probe for weak auth, misconfigurations, or exposed secrets.",
+        "- **Phase 3  Privilege escalation & lateral movement**: Once a weak point is identified, the agent iteratively refines exploit ideas, tests them, and expands access within the environment.",
+        "- **Phase 4  Persistence & exfiltration**: The agent catalogs high-value data stores, automates data collection, and prepares exfiltration channels  all while documenting its steps for future reuse.",
+    ]
+    if detail_clean == "Step-by-step plan":
+        attack_lines.append(
+            "- **Phase 5  Automation & scaling**: The framework replays successful chains of actions across many similar assets (e.g., multiple subdomains or tenants), approaching the kind of scaled automation described in Anthropic's AI-espionage report."
+        )
+    if mode_clean == "Blue-team defense":
+        mode_note = (
+            "_Mode: blue-team only. The attack narrative is kept abstract and is used strictly "
+            "to structure defensive thinking._"
+        )
+    elif mode_clean == "Red-team simulation":
+        mode_note = (
+            "_Mode: red-team simulation. The narrative focuses on attacker behavior but omits "
+            "specific exploit code or instructions._"
+        )
+    else:
+        mode_note = (
+            "_Mode: mixed. We balance attacker perspective (red) and defender response (blue), "
+            "always biasing outputs toward defense._"
+        )
+    defense_lines = [
+        f"- **Scope management**: Maintain an up-to-date asset inventory for `{target_clean}` (domains, APIs, cloud resources, data stores). Use it to bound what automated agents can touch.",
+        "- **Guardrails on tools and agents**: Enforce strong safety and auditability for any internal AI tooling (e.g., MCP-based agents) so they cannot be repurposed as covert red-team frameworks.",
+        "- **Detection engineering**: Instrument logs and alerts for patterns Anthropic highlighted: many small, tool-like requests in succession; repeated reconnaissance on the same surface; iterative attempts around auth boundaries.",
+        "- **Least privilege & segmentation**: Assume an AI agent will eventually find a weak link. Design IAM, network segmentation, and blast-radius limits so that a single foothold remains contained.",
+        "- **Incident response playbooks**: Prepare playbooks specifically for AI-orchestrated attacks (sudden high-volume but semi-random probing, large-scale code generation, mass credential testing).",
+    ]
+    if detail_clean == "Step-by-step plan":
+        defense_lines.extend(
+            [
+                "- **Red/blue rehearsal with agents**: Use the Cyber Vibe Lab to stage hypothetical campaigns and then codify new detections and controls after each simulated run.",
+                "- **MCP-aware hardening**: For each MCP tool you expose (perplexity-style research, code repo analysis, internal APIs), document its abuse potential and add explicit rate limits, scopes, and safety filters.",
+            ]
+        )
+    mcp_note = (
+        "\n> In a full setup, this analysis would be enriched by MCP calls to `perplexity-ask` "
+        "(for live threat intel and standards) and `deepwiki` (for code/config insights about the selected target)."
+    )
+    user_hint = "\n\n> Tip: refine the vibe by asking follow-ups like \"focus on identity\" or \"assume a multi-cloud target\". Each turn can tighten the scenario."  # noqa: E501
+    response = (
+        f"{mode_note}\n\n"
+        f"{attack_narrative_header}\n" + "\n".join(attack_lines) + "\n\n" + defense_header + "\n" + "\n".join(defense_lines)
+    )
+    if turns_so_far == 0:
+        response += mcp_note
+    response += user_hint
+    return response
+def register_sources(files, url, current_sources):
+    """Update the in-app list of sources (files/URLs) for the left panel.
+    This is a lightweight placeholder; later we can extend it to track types,
+    tags, and whether a source is used for retrieval vs attack-target testing.
+    """
+    sources = list(current_sources or [])
+    if files:
+        for f in files:
+            name = getattr(f, "name", "uploaded")  # Gradio File objects expose `.name`
+            sources.append({"type": "file", "name": name})
+    if url and url.strip():
+        sources.append({"type": "url", "name": url.strip()})
+    # Return updated state, and reset file + URL inputs
+    return sources, None, "", sources
+# Build the three-column UI: sources (left), chat (centre), Studio chart (right),
+# then wire the event handlers. Component creation order defines the layout.
+with gr.Blocks(fill_height=True) as demo:
+    gr.Markdown(f"# {APP_TITLE}")
+    gr.Markdown(INTRO_MD)
+    with gr.Row(equal_height=True):
+        # Sources / NotebookLM-style left panel
+        with gr.Column(scale=1):
+            gr.Markdown("## Sources\nManage uploaded files, URLs, MITRE docs, and Hugging Face assets.")
+            source_files = gr.File(label="Upload sources", file_count="multiple")
+            source_url = gr.Textbox(label="Add URL source", placeholder="https://attack.mitre.org/...")
+            add_source = gr.Button("Add source")
+            # The state holds the list of sources; the JSON view mirrors it.
+            sources_state = gr.State([])
+            sources_view = gr.JSON(label="Current sources (preview)", value=[])
+        # Center chat panel
+        with gr.Column(scale=2):
+            gr.Markdown("## Chat")
+            with gr.Row():
+                target_input = gr.Textbox(
+                    label="Target / system name (optional)",
+                    placeholder="e.g., airbnb (HackerOne), internal CRM, DeFi dapp",
+                    scale=2,
+                )
+                mode_input = gr.Dropdown(
+                    ["Mixed", "Red-team simulation", "Blue-team defense"],
+                    value="Mixed",
+                    label="Mode",
+                    scale=1,
+                )
+            detail_level_input = gr.Radio(
+                ["High-level summary", "Step-by-step plan"],
+                value="High-level summary",
+                label="Detail level",
+            )
+            chatbot = gr.Chatbot(label="Cyber Vibe dialogue")
+            msg = gr.Textbox(
+                label="Describe your scenario or question",
+                placeholder=(
+                    "Describe a system and what you want to explore from a cyber 'vibe hacking' "
+                    "perspective..."
+                ),
+            )
+            clear = gr.Button("Clear conversation")
+        # Right Studio panel
+        with gr.Column(scale=1):
+            gr.Markdown("## Studio\nAttack-chain mind map, timeline, and reports.")
+            # Starts as the placeholder figure because no turns exist yet.
+            studio_plot = gr.Plot(
+                label="Attack chain overview",
+                value=build_attack_chain_figure([]),
+            )
+    # List of {"text", "stage_id"} dicts, one per submitted message.
+    attack_turns_state = gr.State([])
+    def respond(user_message, chat_history, target, mode, detail_level, attack_turns):
+        # Handle one submitted message: generate the reply, extend the chat
+        # history, classify the message into a stage, and redraw the chart.
+        # Returns (cleared textbox value, history, turns, figure), matching
+        # the `outputs` order of msg.submit below.
+        if chat_history is None:
+            chat_history = []
+        reply = cyber_vibe_agent(user_message, chat_history, target, mode, detail_level)
+        # NOTE(review): history is extended with (user, reply) tuples; confirm
+        # the pinned Gradio version's Chatbot accepts this format rather than
+        # role/content message dicts.
+        chat_history = chat_history + [(user_message, reply)]
+        # Copy before appending so the incoming state list is not mutated.
+        turns = list(attack_turns or [])
+        stage_id = classify_stage(user_message)
+        turns.append({"text": user_message, "stage_id": stage_id})
+        fig = build_attack_chain_figure(turns)
+        return "", chat_history, turns, fig
+    msg.submit(
+        respond,
+        inputs=[msg, chatbot, target_input, mode_input, detail_level_input, attack_turns_state],
+        outputs=[msg, chatbot, attack_turns_state, studio_plot],
+    )
+    # Reset chat, textbox, recorded turns and chart; the tuple order follows
+    # the `outputs` list below (which differs from respond's order).
+    clear.click(
+        lambda: ([], "", [], build_attack_chain_figure([])),
+        inputs=None,
+        outputs=[chatbot, msg, attack_turns_state, studio_plot],
+    )
+    add_source.click(
+        register_sources,
+        inputs=[source_files, source_url, sources_state],
+        outputs=[sources_state, source_files, source_url, sources_view],
+    )
+# Start the Gradio server only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

data-utils/hackerone_scraper.py ADDED Viewed

	@@ -0,0 +1,794 @@

+"""Scrape HackerOne opportunities and program scope using crawl4ai + BeautifulSoup.
+Usage (from repo root):
+    python -m data-utils.hackerone_scraper --limit 10
+This will:
+- Load the public Opportunities page.
+- Collect all cards with a "See details" link.
+- For each program, visit its main page and scope page.
+- Extract program metadata, rewards, stats, and scope assets (with a focus on
+  assets that are both "In scope" and "Eligible").
+- Download the Burp Suite Project Configuration file when available.
+- Store everything as JSON under `data/hackerone/`.
+Be sure your use complies with HackerOne's terms and robots.txt.
+"""
+from __future__ import annotations
+import argparse
+import asyncio
+import json
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Tuple
+from urllib.error import HTTPError, URLError
+from urllib.parse import urljoin, urlparse
+from urllib.request import Request, urlopen
+from bs4 import BeautifulSoup
+from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode, JsonCssExtractionStrategy
+from playwright.async_api import async_playwright
+# Site root; relative links found on scraped pages are resolved against it.
+BASE_URL = "https://hackerone.com"
+# Public opportunities overview page.
+OPPORTUNITIES_URL = f"{BASE_URL}/opportunities/all"
+# Paginated search endpoint; callers append the query string (e.g. ?bbp=true&page=N).
+OPPORTUNITIES_SEARCH_URL = f"{BASE_URL}/opportunities/all/search"
+# ---------------------------------------------------------------------------
+# Data models
+# ---------------------------------------------------------------------------
+@dataclass
+class ScopeAsset:
+    """One asset entry from a program's scope listing.
+    Apart from `impact_scope` (optional) and `attack_surface` (a list), every
+    field is kept as a plain string.
+    """
+    asset_name: str  # lower-cased and keyword-matched by infer_attack_surface()
+    impact_scope: Optional[str]
+    asset_type: str  # lower-cased and keyword-matched by infer_attack_surface()
+    coverage: str  # presumably the "In scope" / "Out of scope" label — confirm against the scope parser
+    max_severity: str
+    bounty_eligibility: str  # presumably the "Eligible" / "Ineligible" label — confirm against the scope parser
+    last_update: str
+    resolved_reports: str  # NOTE(review): a count kept as text — confirm
+    # Category names for this asset; tallied by summarize_attack_surface().
+    attack_surface: List[str] = field(default_factory=list)
+@dataclass
+class ProgramRecord:
+    """Everything collected for one program; only slug, name and detail_url are required."""
+    slug: str  # presumably the value produced by slug_from_program_url() — confirm
+    name: Optional[str]
+    detail_url: str
+    website_url: Optional[str] = None
+    reward_summary_card: Optional[str] = None  # presumably the reward text from the listing card — confirm
+    rewards_table: Dict[str, Any] = field(default_factory=dict)
+    stats: Dict[str, Any] = field(default_factory=dict)
+    scope_assets: List[ScopeAsset] = field(default_factory=list)
+    # Presumably the subset of scope_assets that is both in scope and
+    # bounty-eligible — confirm against the code that fills it.
+    eligible_assets: List[ScopeAsset] = field(default_factory=list)
+    burp_config_path: Optional[str] = None  # presumably where the Burp config was saved — confirm
+    # Category -> count mappings, the shape returned by summarize_attack_surface().
+    attack_surface_summary_all: Dict[str, int] = field(default_factory=dict)
+    attack_surface_summary_eligible: Dict[str, int] = field(default_factory=dict)
+# The category names infer_attack_surface() can emit, in the order it checks
+# them; "other" is used only when nothing else matches.
+ATTACK_SURFACE_CATEGORIES = (
+    "web_app",
+    "database",
+    "internal_network",
+    "cloud_infra",
+    "appliance",
+    "other",
+)
+def infer_attack_surface(asset: ScopeAsset) -> List[str]:
+    categories: List[str] = []
+    t = asset.asset_type.lower()
+    name = asset.asset_name.lower()
+    if (
+        any(kw in t for kw in ("domain", "url", "web", "website", "api", "host"))
+        or any(name.endswith(ext) for ext in (".com", ".net", ".org", ".io", ".co", ".app"))
+        or "http://" in name
+        or "https://" in name
+        or "app store" in t
+        or "play store" in t
+    ):
+        categories.append("web_app")
+    if any(kw in t for kw in ("database", "db")) or any(
+        kw in name for kw in ("mysql", "postgres", "pgsql", "oracle", "mongo", "redis", "sql", "db")
+    ):
+        categories.append("database")
+    if "cidr" in t or any(
+        kw in name for kw in ("cidr", "intranet", "vpn", "lan", "10.", "192.168.", "172.16.")
+    ):
+        categories.append("internal_network")
+    if any(kw in t for kw in ("cloud", "storage", "bucket")) or any(
+        kw in name
+        for kw in ("s3", "ec2", "gcp", "azure", "digitalocean", "linode", "cloudfront", "cloudflare")
+    ):
+        categories.append("cloud_infra")
+    if "hardware" in t or any(
+        kw in name for kw in ("router", "switch", "firewall", "iot", "device", "appliance")
+    ):
+        categories.append("appliance")
+    if not categories:
+        categories.append("other")
+    return categories
+def summarize_attack_surface(assets: List[ScopeAsset]) -> Dict[str, int]:
+    summary: Dict[str, int] = {}
+    for asset in assets:
+        for cat in asset.attack_surface:
+            summary[cat] = summary.get(cat, 0) + 1
+    return summary
+# ---------------------------------------------------------------------------
+# Paths & helpers
+# ---------------------------------------------------------------------------
+def repo_root() -> Path:
+    """Assume this file lives in `data-utils/` under the repo root."""
+    return Path(__file__).resolve().parents[1]
+def output_dirs() -> Dict[str, Path]:
+    root = repo_root()
+    base = root / "data" / "hackerone"
+    programs_dir = base / "programs"
+    burp_dir = base / "burp_configs"
+    debug_dir = base / "debug"
+    programs_dir.mkdir(parents=True, exist_ok=True)
+    burp_dir.mkdir(parents=True, exist_ok=True)
+    debug_dir.mkdir(parents=True, exist_ok=True)
+    return {"base": base, "programs": programs_dir, "burp": burp_dir, "debug": debug_dir}
+def slug_from_program_url(url: str) -> str:
+    parsed = urlparse(url)
+    path = parsed.path.strip("/")
+    if not path:
+        return "program"
+    slug = path.split("/")[-1]
+    slug = slug.split("?")[0]
+    safe = [c if (c.isalnum() or c in ("-", "_")) else "_" for c in slug]
+    return "".join(safe) or "program"
+# ---------------------------------------------------------------------------
+# Network / crawling helpers
+# ---------------------------------------------------------------------------
+async def fetch_html(
+    crawler: AsyncWebCrawler,
+    url: str,
+    *,
+    debug_label: Optional[str] = None,
+) -> Optional[str]:
+    """Fetch rendered HTML for a URL using crawl4ai with explicit config.
+    - Uses CacheMode.BYPASS to always get fresh content for dynamic SPA pages.
+    - Optionally writes a debug HTML snapshot under data/hackerone/debug/.
+    - Logs basic diagnostics (length and presence of key markers).
+    """
+    run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
+    result = await crawler.arun(url=url, config=run_config)
+    if not result.success:
+        error_msg = getattr(result, "error", getattr(result, "error_message", "unknown error"))
+        print(f"[WARN] crawl failed for {url}: {error_msg}")
+        return None
+    html = result.html or ""
+    print(f"[DEBUG] fetch_html: url={url} length={len(html)} chars")
+    if debug_label:
+        try:
+            dirs = output_dirs()
+            debug_dir = dirs["debug"]
+            filename = debug_label if debug_label.endswith(".html") else f"{debug_label}.html"
+            debug_path = debug_dir / filename
+            debug_path.write_text(html, encoding="utf-8")
+            print(f"[DEBUG] Saved HTML snapshot for {url} -> {debug_path}")
+        except Exception as exc:  # pragma: no cover - diagnostics only
+            print(f"[WARN] Failed to save debug HTML for {url}: {exc}")
+    if "See details" in html:
+        print(f"[DEBUG] fetch_html: 'See details' marker present in HTML for {url}")
+    return html
+def download_url(url: str, dest: Path) -> Optional[Path]:
+    try:
+        req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
+        with urlopen(req, timeout=30) as resp:  # type: ignore[arg-type]
+            data = resp.read()
+        dest.write_bytes(data)
+        return dest
+    except (HTTPError, URLError, TimeoutError) as exc:  # pragma: no cover
+        print(f"[WARN] failed to download {url}: {exc}")
+        return None
+# ---------------------------------------------------------------------------
+# HTML parsing helpers
+# ---------------------------------------------------------------------------
+def parse_opportunity_cards(html: str) -> List[Dict[str, Any]]:
+    """Return a list of dicts describing each program card with a details link."""
+    soup = BeautifulSoup(html, "html.parser")
+    cards_by_url: Dict[str, Dict[str, Any]] = {}
+    articles = soup.find_all("article")
+    print(f"[DEBUG] parse_opportunity_cards: found {len(articles)} <article> elements")
+    # Primary strategy: cards rendered as <article> blocks containing a link to
+    # a team page (href ending with ?type=team). This is more robust than
+    # relying on the visible "See details" text, which may differ between
+    # views.
+    per_article_team_links = 0
+    for article in articles:
+        details_link = article.find(
+            "a",
+            href=lambda h: isinstance(h, str) and "?type=team" in h,
+        )
+        if not details_link or not details_link.get("href"):
+            continue
+        per_article_team_links += 1
+        detail_url = urljoin(BASE_URL, details_link["href"])
+        if detail_url in cards_by_url:
+            continue
+        img = article.find("img", alt=True)
+        name = (img.get("alt") or "").strip() if img else None
+        reward_summary = None
+        for txt in article.stripped_strings:
+            if "$" in txt and "-" in txt:
+                reward_summary = txt
+                break
+        cards_by_url[detail_url] = {
+            "name": name,
+            "detail_url": detail_url,
+            "reward_summary": reward_summary,
+        }
+    print(
+        f"[DEBUG] parse_opportunity_cards: found {per_article_team_links} '?type=team' links inside <article> elements"
+    )
+    # Fallback strategy: any '?type=team' links anywhere in the document.
+    team_links = soup.find_all(
+        "a",
+        href=lambda h: isinstance(h, str) and "?type=team" in h,
+    )
+    print(
+        f"[DEBUG] parse_opportunity_cards: found {len(team_links)} '?type=team' links total in document"
+    )
+    for a in team_links:
+        href = a.get("href")
+        if not href:
+            continue
+        detail_url = urljoin(BASE_URL, href)
+        if detail_url in cards_by_url:
+            continue
+        container = a.find_parent("article") or a.find_parent("div")
+        img = container.find("img", alt=True) if container else None
+        name = (img.get("alt") or "").strip() if img else None
+        reward_summary = None
+        if container is not None:
+            for txt in container.stripped_strings:
+                if "$" in txt and "-" in txt:
+                    reward_summary = txt
+                    break
+        cards_by_url[detail_url] = {
+            "name": name,
+            "detail_url": detail_url,
+            "reward_summary": reward_summary,
+        }
+    cards = list(cards_by_url.values())
+    print(f"[DEBUG] parse_opportunity_cards: returning {len(cards)} cards")
+    if not cards:
+        print("[DEBUG] parse_opportunity_cards: no cards extracted from HTML")
+    return cards
+async def extract_opportunity_cards_via_json(
+    crawler: AsyncWebCrawler,
+    url: str,
+    page_label: str,
+) -> List[Dict[str, Any]]:
+    """Use crawl4ai's JsonCssExtractionStrategy to extract opportunity cards.
+    This avoids relying on `result.html` for SPA content and instead uses the
+    DOM that Playwright sees inside crawl4ai.
+    """
+    schema = {
+        "name": "HackerOneOpportunities",
+        "baseSelector": "article",
+        "fields": [
+            {
+                "name": "detail_href",
+                "selector": 'a[href*="?type=team"]',
+                "type": "attribute",
+                "attribute": "href",
+            },
+            {
+                "name": "name",
+                "selector": "img[alt]",
+                "type": "attribute",
+                "attribute": "alt",
+            },
+        ],
+    }
+    extraction_strategy = JsonCssExtractionStrategy(schema, verbose=False)
+    # Prefer a config that waits for the SPA cards to render before extracting.
+    try:
+        run_config = CrawlerRunConfig(
+            cache_mode=CacheMode.BYPASS,
+            extraction_strategy=extraction_strategy,
+            # Wait for at least one program card link to appear in the DOM.
+            wait_for_selector='article a[href*="?type=team"]',
+            timeout=30_000,
+        )
+    except TypeError:
+        # Older crawl4ai versions may not support wait_for_selector/timeout.
+        print(
+            "[WARN] CrawlerRunConfig does not support wait_for_selector/timeout; "
+            "falling back to basic config. Consider upgrading crawl4ai for SPA pages."
+        )
+        run_config = CrawlerRunConfig(
+            cache_mode=CacheMode.BYPASS,
+            extraction_strategy=extraction_strategy,
+        )
+    print(f"[INFO] JSON extracting opportunity cards from {url} ({page_label})")
+    result = await crawler.arun(url=url, config=run_config)
+    if not result.success:
+        error_msg = getattr(result, "error", getattr(result, "error_message", "unknown error"))
+        print(f"[WARN] JSON extraction failed for {url}: {error_msg}")
+        return []
+    if not getattr(result, "extracted_content", None):
+        print(f"[DEBUG] extract_opportunity_cards_via_json: no extracted_content for {url}")
+        return []
+    try:
+        raw_items = json.loads(result.extracted_content)
+    except Exception as exc:
+        print(f"[WARN] Failed to decode extracted_content for {url}: {exc}")
+        return []
+    cards: List[Dict[str, Any]] = []
+    for item in raw_items:
+        href = (item.get("detail_href") or "").strip()
+        if not href:
+            continue
+        # Focus on team program pages (bug bounty programs and VDP teams)
+        detail_url = urljoin(BASE_URL, href)
+        name = (item.get("name") or "").strip() or None
+        cards.append(
+            {
+                "name": name,
+                "detail_url": detail_url,
+                "reward_summary": None,
+            }
+        )
+    print(f"[INFO] extract_opportunity_cards_via_json[{page_label}]: produced {len(cards)} cards")
+    return cards
+async def gather_opportunity_cards_with_playwright(
+    max_pages: int = 10,
+) -> List[Dict[str, Any]]:
+    """Fallback: use Playwright directly to gather opportunity cards.
+    This bypasses crawl4ai's HTML/extraction pipeline for the listing pages,
+    but still relies on the same parse_opportunity_cards() logic and feeds
+    ProgramRecord scraping as before.
+    """
+    cards: List[Dict[str, Any]] = []
+    seen: Dict[str, Dict[str, Any]] = {}
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
+        page = await browser.new_page()
+        # Main overview page
+        print("[INFO] Playwright fallback: loading opportunities overview page")
+        await page.goto(OPPORTUNITIES_URL, wait_until="networkidle")
+        html = await page.content()
+        index_cards = parse_opportunity_cards(html)
+        print(
+            f"[INFO] Playwright fallback: main page produced {len(index_cards)} cards"
+        )
+        for card in index_cards:
+            url = card["detail_url"]
+            if url not in seen:
+                seen[url] = card
+                cards.append(card)
+        # Paginated search pages
+        for page_no in range(1, max_pages + 1):
+            search_url = f"{OPPORTUNITIES_SEARCH_URL}?bbp=true&page={page_no}"
+            print(
+                f"[INFO] Playwright fallback: loading search page {page_no}: {search_url}"
+            )
+            await page.goto(search_url, wait_until="networkidle")
+            html = await page.content()
+            page_cards = parse_opportunity_cards(html)
+            if not page_cards:
+                print(
+                    f"[INFO] Playwright fallback: no cards on search page {page_no}; stopping pagination."
+                )
+                break
+            new_count = 0
+            for card in page_cards:
+                url = card["detail_url"]
+                if url not in seen:
+                    seen[url] = card
+                    cards.append(card)
+                    new_count += 1
+            if new_count == 0:
+                print(
+                    f"[INFO] Playwright fallback: no new cards on search page {page_no}; stopping pagination."
+                )
+                break
+        await browser.close()
+    print(f"[INFO] Playwright fallback: total unique cards collected = {len(cards)}")
+    return cards
+async def gather_all_opportunity_cards(
+    crawler: AsyncWebCrawler,
+    max_pages: int = 10,
+) -> List[Dict[str, Any]]:
+    """Collect cards from the main opportunities page and paginated search.
+    We first scrape /opportunities/all, then iterate over
+    /opportunities/all/search?bbp=true&page=N until no new cards appear or
+    max_pages is reached. This focuses on public bug bounty programs.
+    """
+    cards: List[Dict[str, Any]] = []
+    seen: Dict[str, Dict[str, Any]] = {}
+    # Main overview page (popular campaigns & recommendations)
+    index_cards = await extract_opportunity_cards_via_json(
+        crawler,
+        OPPORTUNITIES_URL,
+        page_label="opportunities_all",
+    )
+    print(f"[INFO] gather_all_opportunity_cards: main page produced {len(index_cards)} cards")
+    for card in index_cards:
+        url = card["detail_url"]
+        if url not in seen:
+            seen[url] = card
+            cards.append(card)
+    # Paginated search for bug bounty programs
+    for page in range(1, max_pages + 1):
+        search_url = f"{OPPORTUNITIES_SEARCH_URL}?bbp=true&page={page}"
+        print(f"[INFO] Fetching search page {page}: {search_url}")
+        page_cards = await extract_opportunity_cards_via_json(
+            crawler,
+            search_url,
+            page_label=f"opportunities_search_page_{page}",
+        )
+        if not page_cards:
+            print(f"[INFO] No cards parsed on search page {page}; stopping pagination.")
+            break
+        new_count = 0
+        for card in page_cards:
+            url = card["detail_url"]
+            if url not in seen:
+                seen[url] = card
+                cards.append(card)
+                new_count += 1
+        if new_count == 0:
+            # No new programs discovered on this page; assume we've exhausted results.
+            break
+    return cards
+def extract_first_external_link(soup: BeautifulSoup) -> Optional[str]:
+    for a in soup.find_all("a", href=True):
+        href = a["href"]
+        if href.startswith("http") and "hackerone.com" not in href:
+            return href
+    return None
+def extract_kv_table_after_heading(soup: BeautifulSoup, heading_substring: str) -> Dict[str, Any]:
+    heading = soup.find(
+        lambda tag: tag.name in ("h1", "h2", "h3")
+        and heading_substring.lower() in tag.get_text(strip=True).lower()
+    )
+    if not heading:
+        return {}
+    table = heading.find_next("table")
+    if not table:
+        return {}
+    result: Dict[str, Any] = {}
+    for row in table.find_all("tr"):
+        cells = row.find_all(["th", "td"])
+        if len(cells) < 2:
+            continue
+        key = cells[0].get_text(" ", strip=True)
+        value = cells[1].get_text(" ", strip=True)
+        if key:
+            result[key] = value
+    return result
+def parse_program_page(html: str) -> Tuple[Optional[str], Optional[str], Dict[str, Any], Dict[str, Any]]:
+    soup = BeautifulSoup(html, "html.parser")
+    name = None
+    title = soup.find("h1")
+    if title:
+        name = title.get_text(" ", strip=True)
+    website = extract_first_external_link(soup)
+    rewards = extract_kv_table_after_heading(soup, "Rewards summary")
+    stats = extract_kv_table_after_heading(soup, "Stats")
+    return name, website, rewards, stats
+def parse_scope_table(html: str) -> Tuple[List[ScopeAsset], List[ScopeAsset], Optional[str]]:
+    """Parse the scope table and Burp Suite link.
+    Returns (all_assets, eligible_assets, burp_url).
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    burp_link = soup.find(
+        "a",
+        string=lambda s: isinstance(s, str)
+        and "Burp Suite Project Configuration File" in s,
+    )
+    burp_url = urljoin(BASE_URL, burp_link["href"]) if burp_link else None
+    table = None
+    for t in soup.find_all("table"):
+        header = t.find("tr")
+        if not header:
+            continue
+        hdr_text = header.get_text(" ", strip=True)
+        if all(key in hdr_text for key in ("Asset name", "Coverage", "Bounty")):
+            table = t
+            break
+    all_assets: List[ScopeAsset] = []
+    eligible: List[ScopeAsset] = []
+    if not table:
+        return all_assets, eligible, burp_url
+    for tr in table.find_all("tr")[1:]:  # skip header
+        tds = tr.find_all("td")
+        if len(tds) < 7:
+            continue
+        name_cell = tds[0]
+        asset_name_el = name_cell.find("strong")
+        asset_name = (
+            asset_name_el.get_text(" ", strip=True)
+            if asset_name_el
+            else name_cell.get_text(" ", strip=True)
+        )
+        impact_scope = None
+        extra = [s for s in name_cell.stripped_strings]
+        if len(extra) > 1:
+            impact_scope = extra[1]
+        asset_type = tds[1].get_text(" ", strip=True)
+        coverage = tds[2].get_text(" ", strip=True)
+        max_severity = tds[3].get_text(" ", strip=True)
+        bounty_eligibility = tds[4].get_text(" ", strip=True)
+        last_update = tds[5].get_text(" ", strip=True)
+        resolved_reports = tds[6].get_text(" ", strip=True)
+        asset = ScopeAsset(
+            asset_name=asset_name,
+            impact_scope=impact_scope,
+            asset_type=asset_type,
+            coverage=coverage,
+            max_severity=max_severity,
+            bounty_eligibility=bounty_eligibility,
+            last_update=last_update,
+            resolved_reports=resolved_reports,
+        )
+        asset.attack_surface = infer_attack_surface(asset)
+        all_assets.append(asset)
+        if "in scope" in coverage.lower() and "eligible" in bounty_eligibility.lower():
+            eligible.append(asset)
+    return all_assets, eligible, burp_url
+# ---------------------------------------------------------------------------
+# Main scraping flow
+# ---------------------------------------------------------------------------
+async def scrape_programs(limit: Optional[int] = None, max_pages: int = 10) -> List[ProgramRecord]:
+    dirs = output_dirs()
+    async with AsyncWebCrawler() as crawler:
+        cards = await gather_all_opportunity_cards(crawler, max_pages=max_pages)
+        if not cards:
+            print(
+                "[WARN] crawl4ai-based extraction found no opportunity cards; "
+                "falling back to direct Playwright scraping for listings."
+            )
+            cards = await gather_opportunity_cards_with_playwright(max_pages=max_pages)
+        if not cards:
+            print("[ERROR] No opportunity cards found (even with Playwright fallback)")
+            return []
+        if limit is not None:
+            cards = cards[:limit]
+        programs: List[ProgramRecord] = []
+        for card in cards:
+            detail_url = card["detail_url"]
+            slug = slug_from_program_url(detail_url)
+            print(f"[INFO] Scraping {slug} -> {detail_url}")
+            main_html = await fetch_html(crawler, detail_url)
+            if not main_html:
+                continue
+            parsed = urlparse(detail_url)
+            scope_url = urljoin(BASE_URL, parsed.path.rstrip("/") + "/policy_scopes")
+            scope_html = await fetch_html(crawler, scope_url)
+            name, website, rewards, stats = parse_program_page(main_html)
+            all_assets: List[ScopeAsset] = []
+            eligible_assets: List[ScopeAsset] = []
+            burp_cfg_path: Optional[str] = None
+            if scope_html:
+                all_assets, eligible_assets, burp_url = parse_scope_table(scope_html)
+                if burp_url:
+                    dest = dirs["burp"] / f"{slug}.json"
+                    downloaded = download_url(burp_url, dest)
+                    if downloaded is not None:
+                        burp_cfg_path = str(downloaded.relative_to(repo_root()))
+            record = ProgramRecord(
+                slug=slug,
+                name=name or card.get("name"),
+                detail_url=detail_url,
+                website_url=website,
+                reward_summary_card=card.get("reward_summary"),
+                rewards_table=rewards,
+                stats=stats,
+                scope_assets=all_assets,
+                eligible_assets=eligible_assets,
+                burp_config_path=burp_cfg_path,
+                attack_surface_summary_all=summarize_attack_surface(all_assets),
+                attack_surface_summary_eligible=summarize_attack_surface(eligible_assets),
+            )
+            program_path = dirs["programs"] / f"{slug}.json"
+            program_path.write_text(json.dumps(asdict(record), indent=2), encoding="utf-8")
+            programs.append(record)
+    index_path = dirs["base"] / "programs_index.json"
+    index_data = [
+        {
+            "slug": p.slug,
+            "name": p.name,
+            "detail_url": p.detail_url,
+            "website_url": p.website_url,
+            "eligible_assets_count": len(p.eligible_assets),
+            "burp_config_path": p.burp_config_path,
+            "attack_surfaces_all": p.attack_surface_summary_all,
+            "attack_surfaces_eligible": p.attack_surface_summary_eligible,
+            "targets": [
+                cat
+                for cat, count in p.attack_surface_summary_eligible.items()
+                if count > 0
+            ],
+        }
+        for p in programs
+    ]
+    index_path.write_text(json.dumps(index_data, indent=2), encoding="utf-8")
+    # Flattened index: one entry per eligible asset with attack-surface labels.
+    # This is convenient for LangChain / MCP agents to reason about targets.
+    surface_index_path = dirs["base"] / "attack_surface_index.json"
+    surface_index: List[Dict[str, Any]] = []
+    for p in programs:
+        for asset in p.eligible_assets:
+            surface_index.append(
+                {
+                    "program_slug": p.slug,
+                    "program_name": p.name,
+                    "program_detail_url": p.detail_url,
+                    "program_website_url": p.website_url,
+                    "asset_name": asset.asset_name,
+                    "impact_scope": asset.impact_scope,
+                    "asset_type": asset.asset_type,
+                    "coverage": asset.coverage,
+                    "max_severity": asset.max_severity,
+                    "bounty_eligibility": asset.bounty_eligibility,
+                    "last_update": asset.last_update,
+                    "resolved_reports": asset.resolved_reports,
+                    "attack_surface": asset.attack_surface,
+                    "targets": asset.attack_surface,
+                }
+            )
+    surface_index_path.write_text(json.dumps(surface_index, indent=2), encoding="utf-8")
+    return programs
+def main(argv: Optional[Iterable[str]] = None) -> None:
+    parser = argparse.ArgumentParser(description="Scrape HackerOne opportunities & scopes")
+    parser.add_argument(
+        "--limit",
+        type=int,
+        default=None,
+        help="Max number of programs to scrape (default: all visible on main page)",
+    )
+    parser.add_argument(
+        "--max-pages",
+        type=int,
+        default=10,
+        help="Max number of paginated search result pages to crawl",
+    )
+    args = parser.parse_args(list(argv) if argv is not None else None)
+    asyncio.run(scrape_programs(limit=args.limit, max_pages=args.max_pages))
+if __name__ == "__main__":  # pragma: no cover
+    main()

data/hackerone/attack_surface_index.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ []

data/hackerone/programs/audible.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "audible",
+  "name": "audible",
+  "detail_url": "https://hackerone.com/audible?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/braze_inc.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "braze_inc",
+  "name": "braze_inc",
+  "detail_url": "https://hackerone.com/braze_inc?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/bumba_bbp.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "bumba_bbp",
+  "name": "bumba_bbp",
+  "detail_url": "https://hackerone.com/bumba_bbp?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/doordash.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "doordash",
+  "name": "doordash",
+  "detail_url": "https://hackerone.com/doordash?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/dyson.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "dyson",
+  "name": "dyson",
+  "detail_url": "https://hackerone.com/dyson?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/flipkart.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "flipkart",
+  "name": "flipkart",
+  "detail_url": "https://hackerone.com/flipkart?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/hubspot.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "hubspot",
+  "name": "hubspot",
+  "detail_url": "https://hackerone.com/hubspot?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/inspectorio.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "inspectorio",
+  "name": "inspectorio",
+  "detail_url": "https://hackerone.com/inspectorio?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/kong.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "kong",
+  "name": "kong",
+  "detail_url": "https://hackerone.com/kong?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/mpesa.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "mpesa",
+  "name": "mpesa",
+  "detail_url": "https://hackerone.com/mpesa?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/neon_bbp.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "neon_bbp",
+  "name": "neon_bbp",
+  "detail_url": "https://hackerone.com/neon_bbp?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/netscaler_public_program.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "netscaler_public_program",
+  "name": "netscaler_public_program",
+  "detail_url": "https://hackerone.com/netscaler_public_program?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/northerntechhq.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "northerntechhq",
+  "name": "northerntechhq",
+  "detail_url": "https://hackerone.com/northerntechhq?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/notion.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "notion",
+  "name": "notion",
+  "detail_url": "https://hackerone.com/notion?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/oppo_bbp.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "oppo_bbp",
+  "name": "oppo_bbp",
+  "detail_url": "https://hackerone.com/oppo_bbp?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/porsche.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "porsche",
+  "name": "porsche",
+  "detail_url": "https://hackerone.com/porsche?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/ripio.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "ripio",
+  "name": "ripio",
+  "detail_url": "https://hackerone.com/ripio?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/robinhood.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "robinhood",
+  "name": "robinhood",
+  "detail_url": "https://hackerone.com/robinhood?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/silabs.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "silabs",
+  "name": "silabs",
+  "detail_url": "https://hackerone.com/silabs?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/stripchat.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "stripchat",
+  "name": "stripchat",
+  "detail_url": "https://hackerone.com/stripchat?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/syfe_bbp.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "syfe_bbp",
+  "name": "syfe_bbp",
+  "detail_url": "https://hackerone.com/syfe_bbp?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/wallet_on_telegram.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "wallet_on_telegram",
+  "name": "wallet_on_telegram",
+  "detail_url": "https://hackerone.com/wallet_on_telegram?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/whoop_bug_bounty.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "whoop_bug_bounty",
+  "name": "whoop_bug_bounty",
+  "detail_url": "https://hackerone.com/whoop_bug_bounty?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs/zooplus.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "slug": "zooplus",
+  "name": "zooplus",
+  "detail_url": "https://hackerone.com/zooplus?type=team",
+  "website_url": null,
+  "reward_summary_card": null,
+  "rewards_table": {},
+  "stats": {},
+  "scope_assets": [],
+  "eligible_assets": [],
+  "burp_config_path": null,
+  "attack_surface_summary_all": {},
+  "attack_surface_summary_eligible": {}
+}

data/hackerone/programs_index.json ADDED Viewed

	@@ -0,0 +1,266 @@

+[
+  {
+    "slug": "northerntechhq",
+    "name": "northerntechhq",
+    "detail_url": "https://hackerone.com/northerntechhq?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "notion",
+    "name": "notion",
+    "detail_url": "https://hackerone.com/notion?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "stripchat",
+    "name": "stripchat",
+    "detail_url": "https://hackerone.com/stripchat?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "doordash",
+    "name": "doordash",
+    "detail_url": "https://hackerone.com/doordash?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "kong",
+    "name": "kong",
+    "detail_url": "https://hackerone.com/kong?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "robinhood",
+    "name": "robinhood",
+    "detail_url": "https://hackerone.com/robinhood?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "netscaler_public_program",
+    "name": "netscaler_public_program",
+    "detail_url": "https://hackerone.com/netscaler_public_program?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "ripio",
+    "name": "ripio",
+    "detail_url": "https://hackerone.com/ripio?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "porsche",
+    "name": "porsche",
+    "detail_url": "https://hackerone.com/porsche?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "mpesa",
+    "name": "mpesa",
+    "detail_url": "https://hackerone.com/mpesa?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "dyson",
+    "name": "dyson",
+    "detail_url": "https://hackerone.com/dyson?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "hubspot",
+    "name": "hubspot",
+    "detail_url": "https://hackerone.com/hubspot?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "braze_inc",
+    "name": "braze_inc",
+    "detail_url": "https://hackerone.com/braze_inc?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "wallet_on_telegram",
+    "name": "wallet_on_telegram",
+    "detail_url": "https://hackerone.com/wallet_on_telegram?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "audible",
+    "name": "audible",
+    "detail_url": "https://hackerone.com/audible?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "silabs",
+    "name": "silabs",
+    "detail_url": "https://hackerone.com/silabs?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "flipkart",
+    "name": "flipkart",
+    "detail_url": "https://hackerone.com/flipkart?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "zooplus",
+    "name": "zooplus",
+    "detail_url": "https://hackerone.com/zooplus?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "syfe_bbp",
+    "name": "syfe_bbp",
+    "detail_url": "https://hackerone.com/syfe_bbp?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "neon_bbp",
+    "name": "neon_bbp",
+    "detail_url": "https://hackerone.com/neon_bbp?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "whoop_bug_bounty",
+    "name": "whoop_bug_bounty",
+    "detail_url": "https://hackerone.com/whoop_bug_bounty?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "oppo_bbp",
+    "name": "oppo_bbp",
+    "detail_url": "https://hackerone.com/oppo_bbp?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "inspectorio",
+    "name": "inspectorio",
+    "detail_url": "https://hackerone.com/inspectorio?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  },
+  {
+    "slug": "bumba_bbp",
+    "name": "bumba_bbp",
+    "detail_url": "https://hackerone.com/bumba_bbp?type=team",
+    "website_url": null,
+    "eligible_assets_count": 0,
+    "burp_config_path": null,
+    "attack_surfaces_all": {},
+    "attack_surfaces_eligible": {},
+    "targets": []
+  }
+]

data/mitre/mitre_minimal.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "stages": [
+    {
+      "id": "recon",
+      "name": "Reconnaissance",
+      "mitre_tactic_id": "TA0043",
+      "matrix": "ATTACK",
+      "color": "#1f77b4"
+    },
+    {
+      "id": "initial_access",
+      "name": "Initial Access",
+      "mitre_tactic_id": "TA0001",
+      "matrix": "ATTACK",
+      "color": "#ff7f0e"
+    },
+    {
+      "id": "execution",
+      "name": "Execution",
+      "mitre_tactic_id": "TA0002",
+      "matrix": "ATTACK",
+      "color": "#2ca02c"
+    },
+    {
+      "id": "persistence",
+      "name": "Persistence",
+      "mitre_tactic_id": "TA0003",
+      "matrix": "ATTACK",
+      "color": "#d62728"
+    },
+    {
+      "id": "exfiltration",
+      "name": "Exfiltration",
+      "mitre_tactic_id": "TA0010",
+      "matrix": "ATTACK",
+      "color": "#9467bd"
+    },
+    {
+      "id": "impact",
+      "name": "Impact",
+      "mitre_tactic_id": "TA0040",
+      "matrix": "ATTACK",
+      "color": "#8c564b"
+    }
+  ]
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+crawl4ai
+beautifulsoup4
+langchain
+gradio==6.0.1
+plotly
+transformers
+huggingface_hub
+matplotlib
+click==8.1.7

scope-analysis.md ADDED Viewed

	@@ -0,0 +1,7 @@

+## Scope analysis
+This document lists the datasets and resources that I will use to implement the cyber-vibehacking platform:
+- bug bounty offerings and their scope:
+    - from [HackerOne Opportunities](https://hackerone.com/opportunities/all).
+    - then setting up the necessary infrastructure to host the platform.

specs-cyber-vibehacking.md ADDED Viewed

	@@ -0,0 +1,209 @@

+## Building the ultimate vibe hacking platform for validating attack vectors and cyber threat intelligence
+### 1. Hackathon and sponsor context
+This spec describes a Hugging Face Space built for the **MCP 1st Birthday** hackathon, co-hosted by **Anthropic** and **Gradio**, and supported by a broad set of sponsors.
+From the hackathon page:
+- **Cash prizes – $21K total**
+  - Hugging Face: $15K
+  - Modal: $2.5K
+  - Blaxel: $2.5K
+  - LlamaIndex: $1K
+- **API credits and perks**
+  - Anthropic: $25K Claude API credits
+  - OpenAI: $25 credits for all participants + extra awards
+  - Hugging Face: $25 credits for all participants
+  - Nebius Token Factory: $50 credits for all participants
+  - Modal: $250 credits for all participants
+  - Blaxel: $250 credits for all participants
+  - ElevenLabs: membership credits for thousands of participants
+  - SambaNova: $25 credits for 1500 participants
+  - Hyperbolic: $25 credits for 3000 participants
+  - Google Gemini: $30K Gemini API credits for Track 2 winners
+- **Special sponsor awards**
+  - Modal Innovation Award – best project using Modal
+  - Blaxel Choice Award – best project using Blaxel
+  - LlamaIndex category award – best project using LlamaIndex
+  - ElevenLabs award – best project using ElevenLabs
+  - OpenAI category awards – best ChatGPT app / best OpenAI API integration
+  - Google Gemini special category award – best Track 2 use of Gemini API
+This project is designed to be eligible for **Track 2: MCP in Action** (and optionally Track 1 via MCP server components), while also making it easy to plug in selected sponsor technologies.
+### 2. Threat landscape: AI-orchestrated cyber espionage
+Anthropic's report on the [first AI-orchestrated cyber espionage campaign](https://www.anthropic.com/news/disrupting-AI-espionage) describes an operation where:
+- A state-sponsored actor used **agentic AI capabilities** to automate 80–90% of a large-scale cyber campaign.
+- **Claude Code** was used as an automated tool to perform reconnaissance, identify high-value databases, write exploit code, harvest credentials, and exfiltrate data.
+- The attackers broke down their operation into small, seemingly-benign subtasks and **jailbroke** the model by misrepresenting the context ("defensive testing"), undermining guardrails.
+- The attack showcased three crucial capabilities:
+  1. **Intelligence** – models can plan and execute sophisticated multi-step tasks.
+  2. **Agency** – models can operate in long-running loops with minimal human oversight.
+  3. **Tools** – via standards like the [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro), models can control external software and infrastructure.
+This fundamentally lowers the barrier for advanced cyber operations. The same properties, however, can be turned toward **defense** if we build the right tooling.
+### 3. Vision: Cyber Vibe Lab
+The **Cyber Vibe Lab** is a Gradio 6 application plus a set of MCP tools that:
+- Lets defenders and security researchers **explore attack surfaces** (e.g., web apps, APIs, cloud infra) using AI agents.
+- Uses the concept of **"vibe hacking"**—iterative, exploratory prompting and tool use—to map how an agent might behave offensively, while always steering outputs toward defense, detection, and hardening.
+- Grounds its reasoning in:
+  - Real-world threat patterns from Anthropic's espionage case study.
+  - Structured scope data (e.g., programs and assets from HackerOne scraping).
+  - Technical documentation and code pulled via MCP servers like `perplexity-ask` and `deepwiki`.
+Primary user personas:
+- **Red-team engineer** – wants to simulate attacker paths and identify likely weak points.
+- **Blue-team / SOC analyst** – wants playbooks for detection, logging, and response.
+- **Security architect / CISO** – wants high-level summaries of where AI-enabled attacks could hit and what controls to prioritize.
+### 4. System architecture (high level)
+1. **Gradio 6 front-end (app.py)**
+   - Built as a Gradio 6 app (e.g., `Blocks` + `ChatInterface`).
+   - Provides a rich chat and control panel experience: scenario selection, target selection, mode (red vs blue), and visibility into tool calls.
+2. **MCP tool layer**
+   The app interacts with multiple MCP servers (configured externally via the MCP runtime):
+   - **`mcp://perplexity-ask`** – web-scale research and summarization of technologies, CVEs, protocols, and security patterns.
+   - **`mcp://deepwiki`** – deep dives into GitHub repos and docs to understand actual implementations (e.g., auth flows, crypto usage, infra-as-code).
+   - **Local scope server (e.g., `mcp://hackerone-scope`)** – MCP wrapper around the JSON program and asset data scraped from HackerOne.
+3. **Orchestration and safety layer**
+   - **Scenario composer** – converts a user request ("simulate AI-led attack on our web app perimeter") plus selected scope into a structured multi-phase plan mirroring the Anthropic report phases (recon → exploit → persistence → exfiltration).
+   - **Tool router** – chooses when to call Perplexity, DeepWiki, or the scope server to enrich each phase.
+   - **Safety filter** – enforces house rules: no hand-off of ready-to-run exploit code or credentials; outputs are reframed as security testing checklists, monitoring recommendations, and defensive mitigations.
+4. **Data and logging**
+   - Every interaction is split into:
+     - **Attack narrative** – how an AI agent might chain tools and tasks in an offensive scenario.
+     - **Defense narrative** – concrete logging, hardening, and detection actions mapped to each step.
+   - Logs are stored in a structured format (JSON) so they can be indexed by other tools (e.g., LlamaIndex or LangChain) later.
+### 5. Roadmap to the "ultimate" MCP framework
+#### Phase 0 – Foundations
+- Add `app.py` with a minimal Gradio 6 chat experience.
+- Update `requirements.txt` to include `gradio>=6`.
+- Ensure the repo is ready to run as a Hugging Face Space (standard `demo` variable or `app` export).
+#### Phase 1 – MCP in Action MVP (Track 2)
+- Implement a single **Cyber Vibe Agent** function that:
+  - Accepts a free-form security question plus optional target/program name.
+  - Classifies intent (recon vs exploitation vs defense vs unknown).
+  - Produces an analysis that explicitly references the Anthropic phases (intelligence, agency, tools, multi-phase attack) but is framed as **defensive guidance**.
+- Wire in planned MCP calls conceptually (Perplexity + DeepWiki), even if in early versions the calls are stubbed or proxied.
+Deliverable: a working Gradio app that already satisfies hackathon requirements (UI, documentation, demo video) and can be extended without breaking changes.
+#### Phase 2 – Deep MCP integration and HackerOne data
+- Expose scraped HackerOne program data (JSON) as an MCP tool (`hackerone-scope`).
+- In the UI, allow the user to:
+  - Select one or more programs (e.g., `airbnb`, `bookingcom`, `wallet_on_telegram`).
+  - See their categorized attack surface (web app, database, internal network, cloud infra, appliances, etc.).
+- Adjust the Cyber Vibe Agent to:
+  - Incorporate the selected scope into its planning.
+  - Produce tailored red/blue playbooks per attack-surface category.
+#### Phase 3 – Sponsor-aligned extensions
+Optional but desirable modules, depending on time and credits:
+- **Anthropic (Claude)** – use Claude via MCP as the primary reasoning engine for complex multi-step cyber scenarios.
+- **LlamaIndex** – index HackerOne scope, logs, and playbooks so the agent can retrieve and reuse prior analyses.
+- **OpenAI / Gemini** – add alternate model backends behind the same MCP interface for comparison or ensemble reasoning.
+- **ElevenLabs** – generate narrated walkthroughs of attack/defense scenarios for training.
+- **Modal, Nebius Token Factory, SambaNova, Hyperbolic** – offload heavy analysis or large-scale simulations to external compute providers.
+The app README will clearly state which sponsors are actually integrated in the submitted version; the architecture leaves hooks for the rest.
+#### Phase 4 – Polish and judging criteria
+- Match hackathon judging criteria explicitly:
+  - **Completeness** – Hugging Face Space, README, social media post link, demo video.
+  - **Design / polished UI** – clear navigation, visible tool calls, and understandable outputs.
+  - **Functionality** – real MCP usage, not just mock text; integration of at least two MCP tools.
+  - **Creativity** – unique framing of "vibe hacking" for defensive cyber operations.
+  - **Documentation** – detailed architecture and threat-model explanations in the README and this spec.
+  - **Real-world impact** – show how a security team could adopt the Cyber Vibe Lab in their workflows.
+### 6. Gradio 6 app design (app.py)
+Key elements to implement in `app.py`:
+- **Title and header** – clearly highlight the MCP and cyber-defense focus.
+- **Intro text** – 2–3 short paragraphs summarizing:
+  - Anthropic's AI espionage case.
+  - The purpose of the Cyber Vibe Lab.
+  - Which MCP tools and sponsors are used.
+- **Chat interface** – Gradio `ChatInterface` or `Chatbot` wrapping the Cyber Vibe Agent function.
+- **Optional controls** – dropdowns or checkboxes for:
+  - Target program / asset group.
+  - Mode (Red-team simulation vs Blue-team defense).
+  - Level of detail (high-level summary vs step-by-step plan).
+The first implementation can keep the MCP calls abstracted behind a single function; subsequent iterations can gradually introduce real MCP communication as the runtime configuration is finalized.
+### 7. NotebookLM-style tri-panel UI
+The UI is organized into three main panels, inspired by Google's NotebookLM:
+- **Sources (left)** – manage uploaded files, URLs, MITRE docs, and Hugging Face assets (models, datasets). Users can:
+  - Add sources via upload or links.
+  - Toggle whether each source is used for retrieval (context) or as an "attack target" (e.g., HF model to probe using ATLAS-style tests).
+  - Trigger web/MITRE discovery using MCP (`perplexity-ask`) and convert results into new sources.
+- **Chat (center)** – the main dialogue surface between the user and the Cyber Vibe Agent:
+  - Uses a shadcn-style conversation layout (user/agent bubbles, inline tool-call cards).
+  - Shows MCP tool invocations as small cards (Perplexity, DeepWiki, GitHub, Playwright, HF model probes).
+  - Allows attaching specific sources from the left panel to ground the current question.
+- **Studio (right)** – visualization and reporting:
+  - **Mind Map view**: graph of the evolving attack chain, with nodes for stages, ATT&CK tactics/techniques, and ATLAS categories.
+  - **Timeline view**: Plotly-based chart of turns over time, colored by stage/tactic.
+  - **Reports view**: generated summaries of the session (phases exercised, ATT&CK/ATLAS coverage, defensive recommendations).
+The initial implementation will use simple placeholders (markdown + basic charts) for the Studio panel, then progressively integrate Plotly and graph visualizations.
+### 8. Hugging Face ecosystem integration
+The application integrates with the Hugging Face ecosystem at several layers:
+- **Local transformers models** – for fast, on-device tasks:
+  - Stage classification (Recon / Initial Access / Execution / Persistence / Exfiltration / Impact).
+  - Optional ATT&CK/ATLAS tagging via zero-shot or multi-label classifiers.
+- **Hosted Inference via `huggingface_hub` / `inference`**:
+  - Use `InferenceClient` to call larger instruction-tuned models for the Cyber Vibe Agent itself.
+  - Support OpenAI-style chat semantics when beneficial for agent orchestration.
+- **HF models as "targets"**:
+  - Users can register a model ID as a source (e.g., `org/support-bot-7b`).
+  - The system runs a controlled "vibe harness" of prompts to probe for ATLAS-relevant behaviors (data leakage, jailbreak susceptibility, unsafe generations) and logs findings per model.
+- **Embeddings for retrieval**:
+  - Use HF embedding models to index user-provided sources (docs, configs, logs) and MITRE descriptions.
+  - For each question, retrieve relevant chunks and feed them into the LLM prompt, alongside the current attack-chain state.
+These integrations are abstracted behind internal helper modules so that the underlying models (local vs hosted) can be swapped without changing the Gradio UI.
+### 9. Deliverables checklist
+- `specs-cyber-vibehacking.md` (this file) – architecture and roadmap.
+- `app.py` – Gradio 6 main page implementing the Cyber Vibe Lab UI with tri-panel layout.
+- Updated `requirements.txt` with `gradio` (and, in later phases, `transformers`, `huggingface_hub`, `inference`, and `plotly`).
+- Hugging Face Space README including:
+  - Correct track tags (e.g., `mcp-in-action-track-enterprise` / `mcp-in-action-track-creative`).
+  - Clear description of sponsor integrations.
+  - Links to the Anthropic report and relevant MCP docs.
+- Short demo video showing:
+  - A user selecting a program or scenario.
+  - The agent generating an attack narrative and defense recommendations.
+  - The Studio panel updating its mind map / timeline to reflect the simulated attack chain.
+  - Any sponsor-specific enhancements (e.g., LlamaIndex retrieval, ElevenLabs narration).