Dhruv committed on
Commit
52ce761
·
verified ·
1 Parent(s): 1abdb8b

Upload folder using huggingface_hub
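For reference, an upload like this is typically made with huggingface_hub's upload_folder API; a minimal sketch (the repo id and folder path below are placeholders, not taken from this commit):

from huggingface_hub import HfApi

api = HfApi()  # authenticates via HF_TOKEN or a cached `huggingface-cli login`
api.upload_folder(
    folder_path=".",                      # local project folder to push
    repo_id="your-username/your-space",   # placeholder Space repo id
    repo_type="space",
    commit_message="Upload folder using huggingface_hub",
)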

.DS_Store ADDED
Binary file (6.15 kB).
 
.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ .venv
2
+ .env
app.py ADDED
@@ -0,0 +1,398 @@
1
+ import json
2
+ import os
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import gradio as gr
8
+ import plotly.graph_objects as go
9
+ from huggingface_hub import InferenceClient
10
+ from transformers import pipeline
11
+
12
+ APP_TITLE = "Cyber Vibe Lab – MCP in Action"
13
+
14
+ INTRO_MD = """
15
+ ### Cyber Vibe Lab
16
+
17
+ This prototype Gradio 6 application is designed for the **MCP 1st Birthday** hackathon, co-hosted by **Anthropic** and **Gradio**.
18
+
19
+ It explores how AI agents and the Model Context Protocol (MCP) can be used to:
20
+ - Reflect on **AI-orchestrated cyber espionage** (as described in Anthropic's report).
21
+ - Perform structured **"vibe hacking"** simulations of attack paths.
22
+ - Always translate those simulations into **defensive guidance** for security teams.
23
+
24
+ In a full deployment, this app would call MCP servers such as:
25
+ - `mcp://perplexity-ask` for web-scale security research and summarization.
26
+ - `mcp://deepwiki` for deep dives into code and documentation of particular systems.
27
+
28
+ This local version keeps those calls conceptual so that the app runs without extra setup while still matching the intended architecture.
29
+ """
30
+
31
+
32
+ @dataclass
33
+ class AttackStage:
34
+ id: str
35
+ name: str
36
+ mitre_tactic_id: str
37
+ matrix: str
38
+ color: str
39
+
40
+
41
+ def load_mitre_stages() -> Dict[str, AttackStage]:
42
+ """Load a minimal set of MITRE ATT&CK-style stages from JSON.
43
+
44
+ The file is expected at data/mitre/mitre_minimal.json relative to this script.
45
+ If it is missing, we fall back to an empty dict and show placeholders.
46
+ """
47
+
48
+ base = Path(__file__).parent
49
+ path = base / "data" / "mitre" / "mitre_minimal.json"
50
+ stages: Dict[str, AttackStage] = {}
51
+
52
+ try:
53
+ with path.open("r", encoding="utf-8") as f:
54
+ raw = json.load(f)
55
+ for s in raw.get("stages", []):
56
+ try:
57
+ stages[s["id"]] = AttackStage(
58
+ id=s["id"],
59
+ name=s.get("name", s["id"]),
60
+ mitre_tactic_id=s.get("mitre_tactic_id", ""),
61
+ matrix=s.get("matrix", "ATTACK"),
62
+ color=s.get("color", "#888888"),
63
+ )
64
+ except KeyError:
65
+ continue
66
+ except FileNotFoundError:
67
+ # Keep stages empty; the Studio panel will render a placeholder figure.
68
+ pass
69
+
70
+ return stages
71
+
72
+
73
+ MITRE_STAGES: Dict[str, AttackStage] = load_mitre_stages()
74
+
75
+ HF_STAGE_MODEL_ID = os.getenv("HF_STAGE_MODEL_ID")
76
+ _stage_clf = None
77
+
78
+
79
+ def _get_stage_classifier():
80
+ """Lazily construct a Hugging Face text-classification pipeline, if configured.
81
+
82
+ If HF_STAGE_MODEL_ID is not set or pipeline creation fails, returns None and
83
+ the app falls back to keyword-based heuristics.
84
+ """
85
+
86
+ global _stage_clf
87
+ if _stage_clf is not None:
88
+ return _stage_clf
89
+
90
+ if not HF_STAGE_MODEL_ID:
91
+ return None
92
+
93
+ try:
94
+ _stage_clf = pipeline("text-classification", model=HF_STAGE_MODEL_ID)
95
+ except Exception:
96
+ _stage_clf = None
97
+
98
+ return _stage_clf
99
+
100
+
101
+ def classify_stage(text: str) -> str:
102
+ """Classify a message into a coarse attack stage.
103
+
104
+ 1. Try a configured HF text-classification model (if available).
105
+ 2. Fall back to simple keyword heuristics that map text to stage IDs from
106
+ MITRE_STAGES (e.g., "recon", "initial_access").
107
+ """
108
+
109
+ txt = (text or "").lower()
110
+
111
+ clf = _get_stage_classifier()
112
+ if clf is not None:
113
+ try:
114
+ out = clf(txt, truncation=True, max_length=256)
115
+ label = str(out[0]["label"]).lower()
116
+ if label in MITRE_STAGES:
117
+ return label
118
+ except Exception:
119
+ # Fall back to heuristics
120
+ pass
121
+
122
+ # Heuristic mapping based on common wording
123
+ if any(k in txt for k in ["recon", "scan", "enumerat", "footprint"]):
124
+ return "recon"
125
+ if any(k in txt for k in ["login", "credential", "password", "phish", "initial access"]):
126
+ return "initial_access"
127
+ if any(k in txt for k in ["execute", "payload", "command", "run code"]):
128
+ return "execution"
129
+ if any(k in txt for k in ["persist", "backdoor", "autorun", "startup"]):
130
+ return "persistence"
131
+ if any(k in txt for k in ["exfil", "leak", "download", "expose data"]):
132
+ return "exfiltration"
133
+ if any(k in txt for k in ["destroy", "wipe", "ransom", "impact"]):
134
+ return "impact"
135
+
136
+ # Default bucket
137
+ return "execution"
138
+
139
+
140
+ def build_attack_chain_figure(turns: List[Dict[str, Any]]) -> go.Figure:
141
+ """Aggregate turns into a simple bar chart of stages touched in this session."""
142
+
143
+ if not MITRE_STAGES:
144
+ fig = go.Figure()
145
+ fig.add_annotation(
146
+ text="MITRE stages not loaded yet.",
147
+ showarrow=False,
148
+ x=0.5,
149
+ y=0.5,
150
+ xref="paper",
151
+ yref="paper",
152
+ )
153
+ fig.update_xaxes(visible=False)
154
+ fig.update_yaxes(visible=False)
155
+ return fig
156
+
157
+ if not turns:
158
+ fig = go.Figure()
159
+ fig.add_annotation(
160
+ text="No classified turns yet.",
161
+ showarrow=False,
162
+ x=0.5,
163
+ y=0.5,
164
+ xref="paper",
165
+ yref="paper",
166
+ )
167
+ fig.update_xaxes(visible=False)
168
+ fig.update_yaxes(visible=False)
169
+ return fig
170
+
171
+ counts: Dict[str, int] = {stage_id: 0 for stage_id in MITRE_STAGES.keys()}
172
+ for t in turns:
173
+ sid = t.get("stage_id")
174
+ if sid in counts:
175
+ counts[sid] += 1
176
+
177
+ stage_ids = list(MITRE_STAGES.keys())
178
+ names = [MITRE_STAGES[s].name for s in stage_ids]
179
+ values = [counts.get(s, 0) for s in stage_ids]
180
+ colors = [MITRE_STAGES[s].color for s in stage_ids]
181
+
182
+ fig = go.Figure(
183
+ data=[
184
+ go.Bar(
185
+ x=names,
186
+ y=values,
187
+ marker_color=colors,
188
+ )
189
+ ]
190
+ )
191
+ fig.update_layout(
192
+ title="ATT&CK-style stages touched in this session",
193
+ xaxis_title="Stage",
194
+ yaxis_title="Number of turns",
195
+ )
196
+ return fig
197
+
198
+
199
+ def cyber_vibe_agent(message: str, history, target: str, mode: str, detail_level: str) -> str:
200
+ """Core reasoning function for the Cyber Vibe Lab.
201
+
202
+ This is intentionally defensive: it never returns exploit code or
203
+ actionable credentials. Instead, it frames outputs as:
204
+ - Attacker "vibes" and likely phases, inspired by Anthropic's report.
205
+ - Concrete defensive recommendations, logging, and hardening steps.
206
+
207
+ In a full MCP-enabled version, this function would orchestrate calls to
208
+ MCP tools such as `perplexity-ask` and `deepwiki` to pull in:
209
+ - Relevant threat intelligence and best practices.
210
+ - Implementation-specific details for the selected target system.
211
+ """
212
+
213
+ target_clean = (target or "").strip() or "your system or program"
214
+ mode_clean = mode or "Mixed"
215
+ detail_clean = detail_level or "High-level summary"
216
+
217
+ # Very lightweight history awareness for now; could be extended later.
218
+ turns_so_far = len(history) if isinstance(history, list) else 0
219
+
220
+ attack_narrative_header = "## Conceptual attack narrative (for red-team simulation)"
221
+ defense_header = "## Defensive guidance (blue-team focus)"
222
+
223
+ # High-level narrative aligned with Anthropic's phases.
224
+ attack_lines = [
225
+ f"- **Phase 1  Recon & target scoping**: An AI agent profiles `{target_clean}` using public and internal metadata, searching for entry points (web apps, APIs, cloud services, CI/CD, identity providers).",
226
+ "- **Phase 2  Access & foothold**: The agent chains small, seemingly-benign tasks (e.g., \"test this endpoint\", \"scan this range\") to probe for weak auth, misconfigurations, or exposed secrets.",
227
+ "- **Phase 3  Privilege escalation & lateral movement**: Once a weak point is identified, the agent iteratively refines exploit ideas, tests them, and expands access within the environment.",
228
+ "- **Phase 4  Persistence & exfiltration**: The agent catalogs high-value data stores, automates data collection, and prepares exfiltration channels  all while documenting its steps for future reuse.",
229
+ ]
230
+
231
+ if detail_clean == "Step-by-step plan":
232
+ attack_lines.append(
233
+ "- **Phase 5  Automation & scaling**: The framework replays successful chains of actions across many similar assets (e.g., multiple subdomains or tenants), approaching the kind of scaled automation described in Anthropic's AI-espionage report."
234
+ )
235
+
236
+ if mode_clean == "Blue-team defense":
237
+ mode_note = (
238
+ "_Mode: blue-team only. The attack narrative is kept abstract and is used strictly "
239
+ "to structure defensive thinking._"
240
+ )
241
+ elif mode_clean == "Red-team simulation":
242
+ mode_note = (
243
+ "_Mode: red-team simulation. The narrative focuses on attacker behavior but omits "
244
+ "specific exploit code or instructions._"
245
+ )
246
+ else:
247
+ mode_note = (
248
+ "_Mode: mixed. We balance attacker perspective (red) and defender response (blue), "
249
+ "always biasing outputs toward defense._"
250
+ )
251
+
252
+ defense_lines = [
253
+ f"- **Scope management**: Maintain an up-to-date asset inventory for `{target_clean}` (domains, APIs, cloud resources, data stores). Use it to bound what automated agents can touch.",
254
+ "- **Guardrails on tools and agents**: Enforce strong safety and auditability for any internal AI tooling (e.g., MCP-based agents) so they cannot be repurposed as covert red-team frameworks.",
255
+ "- **Detection engineering**: Instrument logs and alerts for patterns Anthropic highlighted: many small, tool-like requests in succession; repeated reconnaissance on the same surface; iterative attempts around auth boundaries.",
256
+ "- **Least privilege & segmentation**: Assume an AI agent will eventually find a weak link. Design IAM, network segmentation, and blast-radius limits so that a single foothold remains contained.",
257
+ "- **Incident response playbooks**: Prepare playbooks specifically for AI-orchestrated attacks (sudden high-volume but semi-random probing, large-scale code generation, mass credential testing).",
258
+ ]
259
+
260
+ if detail_clean == "Step-by-step plan":
261
+ defense_lines.extend(
262
+ [
263
+ "- **Red/blue rehearsal with agents**: Use the Cyber Vibe Lab to stage hypothetical campaigns and then codify new detections and controls after each simulated run.",
264
+ "- **MCP-aware hardening**: For each MCP tool you expose (perplexity-style research, code repo analysis, internal APIs), document its abuse potential and add explicit rate limits, scopes, and safety filters.",
265
+ ]
266
+ )
267
+
268
+ mcp_note = (
269
+ "\n> In a full setup, this analysis would be enriched by MCP calls to `perplexity-ask` "
270
+ "(for live threat intel and standards) and `deepwiki` (for code/config insights about the selected target)."
271
+ )
272
+
273
+ user_hint = "\n\n> Tip: refine the vibe by asking follow-ups like \"focus on identity\" or \"assume a multi-cloud target\". Each turn can tighten the scenario." # noqa: E501
274
+
275
+ response = (
276
+ f"{mode_note}\n\n"
277
+ f"{attack_narrative_header}\n" + "\n".join(attack_lines) + "\n\n" + defense_header + "\n" + "\n".join(defense_lines)
278
+ )
279
+
280
+ if turns_so_far == 0:
281
+ response += mcp_note
282
+
283
+ response += user_hint
284
+
285
+ return response
286
+
287
+
288
+ def register_sources(files, url, current_sources):
289
+ """Update the in-app list of sources (files/URLs) for the left panel.
290
+
291
+ This is a lightweight placeholder; later we can extend it to track types,
292
+ tags, and whether a source is used for retrieval vs attack-target testing.
293
+ """
294
+
295
+ sources = list(current_sources or [])
296
+
297
+ if files:
298
+ for f in files:
299
+ name = getattr(f, "name", "uploaded") # Gradio File objects expose `.name`
300
+ sources.append({"type": "file", "name": name})
301
+
302
+ if url and url.strip():
303
+ sources.append({"type": "url", "name": url.strip()})
304
+
305
+ # Return updated state, and reset file + URL inputs
306
+ return sources, None, "", sources
307
+
308
+
309
+ with gr.Blocks(fill_height=True) as demo:
310
+ gr.Markdown(f"# {APP_TITLE}")
311
+ gr.Markdown(INTRO_MD)
312
+
313
+ with gr.Row(equal_height=True):
314
+ # Sources / NotebookLM-style left panel
315
+ with gr.Column(scale=1):
316
+ gr.Markdown("## Sources\nManage uploaded files, URLs, MITRE docs, and Hugging Face assets.")
317
+ source_files = gr.File(label="Upload sources", file_count="multiple")
318
+ source_url = gr.Textbox(label="Add URL source", placeholder="https://attack.mitre.org/...")
319
+ add_source = gr.Button("Add source")
320
+ sources_state = gr.State([])
321
+ sources_view = gr.JSON(label="Current sources (preview)", value=[])
322
+
323
+ # Center chat panel
324
+ with gr.Column(scale=2):
325
+ gr.Markdown("## Chat")
326
+ with gr.Row():
327
+ target_input = gr.Textbox(
328
+ label="Target / system name (optional)",
329
+ placeholder="e.g., airbnb (HackerOne), internal CRM, DeFi dapp",
330
+ scale=2,
331
+ )
332
+ mode_input = gr.Dropdown(
333
+ ["Mixed", "Red-team simulation", "Blue-team defense"],
334
+ value="Mixed",
335
+ label="Mode",
336
+ scale=1,
337
+ )
338
+
339
+ detail_level_input = gr.Radio(
340
+ ["High-level summary", "Step-by-step plan"],
341
+ value="High-level summary",
342
+ label="Detail level",
343
+ )
344
+
345
+ chatbot = gr.Chatbot(label="Cyber Vibe dialogue")
346
+ msg = gr.Textbox(
347
+ label="Describe your scenario or question",
348
+ placeholder=(
349
+ "Describe a system and what you want to explore from a cyber 'vibe hacking' "
350
+ "perspective..."
351
+ ),
352
+ )
353
+ clear = gr.Button("Clear conversation")
354
+
355
+ # Right Studio panel
356
+ with gr.Column(scale=1):
357
+ gr.Markdown("## Studio\nAttack-chain mind map, timeline, and reports.")
358
+ studio_plot = gr.Plot(
359
+ label="Attack chain overview",
360
+ value=build_attack_chain_figure([]),
361
+ )
362
+
363
+ attack_turns_state = gr.State([])
364
+
365
+ def respond(user_message, chat_history, target, mode, detail_level, attack_turns):
366
+ if chat_history is None:
367
+ chat_history = []
368
+ reply = cyber_vibe_agent(user_message, chat_history, target, mode, detail_level)
369
+ chat_history = chat_history + [(user_message, reply)]
370
+
371
+ turns = list(attack_turns or [])
372
+ stage_id = classify_stage(user_message)
373
+ turns.append({"text": user_message, "stage_id": stage_id})
374
+ fig = build_attack_chain_figure(turns)
375
+
376
+ return "", chat_history, turns, fig
377
+
378
+ msg.submit(
379
+ respond,
380
+ inputs=[msg, chatbot, target_input, mode_input, detail_level_input, attack_turns_state],
381
+ outputs=[msg, chatbot, attack_turns_state, studio_plot],
382
+ )
383
+
384
+ clear.click(
385
+ lambda: ([], "", [], build_attack_chain_figure([])),
386
+ inputs=None,
387
+ outputs=[chatbot, msg, attack_turns_state, studio_plot],
388
+ )
389
+
390
+ add_source.click(
391
+ register_sources,
392
+ inputs=[source_files, source_url, sources_state],
393
+ outputs=[sources_state, source_files, source_url, sources_view],
394
+ )
395
+
396
+
397
+ if __name__ == "__main__":
398
+ demo.launch()
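app.py expects a stage catalog at data/mitre/mitre_minimal.json, which is not part of this commit. Below is a minimal sketch of a file in the shape load_mitre_stages() reads: a top-level "stages" list whose ids line up with the classify_stage() heuristics. The tactic IDs and colors are illustrative, not taken from the commit.

import json
from pathlib import Path

# Hypothetical minimal stage catalog matching the loader's expected keys:
# id (required), name, mitre_tactic_id, matrix, color.
stages = [
    {"id": "recon", "name": "Reconnaissance", "mitre_tactic_id": "TA0043", "matrix": "ATTACK", "color": "#6baed6"},
    {"id": "initial_access", "name": "Initial Access", "mitre_tactic_id": "TA0001", "matrix": "ATTACK", "color": "#74c476"},
    {"id": "execution", "name": "Execution", "mitre_tactic_id": "TA0002", "matrix": "ATTACK", "color": "#fd8d3c"},
    {"id": "persistence", "name": "Persistence", "mitre_tactic_id": "TA0003", "matrix": "ATTACK", "color": "#9e9ac8"},
    {"id": "exfiltration", "name": "Exfiltration", "mitre_tactic_id": "TA0010", "matrix": "ATTACK", "color": "#e377c2"},
    {"id": "impact", "name": "Impact", "mitre_tactic_id": "TA0040", "matrix": "ATTACK", "color": "#d62728"},
]

out = Path("data/mitre/mitre_minimal.json")
out.parent.mkdir(parents=True, exist_ok=True)
out.write_text(json.dumps({"stages": stages}, indent=2), encoding="utf-8")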
data-utils/hackerone_scraper.py ADDED
@@ -0,0 +1,794 @@
1
+ """Scrape HackerOne opportunities and program scope using crawl4ai + BeautifulSoup.
2
+
3
+ Usage (from repo root):
4
+
5
+ python -m data-utils.hackerone_scraper --limit 10
6
+
7
+ This will:
8
+ - Load the public Opportunities page.
9
+ - Collect all cards with a "See details" link.
10
+ - For each program, visit its main page and scope page.
11
+ - Extract program metadata, rewards, stats, and scope assets (with a focus on
12
+ assets that are both "In scope" and "Eligible").
13
+ - Download the Burp Suite Project Configuration file when available.
14
+ - Store everything as JSON under `data/hackerone/`.
15
+
16
+ Be sure your use complies with HackerOne's terms and robots.txt.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import asyncio
23
+ import json
24
+ from dataclasses import asdict, dataclass, field
25
+ from pathlib import Path
26
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
27
+ from urllib.error import HTTPError, URLError
28
+ from urllib.parse import urljoin, urlparse
29
+ from urllib.request import Request, urlopen
30
+
31
+ from bs4 import BeautifulSoup
32
+ from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode, JsonCssExtractionStrategy
33
+ from playwright.async_api import async_playwright
34
+
35
+ BASE_URL = "https://hackerone.com"
36
+ OPPORTUNITIES_URL = f"{BASE_URL}/opportunities/all"
37
+ OPPORTUNITIES_SEARCH_URL = f"{BASE_URL}/opportunities/all/search"
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Data models
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
+ @dataclass
46
+ class ScopeAsset:
47
+ asset_name: str
48
+ impact_scope: Optional[str]
49
+ asset_type: str
50
+ coverage: str
51
+ max_severity: str
52
+ bounty_eligibility: str
53
+ last_update: str
54
+ resolved_reports: str
55
+ attack_surface: List[str] = field(default_factory=list)
56
+
57
+
58
+ @dataclass
59
+ class ProgramRecord:
60
+ slug: str
61
+ name: Optional[str]
62
+ detail_url: str
63
+ website_url: Optional[str] = None
64
+ reward_summary_card: Optional[str] = None
65
+ rewards_table: Dict[str, Any] = field(default_factory=dict)
66
+ stats: Dict[str, Any] = field(default_factory=dict)
67
+ scope_assets: List[ScopeAsset] = field(default_factory=list)
68
+ eligible_assets: List[ScopeAsset] = field(default_factory=list)
69
+ burp_config_path: Optional[str] = None
70
+ attack_surface_summary_all: Dict[str, int] = field(default_factory=dict)
71
+ attack_surface_summary_eligible: Dict[str, int] = field(default_factory=dict)
72
+
73
+
74
+ ATTACK_SURFACE_CATEGORIES = (
75
+ "web_app",
76
+ "database",
77
+ "internal_network",
78
+ "cloud_infra",
79
+ "appliance",
80
+ "other",
81
+ )
82
+
83
+
84
+ def infer_attack_surface(asset: ScopeAsset) -> List[str]:
85
+ categories: List[str] = []
86
+ t = asset.asset_type.lower()
87
+ name = asset.asset_name.lower()
88
+
89
+ if (
90
+ any(kw in t for kw in ("domain", "url", "web", "website", "api", "host"))
91
+ or any(name.endswith(ext) for ext in (".com", ".net", ".org", ".io", ".co", ".app"))
92
+ or "http://" in name
93
+ or "https://" in name
94
+ or "app store" in t
95
+ or "play store" in t
96
+ ):
97
+ categories.append("web_app")
98
+
99
+ if any(kw in t for kw in ("database", "db")) or any(
100
+ kw in name for kw in ("mysql", "postgres", "pgsql", "oracle", "mongo", "redis", "sql", "db")
101
+ ):
102
+ categories.append("database")
103
+
104
+ if "cidr" in t or any(
105
+ kw in name for kw in ("cidr", "intranet", "vpn", "lan", "10.", "192.168.", "172.16.")
106
+ ):
107
+ categories.append("internal_network")
108
+
109
+ if any(kw in t for kw in ("cloud", "storage", "bucket")) or any(
110
+ kw in name
111
+ for kw in ("s3", "ec2", "gcp", "azure", "digitalocean", "linode", "cloudfront", "cloudflare")
112
+ ):
113
+ categories.append("cloud_infra")
114
+
115
+ if "hardware" in t or any(
116
+ kw in name for kw in ("router", "switch", "firewall", "iot", "device", "appliance")
117
+ ):
118
+ categories.append("appliance")
119
+
120
+ if not categories:
121
+ categories.append("other")
122
+
123
+ return categories
124
+
125
+
126
+ def summarize_attack_surface(assets: List[ScopeAsset]) -> Dict[str, int]:
127
+ summary: Dict[str, int] = {}
128
+ for asset in assets:
129
+ for cat in asset.attack_surface:
130
+ summary[cat] = summary.get(cat, 0) + 1
131
+ return summary
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # Paths & helpers
136
+ # ---------------------------------------------------------------------------
137
+
138
+
139
+ def repo_root() -> Path:
140
+ """Assume this file lives in `data-utils/` under the repo root."""
141
+
142
+ return Path(__file__).resolve().parents[1]
143
+
144
+
145
+ def output_dirs() -> Dict[str, Path]:
146
+ root = repo_root()
147
+ base = root / "data" / "hackerone"
148
+ programs_dir = base / "programs"
149
+ burp_dir = base / "burp_configs"
150
+ debug_dir = base / "debug"
151
+ programs_dir.mkdir(parents=True, exist_ok=True)
152
+ burp_dir.mkdir(parents=True, exist_ok=True)
153
+ debug_dir.mkdir(parents=True, exist_ok=True)
154
+ return {"base": base, "programs": programs_dir, "burp": burp_dir, "debug": debug_dir}
155
+
156
+
157
+ def slug_from_program_url(url: str) -> str:
158
+ parsed = urlparse(url)
159
+ path = parsed.path.strip("/")
160
+ if not path:
161
+ return "program"
162
+ slug = path.split("/")[-1]
163
+ slug = slug.split("?")[0]
164
+ safe = [c if (c.isalnum() or c in ("-", "_")) else "_" for c in slug]
165
+ return "".join(safe) or "program"
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # Network / crawling helpers
170
+ # ---------------------------------------------------------------------------
171
+
172
+
173
+ async def fetch_html(
174
+ crawler: AsyncWebCrawler,
175
+ url: str,
176
+ *,
177
+ debug_label: Optional[str] = None,
178
+ ) -> Optional[str]:
179
+ """Fetch rendered HTML for a URL using crawl4ai with explicit config.
180
+
181
+ - Uses CacheMode.BYPASS to always get fresh content for dynamic SPA pages.
182
+ - Optionally writes a debug HTML snapshot under data/hackerone/debug/.
183
+ - Logs basic diagnostics (length and presence of key markers).
184
+ """
185
+
186
+ run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
187
+
188
+ result = await crawler.arun(url=url, config=run_config)
189
+ if not result.success:
190
+ error_msg = getattr(result, "error", getattr(result, "error_message", "unknown error"))
191
+ print(f"[WARN] crawl failed for {url}: {error_msg}")
192
+ return None
193
+
194
+ html = result.html or ""
195
+ print(f"[DEBUG] fetch_html: url={url} length={len(html)} chars")
196
+
197
+ if debug_label:
198
+ try:
199
+ dirs = output_dirs()
200
+ debug_dir = dirs["debug"]
201
+ filename = debug_label if debug_label.endswith(".html") else f"{debug_label}.html"
202
+ debug_path = debug_dir / filename
203
+ debug_path.write_text(html, encoding="utf-8")
204
+ print(f"[DEBUG] Saved HTML snapshot for {url} -> {debug_path}")
205
+ except Exception as exc: # pragma: no cover - diagnostics only
206
+ print(f"[WARN] Failed to save debug HTML for {url}: {exc}")
207
+
208
+ if "See details" in html:
209
+ print(f"[DEBUG] fetch_html: 'See details' marker present in HTML for {url}")
210
+
211
+ return html
212
+
213
+
214
+ def download_url(url: str, dest: Path) -> Optional[Path]:
215
+ try:
216
+ req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
217
+ with urlopen(req, timeout=30) as resp: # type: ignore[arg-type]
218
+ data = resp.read()
219
+ dest.write_bytes(data)
220
+ return dest
221
+ except (HTTPError, URLError, TimeoutError) as exc: # pragma: no cover
222
+ print(f"[WARN] failed to download {url}: {exc}")
223
+ return None
224
+
225
+
226
+ # ---------------------------------------------------------------------------
227
+ # HTML parsing helpers
228
+ # ---------------------------------------------------------------------------
229
+
230
+
231
+ def parse_opportunity_cards(html: str) -> List[Dict[str, Any]]:
232
+ """Return a list of dicts describing each program card with a details link."""
233
+
234
+ soup = BeautifulSoup(html, "html.parser")
235
+ cards_by_url: Dict[str, Dict[str, Any]] = {}
236
+
237
+ articles = soup.find_all("article")
238
+ print(f"[DEBUG] parse_opportunity_cards: found {len(articles)} <article> elements")
239
+
240
+ # Primary strategy: cards rendered as <article> blocks containing a link to
241
+ # a team page (href ending with ?type=team). This is more robust than
242
+ # relying on the visible "See details" text, which may differ between
243
+ # views.
244
+ per_article_team_links = 0
245
+ for article in articles:
246
+ details_link = article.find(
247
+ "a",
248
+ href=lambda h: isinstance(h, str) and "?type=team" in h,
249
+ )
250
+ if not details_link or not details_link.get("href"):
251
+ continue
252
+
253
+ per_article_team_links += 1
254
+ detail_url = urljoin(BASE_URL, details_link["href"])
255
+ if detail_url in cards_by_url:
256
+ continue
257
+
258
+ img = article.find("img", alt=True)
259
+ name = (img.get("alt") or "").strip() if img else None
260
+
261
+ reward_summary = None
262
+ for txt in article.stripped_strings:
263
+ if "$" in txt and "-" in txt:
264
+ reward_summary = txt
265
+ break
266
+
267
+ cards_by_url[detail_url] = {
268
+ "name": name,
269
+ "detail_url": detail_url,
270
+ "reward_summary": reward_summary,
271
+ }
272
+
273
+ print(
274
+ f"[DEBUG] parse_opportunity_cards: found {per_article_team_links} '?type=team' links inside <article> elements"
275
+ )
276
+
277
+ # Fallback strategy: any '?type=team' links anywhere in the document.
278
+ team_links = soup.find_all(
279
+ "a",
280
+ href=lambda h: isinstance(h, str) and "?type=team" in h,
281
+ )
282
+ print(
283
+ f"[DEBUG] parse_opportunity_cards: found {len(team_links)} '?type=team' links total in document"
284
+ )
285
+
286
+ for a in team_links:
287
+ href = a.get("href")
288
+ if not href:
289
+ continue
290
+ detail_url = urljoin(BASE_URL, href)
291
+ if detail_url in cards_by_url:
292
+ continue
293
+
294
+ container = a.find_parent("article") or a.find_parent("div")
295
+ img = container.find("img", alt=True) if container else None
296
+ name = (img.get("alt") or "").strip() if img else None
297
+
298
+ reward_summary = None
299
+ if container is not None:
300
+ for txt in container.stripped_strings:
301
+ if "$" in txt and "-" in txt:
302
+ reward_summary = txt
303
+ break
304
+
305
+ cards_by_url[detail_url] = {
306
+ "name": name,
307
+ "detail_url": detail_url,
308
+ "reward_summary": reward_summary,
309
+ }
310
+
311
+ cards = list(cards_by_url.values())
312
+ print(f"[DEBUG] parse_opportunity_cards: returning {len(cards)} cards")
313
+ if not cards:
314
+ print("[DEBUG] parse_opportunity_cards: no cards extracted from HTML")
315
+
316
+ return cards
317
+
318
+
319
+ async def extract_opportunity_cards_via_json(
320
+ crawler: AsyncWebCrawler,
321
+ url: str,
322
+ page_label: str,
323
+ ) -> List[Dict[str, Any]]:
324
+ """Use crawl4ai's JsonCssExtractionStrategy to extract opportunity cards.
325
+
326
+ This avoids relying on `result.html` for SPA content and instead uses the
327
+ DOM that Playwright sees inside crawl4ai.
328
+ """
329
+
330
+ schema = {
331
+ "name": "HackerOneOpportunities",
332
+ "baseSelector": "article",
333
+ "fields": [
334
+ {
335
+ "name": "detail_href",
336
+ "selector": 'a[href*="?type=team"]',
337
+ "type": "attribute",
338
+ "attribute": "href",
339
+ },
340
+ {
341
+ "name": "name",
342
+ "selector": "img[alt]",
343
+ "type": "attribute",
344
+ "attribute": "alt",
345
+ },
346
+ ],
347
+ }
348
+
349
+ extraction_strategy = JsonCssExtractionStrategy(schema, verbose=False)
350
+
351
+ # Prefer a config that waits for the SPA cards to render before extracting.
352
+ try:
353
+ run_config = CrawlerRunConfig(
354
+ cache_mode=CacheMode.BYPASS,
355
+ extraction_strategy=extraction_strategy,
356
+ # Wait for at least one program card link to appear in the DOM.
357
+ wait_for_selector='article a[href*="?type=team"]',
358
+ timeout=30_000,
359
+ )
360
+ except TypeError:
361
+ # Older crawl4ai versions may not support wait_for_selector/timeout.
362
+ print(
363
+ "[WARN] CrawlerRunConfig does not support wait_for_selector/timeout; "
364
+ "falling back to basic config. Consider upgrading crawl4ai for SPA pages."
365
+ )
366
+ run_config = CrawlerRunConfig(
367
+ cache_mode=CacheMode.BYPASS,
368
+ extraction_strategy=extraction_strategy,
369
+ )
370
+
371
+ print(f"[INFO] JSON extracting opportunity cards from {url} ({page_label})")
372
+ result = await crawler.arun(url=url, config=run_config)
373
+ if not result.success:
374
+ error_msg = getattr(result, "error", getattr(result, "error_message", "unknown error"))
375
+ print(f"[WARN] JSON extraction failed for {url}: {error_msg}")
376
+ return []
377
+
378
+ if not getattr(result, "extracted_content", None):
379
+ print(f"[DEBUG] extract_opportunity_cards_via_json: no extracted_content for {url}")
380
+ return []
381
+
382
+ try:
383
+ raw_items = json.loads(result.extracted_content)
384
+ except Exception as exc:
385
+ print(f"[WARN] Failed to decode extracted_content for {url}: {exc}")
386
+ return []
387
+
388
+ cards: List[Dict[str, Any]] = []
389
+ for item in raw_items:
390
+ href = (item.get("detail_href") or "").strip()
391
+ if not href:
392
+ continue
393
+ # Focus on team program pages (bug bounty programs and VDP teams)
394
+ detail_url = urljoin(BASE_URL, href)
395
+ name = (item.get("name") or "").strip() or None
396
+
397
+ cards.append(
398
+ {
399
+ "name": name,
400
+ "detail_url": detail_url,
401
+ "reward_summary": None,
402
+ }
403
+ )
404
+
405
+ print(f"[INFO] extract_opportunity_cards_via_json[{page_label}]: produced {len(cards)} cards")
406
+ return cards
407
+
408
+
409
+ async def gather_opportunity_cards_with_playwright(
410
+ max_pages: int = 10,
411
+ ) -> List[Dict[str, Any]]:
412
+ """Fallback: use Playwright directly to gather opportunity cards.
413
+
414
+ This bypasses crawl4ai's HTML/extraction pipeline for the listing pages,
415
+ but still relies on the same parse_opportunity_cards() logic and feeds
416
+ ProgramRecord scraping as before.
417
+ """
418
+
419
+ cards: List[Dict[str, Any]] = []
420
+ seen: Dict[str, Dict[str, Any]] = {}
421
+
422
+ async with async_playwright() as p:
423
+ browser = await p.chromium.launch(headless=True)
424
+ page = await browser.new_page()
425
+
426
+ # Main overview page
427
+ print("[INFO] Playwright fallback: loading opportunities overview page")
428
+ await page.goto(OPPORTUNITIES_URL, wait_until="networkidle")
429
+ html = await page.content()
430
+ index_cards = parse_opportunity_cards(html)
431
+ print(
432
+ f"[INFO] Playwright fallback: main page produced {len(index_cards)} cards"
433
+ )
434
+ for card in index_cards:
435
+ url = card["detail_url"]
436
+ if url not in seen:
437
+ seen[url] = card
438
+ cards.append(card)
439
+
440
+ # Paginated search pages
441
+ for page_no in range(1, max_pages + 1):
442
+ search_url = f"{OPPORTUNITIES_SEARCH_URL}?bbp=true&page={page_no}"
443
+ print(
444
+ f"[INFO] Playwright fallback: loading search page {page_no}: {search_url}"
445
+ )
446
+ await page.goto(search_url, wait_until="networkidle")
447
+ html = await page.content()
448
+ page_cards = parse_opportunity_cards(html)
449
+ if not page_cards:
450
+ print(
451
+ f"[INFO] Playwright fallback: no cards on search page {page_no}; stopping pagination."
452
+ )
453
+ break
454
+
455
+ new_count = 0
456
+ for card in page_cards:
457
+ url = card["detail_url"]
458
+ if url not in seen:
459
+ seen[url] = card
460
+ cards.append(card)
461
+ new_count += 1
462
+
463
+ if new_count == 0:
464
+ print(
465
+ f"[INFO] Playwright fallback: no new cards on search page {page_no}; stopping pagination."
466
+ )
467
+ break
468
+
469
+ await browser.close()
470
+
471
+ print(f"[INFO] Playwright fallback: total unique cards collected = {len(cards)}")
472
+ return cards
473
+
474
+
475
+ async def gather_all_opportunity_cards(
476
+ crawler: AsyncWebCrawler,
477
+ max_pages: int = 10,
478
+ ) -> List[Dict[str, Any]]:
479
+ """Collect cards from the main opportunities page and paginated search.
480
+
481
+ We first scrape /opportunities/all, then iterate over
482
+ /opportunities/all/search?bbp=true&page=N until no new cards appear or
483
+ max_pages is reached. This focuses on public bug bounty programs.
484
+ """
485
+
486
+ cards: List[Dict[str, Any]] = []
487
+ seen: Dict[str, Dict[str, Any]] = {}
488
+
489
+ # Main overview page (popular campaigns & recommendations)
490
+ index_cards = await extract_opportunity_cards_via_json(
491
+ crawler,
492
+ OPPORTUNITIES_URL,
493
+ page_label="opportunities_all",
494
+ )
495
+ print(f"[INFO] gather_all_opportunity_cards: main page produced {len(index_cards)} cards")
496
+ for card in index_cards:
497
+ url = card["detail_url"]
498
+ if url not in seen:
499
+ seen[url] = card
500
+ cards.append(card)
501
+
502
+ # Paginated search for bug bounty programs
503
+ for page in range(1, max_pages + 1):
504
+ search_url = f"{OPPORTUNITIES_SEARCH_URL}?bbp=true&page={page}"
505
+ print(f"[INFO] Fetching search page {page}: {search_url}")
506
+ page_cards = await extract_opportunity_cards_via_json(
507
+ crawler,
508
+ search_url,
509
+ page_label=f"opportunities_search_page_{page}",
510
+ )
511
+ if not page_cards:
512
+ print(f"[INFO] No cards parsed on search page {page}; stopping pagination.")
513
+ break
514
+
515
+ new_count = 0
516
+ for card in page_cards:
517
+ url = card["detail_url"]
518
+ if url not in seen:
519
+ seen[url] = card
520
+ cards.append(card)
521
+ new_count += 1
522
+
523
+ if new_count == 0:
524
+ # No new programs discovered on this page; assume we've exhausted results.
525
+ break
526
+
527
+ return cards
528
+
529
+
530
+ def extract_first_external_link(soup: BeautifulSoup) -> Optional[str]:
531
+ for a in soup.find_all("a", href=True):
532
+ href = a["href"]
533
+ if href.startswith("http") and "hackerone.com" not in href:
534
+ return href
535
+ return None
536
+
537
+
538
+ def extract_kv_table_after_heading(soup: BeautifulSoup, heading_substring: str) -> Dict[str, Any]:
539
+ heading = soup.find(
540
+ lambda tag: tag.name in ("h1", "h2", "h3")
541
+ and heading_substring.lower() in tag.get_text(strip=True).lower()
542
+ )
543
+ if not heading:
544
+ return {}
545
+
546
+ table = heading.find_next("table")
547
+ if not table:
548
+ return {}
549
+
550
+ result: Dict[str, Any] = {}
551
+ for row in table.find_all("tr"):
552
+ cells = row.find_all(["th", "td"])
553
+ if len(cells) < 2:
554
+ continue
555
+ key = cells[0].get_text(" ", strip=True)
556
+ value = cells[1].get_text(" ", strip=True)
557
+ if key:
558
+ result[key] = value
559
+ return result
560
+
561
+
562
+ def parse_program_page(html: str) -> Tuple[Optional[str], Optional[str], Dict[str, Any], Dict[str, Any]]:
563
+ soup = BeautifulSoup(html, "html.parser")
564
+
565
+ name = None
566
+ title = soup.find("h1")
567
+ if title:
568
+ name = title.get_text(" ", strip=True)
569
+
570
+ website = extract_first_external_link(soup)
571
+
572
+ rewards = extract_kv_table_after_heading(soup, "Rewards summary")
573
+ stats = extract_kv_table_after_heading(soup, "Stats")
574
+
575
+ return name, website, rewards, stats
576
+
577
+
578
+ def parse_scope_table(html: str) -> Tuple[List[ScopeAsset], List[ScopeAsset], Optional[str]]:
579
+ """Parse the scope table and Burp Suite link.
580
+
581
+ Returns (all_assets, eligible_assets, burp_url).
582
+ """
583
+
584
+ soup = BeautifulSoup(html, "html.parser")
585
+
586
+ burp_link = soup.find(
587
+ "a",
588
+ string=lambda s: isinstance(s, str)
589
+ and "Burp Suite Project Configuration File" in s,
590
+ )
591
+ burp_url = urljoin(BASE_URL, burp_link["href"]) if burp_link else None
592
+
593
+ table = None
594
+ for t in soup.find_all("table"):
595
+ header = t.find("tr")
596
+ if not header:
597
+ continue
598
+ hdr_text = header.get_text(" ", strip=True)
599
+ if all(key in hdr_text for key in ("Asset name", "Coverage", "Bounty")):
600
+ table = t
601
+ break
602
+
603
+ all_assets: List[ScopeAsset] = []
604
+ eligible: List[ScopeAsset] = []
605
+
606
+ if not table:
607
+ return all_assets, eligible, burp_url
608
+
609
+ for tr in table.find_all("tr")[1:]: # skip header
610
+ tds = tr.find_all("td")
611
+ if len(tds) < 7:
612
+ continue
613
+
614
+ name_cell = tds[0]
615
+ asset_name_el = name_cell.find("strong")
616
+ asset_name = (
617
+ asset_name_el.get_text(" ", strip=True)
618
+ if asset_name_el
619
+ else name_cell.get_text(" ", strip=True)
620
+ )
621
+ impact_scope = None
622
+ extra = [s for s in name_cell.stripped_strings]
623
+ if len(extra) > 1:
624
+ impact_scope = extra[1]
625
+
626
+ asset_type = tds[1].get_text(" ", strip=True)
627
+ coverage = tds[2].get_text(" ", strip=True)
628
+ max_severity = tds[3].get_text(" ", strip=True)
629
+ bounty_eligibility = tds[4].get_text(" ", strip=True)
630
+ last_update = tds[5].get_text(" ", strip=True)
631
+ resolved_reports = tds[6].get_text(" ", strip=True)
632
+
633
+ asset = ScopeAsset(
634
+ asset_name=asset_name,
635
+ impact_scope=impact_scope,
636
+ asset_type=asset_type,
637
+ coverage=coverage,
638
+ max_severity=max_severity,
639
+ bounty_eligibility=bounty_eligibility,
640
+ last_update=last_update,
641
+ resolved_reports=resolved_reports,
642
+ )
643
+ asset.attack_surface = infer_attack_surface(asset)
644
+ all_assets.append(asset)
645
+
646
+ if "in scope" in coverage.lower() and "eligible" in bounty_eligibility.lower():
647
+ eligible.append(asset)
648
+
649
+ return all_assets, eligible, burp_url
650
+
651
+
652
+ # ---------------------------------------------------------------------------
653
+ # Main scraping flow
654
+ # ---------------------------------------------------------------------------
655
+
656
+
657
+ async def scrape_programs(limit: Optional[int] = None, max_pages: int = 10) -> List[ProgramRecord]:
658
+ dirs = output_dirs()
659
+
660
+ async with AsyncWebCrawler() as crawler:
661
+ cards = await gather_all_opportunity_cards(crawler, max_pages=max_pages)
662
+ if not cards:
663
+ print(
664
+ "[WARN] crawl4ai-based extraction found no opportunity cards; "
665
+ "falling back to direct Playwright scraping for listings."
666
+ )
667
+ cards = await gather_opportunity_cards_with_playwright(max_pages=max_pages)
668
+ if not cards:
669
+ print("[ERROR] No opportunity cards found (even with Playwright fallback)")
670
+ return []
671
+ if limit is not None:
672
+ cards = cards[:limit]
673
+
674
+ programs: List[ProgramRecord] = []
675
+
676
+ for card in cards:
677
+ detail_url = card["detail_url"]
678
+ slug = slug_from_program_url(detail_url)
679
+ print(f"[INFO] Scraping {slug} -> {detail_url}")
680
+
681
+ main_html = await fetch_html(crawler, detail_url)
682
+ if not main_html:
683
+ continue
684
+
685
+ parsed = urlparse(detail_url)
686
+ scope_url = urljoin(BASE_URL, parsed.path.rstrip("/") + "/policy_scopes")
687
+ scope_html = await fetch_html(crawler, scope_url)
688
+
689
+ name, website, rewards, stats = parse_program_page(main_html)
690
+
691
+ all_assets: List[ScopeAsset] = []
692
+ eligible_assets: List[ScopeAsset] = []
693
+ burp_cfg_path: Optional[str] = None
694
+
695
+ if scope_html:
696
+ all_assets, eligible_assets, burp_url = parse_scope_table(scope_html)
697
+
698
+ if burp_url:
699
+ dest = dirs["burp"] / f"{slug}.json"
700
+ downloaded = download_url(burp_url, dest)
701
+ if downloaded is not None:
702
+ burp_cfg_path = str(downloaded.relative_to(repo_root()))
703
+
704
+ record = ProgramRecord(
705
+ slug=slug,
706
+ name=name or card.get("name"),
707
+ detail_url=detail_url,
708
+ website_url=website,
709
+ reward_summary_card=card.get("reward_summary"),
710
+ rewards_table=rewards,
711
+ stats=stats,
712
+ scope_assets=all_assets,
713
+ eligible_assets=eligible_assets,
714
+ burp_config_path=burp_cfg_path,
715
+ attack_surface_summary_all=summarize_attack_surface(all_assets),
716
+ attack_surface_summary_eligible=summarize_attack_surface(eligible_assets),
717
+ )
718
+
719
+ program_path = dirs["programs"] / f"{slug}.json"
720
+ program_path.write_text(json.dumps(asdict(record), indent=2), encoding="utf-8")
721
+
722
+ programs.append(record)
723
+
724
+ index_path = dirs["base"] / "programs_index.json"
725
+ index_data = [
726
+ {
727
+ "slug": p.slug,
728
+ "name": p.name,
729
+ "detail_url": p.detail_url,
730
+ "website_url": p.website_url,
731
+ "eligible_assets_count": len(p.eligible_assets),
732
+ "burp_config_path": p.burp_config_path,
733
+ "attack_surfaces_all": p.attack_surface_summary_all,
734
+ "attack_surfaces_eligible": p.attack_surface_summary_eligible,
735
+ "targets": [
736
+ cat
737
+ for cat, count in p.attack_surface_summary_eligible.items()
738
+ if count > 0
739
+ ],
740
+ }
741
+ for p in programs
742
+ ]
743
+ index_path.write_text(json.dumps(index_data, indent=2), encoding="utf-8")
744
+
745
+ # Flattened index: one entry per eligible asset with attack-surface labels.
746
+ # This is convenient for LangChain / MCP agents to reason about targets.
747
+ surface_index_path = dirs["base"] / "attack_surface_index.json"
748
+ surface_index: List[Dict[str, Any]] = []
749
+ for p in programs:
750
+ for asset in p.eligible_assets:
751
+ surface_index.append(
752
+ {
753
+ "program_slug": p.slug,
754
+ "program_name": p.name,
755
+ "program_detail_url": p.detail_url,
756
+ "program_website_url": p.website_url,
757
+ "asset_name": asset.asset_name,
758
+ "impact_scope": asset.impact_scope,
759
+ "asset_type": asset.asset_type,
760
+ "coverage": asset.coverage,
761
+ "max_severity": asset.max_severity,
762
+ "bounty_eligibility": asset.bounty_eligibility,
763
+ "last_update": asset.last_update,
764
+ "resolved_reports": asset.resolved_reports,
765
+ "attack_surface": asset.attack_surface,
766
+ "targets": asset.attack_surface,
767
+ }
768
+ )
769
+ surface_index_path.write_text(json.dumps(surface_index, indent=2), encoding="utf-8")
770
+
771
+ return programs
772
+
773
+
774
+ def main(argv: Optional[Iterable[str]] = None) -> None:
775
+ parser = argparse.ArgumentParser(description="Scrape HackerOne opportunities & scopes")
776
+ parser.add_argument(
777
+ "--limit",
778
+ type=int,
779
+ default=None,
780
+ help="Max number of programs to scrape (default: all visible on main page)",
781
+ )
782
+ parser.add_argument(
783
+ "--max-pages",
784
+ type=int,
785
+ default=10,
786
+ help="Max number of paginated search result pages to crawl",
787
+ )
788
+ args = parser.parse_args(list(argv) if argv is not None else None)
789
+
790
+ asyncio.run(scrape_programs(limit=args.limit, max_pages=args.max_pages))
791
+
792
+
793
+ if __name__ == "__main__": # pragma: no cover
794
+ main()
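The scraper writes a flattened data/hackerone/attack_surface_index.json with one entry per eligible asset, labeled with attack-surface categories. A small consumer sketch, assuming the scraper has already populated that file:

import json
from collections import defaultdict
from pathlib import Path

index_path = Path("data/hackerone/attack_surface_index.json")
entries = json.loads(index_path.read_text(encoding="utf-8"))

# Group eligible assets tagged as "web_app" by program slug.
web_assets = defaultdict(list)
for entry in entries:
    if "web_app" in entry.get("attack_surface", []):
        web_assets[entry["program_slug"]].append(entry["asset_name"])

for slug, assets in sorted(web_assets.items()):
    print(f"{slug}: {len(assets)} eligible web assets")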
data/hackerone/attack_surface_index.json ADDED
@@ -0,0 +1 @@
1
+ []
data/hackerone/programs/audible.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "audible",
3
+ "name": "audible",
4
+ "detail_url": "https://hackerone.com/audible?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/braze_inc.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "braze_inc",
3
+ "name": "braze_inc",
4
+ "detail_url": "https://hackerone.com/braze_inc?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/bumba_bbp.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "bumba_bbp",
3
+ "name": "bumba_bbp",
4
+ "detail_url": "https://hackerone.com/bumba_bbp?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/doordash.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "doordash",
3
+ "name": "doordash",
4
+ "detail_url": "https://hackerone.com/doordash?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/dyson.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "dyson",
3
+ "name": "dyson",
4
+ "detail_url": "https://hackerone.com/dyson?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/flipkart.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "flipkart",
3
+ "name": "flipkart",
4
+ "detail_url": "https://hackerone.com/flipkart?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/hubspot.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "hubspot",
3
+ "name": "hubspot",
4
+ "detail_url": "https://hackerone.com/hubspot?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/inspectorio.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "inspectorio",
3
+ "name": "inspectorio",
4
+ "detail_url": "https://hackerone.com/inspectorio?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/kong.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "kong",
3
+ "name": "kong",
4
+ "detail_url": "https://hackerone.com/kong?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/mpesa.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "mpesa",
3
+ "name": "mpesa",
4
+ "detail_url": "https://hackerone.com/mpesa?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/neon_bbp.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "neon_bbp",
3
+ "name": "neon_bbp",
4
+ "detail_url": "https://hackerone.com/neon_bbp?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/netscaler_public_program.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "netscaler_public_program",
3
+ "name": "netscaler_public_program",
4
+ "detail_url": "https://hackerone.com/netscaler_public_program?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/northerntechhq.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "northerntechhq",
3
+ "name": "northerntechhq",
4
+ "detail_url": "https://hackerone.com/northerntechhq?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/notion.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "notion",
3
+ "name": "notion",
4
+ "detail_url": "https://hackerone.com/notion?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/oppo_bbp.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "oppo_bbp",
3
+ "name": "oppo_bbp",
4
+ "detail_url": "https://hackerone.com/oppo_bbp?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/porsche.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "porsche",
3
+ "name": "porsche",
4
+ "detail_url": "https://hackerone.com/porsche?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/ripio.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "ripio",
3
+ "name": "ripio",
4
+ "detail_url": "https://hackerone.com/ripio?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/robinhood.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "robinhood",
3
+ "name": "robinhood",
4
+ "detail_url": "https://hackerone.com/robinhood?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/silabs.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "silabs",
3
+ "name": "silabs",
4
+ "detail_url": "https://hackerone.com/silabs?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/stripchat.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "stripchat",
3
+ "name": "stripchat",
4
+ "detail_url": "https://hackerone.com/stripchat?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/syfe_bbp.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "syfe_bbp",
3
+ "name": "syfe_bbp",
4
+ "detail_url": "https://hackerone.com/syfe_bbp?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/wallet_on_telegram.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "wallet_on_telegram",
3
+ "name": "wallet_on_telegram",
4
+ "detail_url": "https://hackerone.com/wallet_on_telegram?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/whoop_bug_bounty.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "whoop_bug_bounty",
3
+ "name": "whoop_bug_bounty",
4
+ "detail_url": "https://hackerone.com/whoop_bug_bounty?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs/zooplus.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "slug": "zooplus",
3
+ "name": "zooplus",
4
+ "detail_url": "https://hackerone.com/zooplus?type=team",
5
+ "website_url": null,
6
+ "reward_summary_card": null,
7
+ "rewards_table": {},
8
+ "stats": {},
9
+ "scope_assets": [],
10
+ "eligible_assets": [],
11
+ "burp_config_path": null,
12
+ "attack_surface_summary_all": {},
13
+ "attack_surface_summary_eligible": {}
14
+ }
data/hackerone/programs_index.json ADDED
@@ -0,0 +1,266 @@
1
+ [
2
+ {
3
+ "slug": "northerntechhq",
4
+ "name": "northerntechhq",
5
+ "detail_url": "https://hackerone.com/northerntechhq?type=team",
6
+ "website_url": null,
7
+ "eligible_assets_count": 0,
8
+ "burp_config_path": null,
9
+ "attack_surfaces_all": {},
10
+ "attack_surfaces_eligible": {},
11
+ "targets": []
12
+ },
13
+ {
14
+ "slug": "notion",
15
+ "name": "notion",
16
+ "detail_url": "https://hackerone.com/notion?type=team",
17
+ "website_url": null,
18
+ "eligible_assets_count": 0,
19
+ "burp_config_path": null,
20
+ "attack_surfaces_all": {},
21
+ "attack_surfaces_eligible": {},
22
+ "targets": []
23
+ },
24
+ {
25
+ "slug": "stripchat",
26
+ "name": "stripchat",
27
+ "detail_url": "https://hackerone.com/stripchat?type=team",
28
+ "website_url": null,
29
+ "eligible_assets_count": 0,
30
+ "burp_config_path": null,
31
+ "attack_surfaces_all": {},
32
+ "attack_surfaces_eligible": {},
33
+ "targets": []
34
+ },
35
+ {
36
+ "slug": "doordash",
37
+ "name": "doordash",
38
+ "detail_url": "https://hackerone.com/doordash?type=team",
39
+ "website_url": null,
40
+ "eligible_assets_count": 0,
41
+ "burp_config_path": null,
42
+ "attack_surfaces_all": {},
43
+ "attack_surfaces_eligible": {},
44
+ "targets": []
45
+ },
46
+ {
47
+ "slug": "kong",
48
+ "name": "kong",
49
+ "detail_url": "https://hackerone.com/kong?type=team",
50
+ "website_url": null,
51
+ "eligible_assets_count": 0,
52
+ "burp_config_path": null,
53
+ "attack_surfaces_all": {},
54
+ "attack_surfaces_eligible": {},
55
+ "targets": []
56
+ },
57
+ {
58
+ "slug": "robinhood",
59
+ "name": "robinhood",
60
+ "detail_url": "https://hackerone.com/robinhood?type=team",
61
+ "website_url": null,
62
+ "eligible_assets_count": 0,
63
+ "burp_config_path": null,
64
+ "attack_surfaces_all": {},
65
+ "attack_surfaces_eligible": {},
66
+ "targets": []
67
+ },
68
+ {
69
+ "slug": "netscaler_public_program",
70
+ "name": "netscaler_public_program",
71
+ "detail_url": "https://hackerone.com/netscaler_public_program?type=team",
72
+ "website_url": null,
73
+ "eligible_assets_count": 0,
74
+ "burp_config_path": null,
75
+ "attack_surfaces_all": {},
76
+ "attack_surfaces_eligible": {},
77
+ "targets": []
78
+ },
79
+ {
80
+ "slug": "ripio",
81
+ "name": "ripio",
82
+ "detail_url": "https://hackerone.com/ripio?type=team",
83
+ "website_url": null,
84
+ "eligible_assets_count": 0,
85
+ "burp_config_path": null,
86
+ "attack_surfaces_all": {},
87
+ "attack_surfaces_eligible": {},
88
+ "targets": []
89
+ },
90
+ {
91
+ "slug": "porsche",
92
+ "name": "porsche",
93
+ "detail_url": "https://hackerone.com/porsche?type=team",
94
+ "website_url": null,
95
+ "eligible_assets_count": 0,
96
+ "burp_config_path": null,
97
+ "attack_surfaces_all": {},
98
+ "attack_surfaces_eligible": {},
99
+ "targets": []
100
+ },
101
+ {
102
+ "slug": "mpesa",
103
+ "name": "mpesa",
104
+ "detail_url": "https://hackerone.com/mpesa?type=team",
105
+ "website_url": null,
106
+ "eligible_assets_count": 0,
107
+ "burp_config_path": null,
108
+ "attack_surfaces_all": {},
109
+ "attack_surfaces_eligible": {},
110
+ "targets": []
111
+ },
112
+ {
113
+ "slug": "dyson",
114
+ "name": "dyson",
115
+ "detail_url": "https://hackerone.com/dyson?type=team",
116
+ "website_url": null,
117
+ "eligible_assets_count": 0,
118
+ "burp_config_path": null,
119
+ "attack_surfaces_all": {},
120
+ "attack_surfaces_eligible": {},
121
+ "targets": []
122
+ },
123
+ {
124
+ "slug": "hubspot",
125
+ "name": "hubspot",
126
+ "detail_url": "https://hackerone.com/hubspot?type=team",
127
+ "website_url": null,
128
+ "eligible_assets_count": 0,
129
+ "burp_config_path": null,
130
+ "attack_surfaces_all": {},
131
+ "attack_surfaces_eligible": {},
132
+ "targets": []
133
+ },
134
+ {
135
+ "slug": "braze_inc",
136
+ "name": "braze_inc",
137
+ "detail_url": "https://hackerone.com/braze_inc?type=team",
138
+ "website_url": null,
139
+ "eligible_assets_count": 0,
140
+ "burp_config_path": null,
141
+ "attack_surfaces_all": {},
142
+ "attack_surfaces_eligible": {},
143
+ "targets": []
144
+ },
145
+ {
146
+ "slug": "wallet_on_telegram",
147
+ "name": "wallet_on_telegram",
148
+ "detail_url": "https://hackerone.com/wallet_on_telegram?type=team",
149
+ "website_url": null,
150
+ "eligible_assets_count": 0,
151
+ "burp_config_path": null,
152
+ "attack_surfaces_all": {},
153
+ "attack_surfaces_eligible": {},
154
+ "targets": []
155
+ },
156
+ {
157
+ "slug": "audible",
158
+ "name": "audible",
159
+ "detail_url": "https://hackerone.com/audible?type=team",
160
+ "website_url": null,
161
+ "eligible_assets_count": 0,
162
+ "burp_config_path": null,
163
+ "attack_surfaces_all": {},
164
+ "attack_surfaces_eligible": {},
165
+ "targets": []
166
+ },
167
+ {
168
+ "slug": "silabs",
169
+ "name": "silabs",
170
+ "detail_url": "https://hackerone.com/silabs?type=team",
171
+ "website_url": null,
172
+ "eligible_assets_count": 0,
173
+ "burp_config_path": null,
174
+ "attack_surfaces_all": {},
175
+ "attack_surfaces_eligible": {},
176
+ "targets": []
177
+ },
178
+ {
179
+ "slug": "flipkart",
180
+ "name": "flipkart",
181
+ "detail_url": "https://hackerone.com/flipkart?type=team",
182
+ "website_url": null,
183
+ "eligible_assets_count": 0,
184
+ "burp_config_path": null,
185
+ "attack_surfaces_all": {},
186
+ "attack_surfaces_eligible": {},
187
+ "targets": []
188
+ },
189
+ {
190
+ "slug": "zooplus",
191
+ "name": "zooplus",
192
+ "detail_url": "https://hackerone.com/zooplus?type=team",
193
+ "website_url": null,
194
+ "eligible_assets_count": 0,
195
+ "burp_config_path": null,
196
+ "attack_surfaces_all": {},
197
+ "attack_surfaces_eligible": {},
198
+ "targets": []
199
+ },
200
+ {
201
+ "slug": "syfe_bbp",
202
+ "name": "syfe_bbp",
203
+ "detail_url": "https://hackerone.com/syfe_bbp?type=team",
204
+ "website_url": null,
205
+ "eligible_assets_count": 0,
206
+ "burp_config_path": null,
207
+ "attack_surfaces_all": {},
208
+ "attack_surfaces_eligible": {},
209
+ "targets": []
210
+ },
211
+ {
212
+ "slug": "neon_bbp",
213
+ "name": "neon_bbp",
214
+ "detail_url": "https://hackerone.com/neon_bbp?type=team",
215
+ "website_url": null,
216
+ "eligible_assets_count": 0,
217
+ "burp_config_path": null,
218
+ "attack_surfaces_all": {},
219
+ "attack_surfaces_eligible": {},
220
+ "targets": []
221
+ },
222
+ {
223
+ "slug": "whoop_bug_bounty",
224
+ "name": "whoop_bug_bounty",
225
+ "detail_url": "https://hackerone.com/whoop_bug_bounty?type=team",
226
+ "website_url": null,
227
+ "eligible_assets_count": 0,
228
+ "burp_config_path": null,
229
+ "attack_surfaces_all": {},
230
+ "attack_surfaces_eligible": {},
231
+ "targets": []
232
+ },
233
+ {
234
+ "slug": "oppo_bbp",
235
+ "name": "oppo_bbp",
236
+ "detail_url": "https://hackerone.com/oppo_bbp?type=team",
237
+ "website_url": null,
238
+ "eligible_assets_count": 0,
239
+ "burp_config_path": null,
240
+ "attack_surfaces_all": {},
241
+ "attack_surfaces_eligible": {},
242
+ "targets": []
243
+ },
244
+ {
245
+ "slug": "inspectorio",
246
+ "name": "inspectorio",
247
+ "detail_url": "https://hackerone.com/inspectorio?type=team",
248
+ "website_url": null,
249
+ "eligible_assets_count": 0,
250
+ "burp_config_path": null,
251
+ "attack_surfaces_all": {},
252
+ "attack_surfaces_eligible": {},
253
+ "targets": []
254
+ },
255
+ {
256
+ "slug": "bumba_bbp",
257
+ "name": "bumba_bbp",
258
+ "detail_url": "https://hackerone.com/bumba_bbp?type=team",
259
+ "website_url": null,
260
+ "eligible_assets_count": 0,
261
+ "burp_config_path": null,
262
+ "attack_surfaces_all": {},
263
+ "attack_surfaces_eligible": {},
264
+ "targets": []
265
+ }
266
+ ]
data/mitre/mitre_minimal.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "stages": [
3
+ {
4
+ "id": "recon",
5
+ "name": "Reconnaissance",
6
+ "mitre_tactic_id": "TA0043",
7
+ "matrix": "ATTACK",
8
+ "color": "#1f77b4"
9
+ },
10
+ {
11
+ "id": "initial_access",
12
+ "name": "Initial Access",
13
+ "mitre_tactic_id": "TA0001",
14
+ "matrix": "ATTACK",
15
+ "color": "#ff7f0e"
16
+ },
17
+ {
18
+ "id": "execution",
19
+ "name": "Execution",
20
+ "mitre_tactic_id": "TA0002",
21
+ "matrix": "ATTACK",
22
+ "color": "#2ca02c"
23
+ },
24
+ {
25
+ "id": "persistence",
26
+ "name": "Persistence",
27
+ "mitre_tactic_id": "TA0003",
28
+ "matrix": "ATTACK",
29
+ "color": "#d62728"
30
+ },
31
+ {
32
+ "id": "exfiltration",
33
+ "name": "Exfiltration",
34
+ "mitre_tactic_id": "TA0010",
35
+ "matrix": "ATTACK",
36
+ "color": "#9467bd"
37
+ },
38
+ {
39
+ "id": "impact",
40
+ "name": "Impact",
41
+ "mitre_tactic_id": "TA0040",
42
+ "matrix": "ATTACK",
43
+ "color": "#8c564b"
44
+ }
45
+ ]
46
+ }
requirements.txt ADDED
@@ -0,0 +1,9 @@
1
+ crawl4ai
2
+ beautifulsoup4
3
+ langchain
4
+ gradio==6.0.1
5
+ plotly
6
+ transformers
7
+ huggingface_hub
8
+ matplotlib
9
+ click==8.1.7
scope-analysis.md ADDED
@@ -0,0 +1,7 @@
1
+ ## Scope analysis
2
+
3
+ Provides the various datasets and resources that I will use to implement the cyber-vibehacking platform:
4
+
5
+ - Bug bounty offerings and their scope:
6
+ - from [HackerOne Opportunities](https://hackerone.com/opportunities/all).
7
+ - Setting up the necessary infrastructure to host the platform.
specs-cyber-vibehacking.md ADDED
@@ -0,0 +1,209 @@
1
+ ## Building the ultimate vibe-hacking platform for validating attack vectors and cyber threat intelligence
2
+
3
+ ### 1. Hackathon and sponsor context
4
+
5
+ This spec describes a Hugging Face Space built for the **MCP 1st Birthday** hackathon, co-hosted by **Anthropic** and **Gradio**, and supported by a broad set of sponsors.
6
+
7
+ From the hackathon page:
8
+
9
+ - **Cash prizes – $21K total**
10
+ - Hugging Face: $15K
11
+ - Modal: $2.5K
12
+ - Blaxel: $2.5K
13
+ - LlamaIndex: $1K
14
+ - **API credits and perks**
15
+ - Anthropic: $25K Claude API credits
16
+ - OpenAI: $25 credits for all participants + extra awards
17
+ - Hugging Face: $25 credits for all participants
18
+ - Nebius Token Factory: $50 credits for all participants
19
+ - Modal: $250 credits for all participants
20
+ - Blaxel: $250 credits for all participants
21
+ - ElevenLabs: membership credits for thousands of participants
22
+ - SambaNova: $25 credits for 1500 participants
23
+ - Hyperbolic: $25 credits for 3000 participants
24
+ - Google Gemini: $30K Gemini API credits for Track 2 winners
25
+ - **Special sponsor awards**
26
+ - Modal Innovation Award – best project using Modal
27
+ - Blaxel Choice Award – best project using Blaxel
28
+ - LlamaIndex category award – best project using LlamaIndex
29
+ - ElevenLabs award – best project using ElevenLabs
30
+ - OpenAI category awards – best ChatGPT app / best OpenAI API integration
31
+ - Google Gemini special category award – best Track 2 use of Gemini API
32
+
33
+ This project is designed to be eligible for **Track 2: MCP in Action** (and optionally Track 1 via MCP server components), while also making it easy to plug in selected sponsor technologies.
34
+
35
+ ### 2. Threat landscape: AI-orchestrated cyber espionage
36
+
37
+ Anthropic's report on the [first AI-orchestrated cyber espionage campaign](https://www.anthropic.com/news/disrupting-AI-espionage) describes an operation where:
38
+
39
+ - A state-sponsored actor used **agentic AI capabilities** to automate 80–90% of a large-scale cyber campaign.
40
+ - **Claude Code** was used as an automated tool to perform reconnaissance, identify high-value databases, write exploit code, harvest credentials, and exfiltrate data.
41
+ - The attackers broke down their operation into small, seemingly-benign subtasks and **jailbroke** the model by misrepresenting the context ("defensive testing"), undermining guardrails.
42
+ - The attack showcased three crucial capabilities:
43
+ 1. **Intelligence** – models can plan and execute sophisticated multi-step tasks.
44
+ 2. **Agency** – models can operate in long-running loops with minimal human oversight.
45
+ 3. **Tools** – via standards like the [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro), models can control external software and infrastructure.
46
+
47
+ This fundamentally lowers the barrier for advanced cyber operations. The same properties, however, can be turned toward **defense** if we build the right tooling.
48
+
49
+ ### 3. Vision: Cyber Vibe Lab
50
+
51
+ The **Cyber Vibe Lab** is a Gradio 6 application plus a set of MCP tools that:
52
+
53
+ - Lets defenders and security researchers **explore attack surfaces** (e.g., web apps, APIs, cloud infra) using AI agents.
54
+ - Uses the concept of **"vibe hacking"** (iterative, exploratory prompting and tool use) to map how an agent might behave offensively, while always steering outputs toward defense, detection, and hardening.
55
+ - Grounds its reasoning in:
56
+ - Real-world threat patterns from Anthropic's espionage case study.
57
+ - Structured scope data (e.g., programs and assets from HackerOne scraping).
58
+ - Technical documentation and code pulled via MCP servers like `perplexity-ask` and `deepwiki`.
59
+
60
+ Primary user personas:
61
+
62
+ - **Red-team engineer** – wants to simulate attacker paths and identify likely weak points.
63
+ - **Blue-team / SOC analyst** – wants playbooks for detection, logging, and response.
64
+ - **Security architect / CISO** – wants high-level summaries of where AI-enabled attacks could hit and what controls to prioritize.
65
+
66
+ ### 4. System architecture (high level)
67
+
68
+ 1. **Gradio 6 front-end (app.py)**
69
+ - Built as a Gradio 6 app (e.g., `Blocks` + `ChatInterface`).
70
+ - Provides a rich chat and control panel experience: scenario selection, target selection, mode (red vs blue), and visibility into tool calls.
71
+
72
+ 2. **MCP tool layer**
73
+ The app interacts with multiple MCP servers (configured externally via the MCP runtime):
74
+
75
+ - **`mcp://perplexity-ask`** – web-scale research and summarization of technologies, CVEs, protocols, and security patterns.
76
+ - **`mcp://deepwiki`** – deep dives into GitHub repos and docs to understand actual implementations (e.g., auth flows, crypto usage, infra-as-code).
77
+ - **Local scope server (e.g., `mcp://hackerone-scope`)** – MCP wrapper around the JSON program and asset data scraped from HackerOne.
78
+
79
+ 3. **Orchestration and safety layer**
80
+ - **Scenario composer** – converts a user request ("simulate AI-led attack on our web app perimeter") plus selected scope into a structured multi-phase plan mirroring the Anthropic report phases (recon → exploit → persistence → exfiltration); a minimal sketch of the resulting plan structure follows this list.
81
+ - **Tool router** – chooses when to call Perplexity, DeepWiki, or the scope server to enrich each phase.
82
+ - **Safety filter** – enforces house rules: no hand-off of ready-to-run exploit code or credentials; outputs are reframed as security testing checklists, monitoring recommendations, and defensive mitigations.
83
+
84
+ 4. **Data and logging**
85
+ - Every interaction is split into:
86
+ - **Attack narrative** – how an AI agent might chain tools and tasks in an offensive scenario.
87
+ - **Defense narrative** – concrete logging, hardening, and detection actions mapped to each step.
88
+ - Logs are stored in a structured format (JSON) so they can be indexed by other tools (e.g., LlamaIndex or LangChain) later.
89
+
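+ A minimal sketch of the structure the scenario composer could emit and the logging layer could serialize; the class and field names here are illustrative, not a fixed schema:
+
+ ```python
+ from dataclasses import asdict, dataclass, field
+ from typing import List
+ import json
+
+
+ @dataclass
+ class PhasePlan:
+     # One phase of the simulated campaign, mapped to a MITRE tactic.
+     phase: str                      # e.g. "recon", "exploit", "persistence", "exfiltration"
+     mitre_tactic_id: str            # e.g. "TA0043"
+     attack_narrative: str           # how an AI agent might chain tools in this phase
+     defense_narrative: str          # matching logging / hardening / detection actions
+     mcp_tools: List[str] = field(default_factory=list)
+
+
+ @dataclass
+ class ScenarioPlan:
+     request: str
+     target_program: str
+     phases: List[PhasePlan] = field(default_factory=list)
+
+     def to_json(self) -> str:
+         # Structured log that later tooling (e.g. LlamaIndex) can index.
+         return json.dumps(asdict(self), indent=2)
+
+
+ plan = ScenarioPlan(
+     request="simulate AI-led attack on our web app perimeter",
+     target_program="zooplus",
+     phases=[
+         PhasePlan(
+             phase="recon",
+             mitre_tactic_id="TA0043",
+             attack_narrative="Agent enumerates public endpoints and the exposed tech stack.",
+             defense_narrative="Alert on scripted crawling; review what metadata is exposed.",
+             mcp_tools=["perplexity-ask", "hackerone-scope"],
+         )
+     ],
+ )
+ print(plan.to_json())
+ ```
+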
90
+ ### 5. Roadmap to the "ultimate" MCP framework
91
+
92
+ #### Phase 0 – Foundations
93
+
94
+ - Add `app.py` with a minimal Gradio 6 chat experience.
95
+ - Update `requirements.txt` to include `gradio>=6`.
96
+ - Ensure the repo is ready to run as a Hugging Face Space (standard `demo` variable or `app` export).
97
+
98
+ #### Phase 1 – MCP in Action MVP (Track 2)
99
+
100
+ - Implement a single **Cyber Vibe Agent** function (a minimal sketch follows this list) that:
101
+ - Accepts a free-form security question plus optional target/program name.
102
+ - Classifies intent (recon vs exploitation vs defense vs unknown).
103
+ - Produces an analysis that explicitly references the Anthropic phases (intelligence, agency, tools, multi-phase attack) but is framed as **defensive guidance**.
104
+ - Wire in planned MCP calls conceptually (Perplexity + DeepWiki), even if in early versions the calls are stubbed or proxied.
105
+
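+ A minimal sketch of such an agent function, using a keyword heuristic as a stand-in for the later model-based intent classifier; all names are placeholders:
+
+ ```python
+ from typing import Optional
+
+ CAPABILITIES = ["intelligence", "agency", "tools"]  # capabilities highlighted in the Anthropic report
+
+
+ def classify_intent(question: str) -> str:
+     # Placeholder heuristic; later phases swap in an HF zero-shot classifier.
+     q = question.lower()
+     if any(k in q for k in ("scan", "enumerate", "recon", "footprint")):
+         return "recon"
+     if any(k in q for k in ("exploit", "payload", "bypass")):
+         return "exploitation"
+     if any(k in q for k in ("detect", "harden", "monitor", "log")):
+         return "defense"
+     return "unknown"
+
+
+ def cyber_vibe_agent(question: str, program: Optional[str] = None) -> str:
+     intent = classify_intent(question)
+     scope = f" against the `{program}` scope" if program else ""
+     # Always reframe the answer as defensive guidance, regardless of intent.
+     return (
+         f"Intent: {intent}{scope}.\n"
+         "Framed defensively: map the request to the report's capabilities "
+         f"({', '.join(CAPABILITIES)}), then list the logging, detection, and hardening "
+         "controls a blue team should verify before any simulation proceeds."
+     )
+
+
+ print(cyber_vibe_agent("How would an agent enumerate our login endpoints?", "zooplus"))
+ ```
+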
106
+ Deliverable: a working Gradio app that already satisfies hackathon requirements (UI, documentation, demo video) and can be extended without breaking changes.
107
+
108
+ #### Phase 2 – Deep MCP integration and HackerOne data
109
+
110
+ - Expose scraped HackerOne program data (JSON) as an MCP tool (`hackerone-scope`); a sketch of the underlying helper follows this list.
111
+ - In the UI, allow the user to:
112
+ - Select one or more programs (e.g., `airbnb`, `bookingcom`, `wallet_on_telegram`).
113
+ - See their categorized attack surface (web app, database, internal network, cloud infra, appliances, etc.).
114
+ - Adjust the Cyber Vibe Agent to:
115
+ - Incorporate the selected scope into its planning.
116
+ - Produce tailored red/blue playbooks per attack-surface category.
117
+
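+ A sketch of the helper that the `hackerone-scope` tool could wrap, assuming the JSON layout committed under `data/hackerone/` in this repo; how the function is registered with the MCP runtime is left to the runtime configuration:
+
+ ```python
+ import json
+ from pathlib import Path
+ from typing import Any, Dict, List
+
+ DATA_DIR = Path("data/hackerone")
+
+
+ def get_program_scope(slug: str) -> Dict[str, Any]:
+     """Return the stored scope record for one HackerOne program.
+
+     Reads data/hackerone/programs/<slug>.json; an MCP wrapper
+     (`hackerone-scope`) would expose this function as a tool.
+     """
+     path = DATA_DIR / "programs" / f"{slug}.json"
+     if not path.exists():
+         return {"error": f"unknown program: {slug}"}
+     with path.open("r", encoding="utf-8") as f:
+         return json.load(f)
+
+
+ def list_programs() -> List[str]:
+     """Slugs from the combined index, for UI dropdowns and MCP tool discovery."""
+     with (DATA_DIR / "programs_index.json").open("r", encoding="utf-8") as f:
+         return [entry["slug"] for entry in json.load(f)]
+
+
+ if __name__ == "__main__":
+     print(list_programs()[:5])
+     print(get_program_scope("zooplus")["detail_url"])
+ ```
+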
118
+ #### Phase 3 – Sponsor-aligned extensions
119
+
120
+ Optional but desirable modules, depending on time and credits:
121
+
122
+ - **Anthropic (Claude)** – use Claude via MCP as the primary reasoning engine for complex multi-step cyber scenarios.
123
+ - **LlamaIndex** – index HackerOne scope, logs, and playbooks so the agent can retrieve and reuse prior analyses.
124
+ - **OpenAI / Gemini** – add alternate model backends behind the same MCP interface for comparison or ensemble reasoning.
125
+ - **ElevenLabs** – generate narrated walkthroughs of attack/defense scenarios for training.
126
+ - **Modal, Nebius Token Factory, SambaNova, Hyperbolic** – offload heavy analysis or large-scale simulations to external compute providers.
127
+
128
+ The app README will clearly state which sponsors are actually integrated in the submitted version; the architecture leaves hooks for the rest.
129
+
130
+ #### Phase 4 – Polish and judging criteria
131
+
132
+ - Match hackathon judging criteria explicitly:
133
+ - **Completeness** – Hugging Face Space, README, social media post link, demo video.
134
+ - **Design / polished UI** – clear navigation, visible tool calls, and understandable outputs.
135
+ - **Functionality** – real MCP usage, not just mock text; integration of at least two MCP tools.
136
+ - **Creativity** – unique framing of "vibe hacking" for defensive cyber operations.
137
+ - **Documentation** – detailed architecture and threat-model explanations in the README and this spec.
138
+ - **Real-world impact** – show how a security team could adopt the Cyber Vibe Lab in their workflows.
139
+
140
+ ### 6. Gradio 6 app design (app.py)
141
+
142
+ Key elements to implement in `app.py`:
143
+
144
+ - **Title and header** – clearly highlight the MCP and cyber-defense focus.
145
+ - **Intro text** – 2–3 short paragraphs summarizing:
146
+ - Anthropic's AI espionage case.
147
+ - The purpose of the Cyber Vibe Lab.
148
+ - Which MCP tools and sponsors are used.
149
+ - **Chat interface** – Gradio `ChatInterface` or `Chatbot` wrapping the Cyber Vibe Agent function.
150
+ - **Optional controls** – dropdowns or checkboxes for:
151
+ - Target program / asset group.
152
+ - Mode (Red-team simulation vs Blue-team defense).
153
+ - Level of detail (high-level summary vs step-by-step plan).
154
+
155
+ The first implementation can keep the MCP calls abstracted behind a single function; subsequent iterations can gradually introduce real MCP communication as the runtime configuration is finalized.
156
+
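+ A minimal wiring sketch, assuming the `ChatInterface` / `additional_inputs` API from Gradio 4/5 carries over to Gradio 6; the program list and the agent body are placeholders:
+
+ ```python
+ import gradio as gr
+
+
+ def cyber_vibe_agent(message, history, program, mode, detail):
+     # Single entry point; MCP calls stay abstracted behind this function for now.
+     return (
+         f"[{mode} | {detail}] Analysis for `{program}`: {message} "
+         "-> defensive guidance would be generated here."
+     )
+
+
+ demo = gr.ChatInterface(
+     fn=cyber_vibe_agent,
+     title="Cyber Vibe Lab – MCP in Action",
+     additional_inputs=[
+         gr.Dropdown(["zooplus", "robinhood", "silabs"], value="zooplus", label="Target program"),
+         gr.Radio(["Red-team simulation", "Blue-team defense"], value="Blue-team defense", label="Mode"),
+         gr.Radio(["High-level summary", "Step-by-step plan"], value="High-level summary", label="Level of detail"),
+     ],
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
+ ```
+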
157
+ ### 7. NotebookLM-style tri-panel UI
158
+
159
+ The UI is organized into three main panels, inspired by Google's NotebookLM:
160
+
161
+ - **Sources (left)** – manage uploaded files, URLs, MITRE docs, and Hugging Face assets (models, datasets). Users can:
162
+ - Add sources via upload or links.
163
+ - Toggle whether each source is used for retrieval (context) or as an "attack target" (e.g., HF model to probe using ATLAS-style tests).
164
+ - Trigger web/MITRE discovery using MCP (`perplexity-ask`) and convert results into new sources.
165
+ - **Chat (center)** – the main dialogue surface between the user and the Cyber Vibe Agent:
166
+ - Uses a shadcn-style conversation layout (user/agent bubbles, inline tool-call cards).
167
+ - Shows MCP tool invocations as small cards (Perplexity, DeepWiki, GitHub, Playwright, HF model probes).
168
+ - Allows attaching specific sources from the left panel to ground the current question.
169
+ - **Studio (right)** – visualization and reporting:
170
+ - **Mind Map view**: graph of the evolving attack chain, with nodes for stages, ATT&CK tactics/techniques, and ATLAS categories.
171
+ - **Timeline view**: Plotly-based chart of turns over time, colored by stage/tactic.
172
+ - **Reports view**: generated summaries of the session (phases exercised, ATT&CK/ATLAS coverage, defensive recommendations).
173
+
174
+ The initial implementation will use simple placeholders (markdown + basic charts) for the Studio panel, then progressively integrate Plotly and graph visualizations.
175
+
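+ A placeholder Timeline view could be built directly from the stage colors in `data/mitre/mitre_minimal.json`; the per-turn stages below are made up purely for illustration:
+
+ ```python
+ import json
+ from pathlib import Path
+
+ import plotly.graph_objects as go
+
+ # Stage names and colors from the minimal MITRE file committed alongside this spec.
+ stages = json.loads(Path("data/mitre/mitre_minimal.json").read_text(encoding="utf-8"))["stages"]
+ color_by_id = {s["id"]: s["color"] for s in stages}
+ name_by_id = {s["id"]: s["name"] for s in stages}
+
+ # Illustrative per-turn classification of a session (turn index -> stage id).
+ turns = ["recon", "recon", "initial_access", "execution", "exfiltration"]
+
+ fig = go.Figure(
+     go.Bar(
+         x=list(range(1, len(turns) + 1)),
+         y=[1] * len(turns),
+         marker_color=[color_by_id[t] for t in turns],
+         text=[name_by_id[t] for t in turns],
+         hovertext=[name_by_id[t] for t in turns],
+     )
+ )
+ fig.update_layout(title="Session timeline by stage", xaxis_title="Turn", yaxis_visible=False)
+ fig.show()
+ ```
+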
176
+ ### 8. Hugging Face ecosystem integration
177
+
178
+ The application integrates with the Hugging Face ecosystem at several layers:
179
+
180
+ - **Local transformers models** – for fast, on-device tasks:
181
+ - Stage classification (Recon / Initial Access / Execution / Persistence / Exfiltration / Impact).
182
+ - Optional ATT&CK/ATLAS tagging via zero-shot or multi-label classifiers.
183
+ - **Hosted Inference via `huggingface_hub` / `inference`**:
184
+ - Use `InferenceClient` to call larger instruction-tuned models for the Cyber Vibe Agent itself.
185
+ - Support OpenAI-style chat semantics when beneficial for agent orchestration.
186
+ - **HF models as "targets"**:
187
+ - Users can register a model ID as a source (e.g., `org/support-bot-7b`).
188
+ - The system runs a controlled "vibe harness" of prompts to probe for ATLAS-relevant behaviors (data leakage, jailbreak susceptibility, unsafe generations) and logs findings per model.
189
+ - **Embeddings for retrieval**:
190
+ - Use HF embedding models to index user-provided sources (docs, configs, logs) and MITRE descriptions.
191
+ - For each question, retrieve relevant chunks and feed them into the LLM prompt, alongside the current attack-chain state.
192
+
193
+ These integrations are abstracted behind internal helper modules so that the underlying models (local vs hosted) can be swapped without changing the Gradio UI.
194
+
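+ A sketch of the local stage classifier, using a zero-shot pipeline; `facebook/bart-large-mnli` is only an example model and would normally be overridden via the `HF_STAGE_MODEL_ID` environment variable that `app.py` reads:
+
+ ```python
+ import os
+
+ from transformers import pipeline
+
+ STAGE_LABELS = [
+     "Reconnaissance", "Initial Access", "Execution",
+     "Persistence", "Exfiltration", "Impact",
+ ]
+
+ # Example model only; the app allows swapping it via HF_STAGE_MODEL_ID.
+ model_id = os.getenv("HF_STAGE_MODEL_ID", "facebook/bart-large-mnli")
+ classifier = pipeline("zero-shot-classification", model=model_id)
+
+
+ def classify_stage(turn_text: str) -> str:
+     result = classifier(turn_text, candidate_labels=STAGE_LABELS)
+     return result["labels"][0]  # highest-scoring stage name
+
+
+ print(classify_stage("List the subdomains and exposed admin panels for the target."))
+ ```
+
+ For the hosted path, `InferenceClient.chat_completion` provides OpenAI-style chat semantics; the model ID below is an arbitrary, non-authoritative example and a valid HF token is required:
+
+ ```python
+ from huggingface_hub import InferenceClient
+
+ client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct")  # example model only
+ reply = client.chat_completion(
+     messages=[{"role": "user", "content": "Summarize defenses against TA0010 (Exfiltration)."}],
+     max_tokens=256,
+ )
+ print(reply.choices[0].message.content)
+ ```
+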
195
+ ### 9. Deliverables checklist
196
+
197
+ - `specs-cyber-vibehacking.md` (this file) – architecture and roadmap.
198
+ - `app.py` – Gradio 6 main page implementing the Cyber Vibe Lab UI with tri-panel layout.
199
+ - Updated `requirements.txt` with `gradio` (and, in later phases, `transformers`, `huggingface_hub`, `inference`, and `plotly`).
200
+ - Hugging Face Space README including:
201
+ - Correct track tags (e.g., `mcp-in-action-track-enterprise` / `mcp-in-action-track-creative`).
202
+ - Clear description of sponsor integrations.
203
+ - Links to the Anthropic report and relevant MCP docs.
204
+ - Short demo video showing:
205
+ - A user selecting a program or scenario.
206
+ - The agent generating an attack narrative and defense recommendations.
207
+ - The Studio panel updating its mind map / timeline to reflect the simulated attack chain.
208
+ - Any sponsor-specific enhancements (e.g., LlamaIndex retrieval, ElevenLabs narration).
209
+