Allanatrix committed on
Commit 7b88b54 · verified · 1 Parent(s): 51299a2

Update app.py

Files changed (1)
  1. app.py +1028 -4
app.py CHANGED
@@ -1,7 +1,1031 @@
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ """ Interactive Gradio UI for exploring the local SPECTER2 corpus."""
+
+ from __future__ import annotations
+
+ from collections import Counter, defaultdict
+ import subprocess
+ import sys
+ import time
+ from functools import lru_cache
+ from pathlib import Path
+ from typing import Any, Dict, List, Sequence, Set, Tuple
+
  import gradio as gr
+ import numpy as np
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import matplotlib.pyplot as plt
+ from matplotlib.collections import LineCollection
+ from matplotlib.colors import to_rgba
+ from matplotlib.figure import Figure
+
+ FULLSCREEN_JS = """
+ () => {
+     const container = document.getElementById('embedding-plot');
+     if (!container) return;
+     const plot = container.querySelector('.js-plotly-plot') || container;
+     if (!document.fullscreenElement) {
+         if (plot.requestFullscreen) {
+             plot.requestFullscreen();
+         } else if (plot.webkitRequestFullscreen) {
+             plot.webkitRequestFullscreen();
+         }
+     } else {
+         if (document.exitFullscreen) {
+             document.exitFullscreen();
+         } else if (document.webkitExitFullscreen) {
+             document.webkitExitFullscreen();
+         }
+     }
+ }
+ """
+
+ ORBIT_JS = """
+ () => {
+     const container = document.getElementById('embedding-plot');
+     if (!container) return;
+     const plot = container.querySelector('.js-plotly-plot');
+     if (!plot) return;
+     window._plotOrbitIntervals = window._plotOrbitIntervals || {};
+     const key = 'embedding-plot';
+     if (window._plotOrbitIntervals[key]) {
+         clearInterval(window._plotOrbitIntervals[key]);
+         delete window._plotOrbitIntervals[key];
+         return;
+     }
+     let angle = 0;
+     const radius = 1.6;
+     window._plotOrbitIntervals[key] = setInterval(() => {
+         const updatedPlot = container.querySelector('.js-plotly-plot');
+         if (!updatedPlot) {
+             clearInterval(window._plotOrbitIntervals[key]);
+             delete window._plotOrbitIntervals[key];
+             return;
+         }
+         angle = (angle + 2) % 360;
+         const rad = angle * Math.PI / 180;
+         Plotly.relayout(updatedPlot, {
+             'scene.camera.eye': {
+                 x: radius * Math.cos(rad),
+                 y: radius * Math.sin(rad),
+                 z: 0.9,
+             },
+         });
+     }, 50);
+ }
+ """
+
+ CUSTOM_JS = """
+ function(componentId, action) {
+     const el = document.getElementById(componentId);
+     if (!el) return;
+     if (action === "orbit") {
+         if (window._orbitIntervals === undefined) {
+             window._orbitIntervals = {};
+         }
+         if (window._orbitIntervals[componentId]) {
+             clearInterval(window._orbitIntervals[componentId]);
+             delete window._orbitIntervals[componentId];
+         } else {
+             let angle = 0;
+             const interval = setInterval(() => {
+                 angle = (angle + 2) % 360;
+                 const rad = angle * Math.PI / 180;
+                 const r = 1.6;
+                 const layout = {
+                     scene: {camera: {eye: {x: r * Math.cos(rad), y: r * Math.sin(rad), z: 0.9}}}
+                 };
+                 Plotly.relayout(el, layout);
+             }, 50);
+             window._orbitIntervals[componentId] = interval;
+         }
+     } else if (action === "fullscreen") {
+         const container = el.closest("div.svelte-1ipelgc");
+         const target = container || el;
+         if (!document.fullscreenElement) {
+             target.requestFullscreen?.();
+         } else {
+             document.exitFullscreen?.();
+         }
+     }
+ }
+ """
+
+ from pipeline.embed import Specter2Embedder
+ from pipeline.storage import load_embeddings, load_canonical_corpus
+
+ INDEX_DIR = Path(__file__).resolve().parents[1] / "index"
+ CORPUS_PATH = INDEX_DIR / "corpus.json"
+ EMBEDDINGS_PATH = INDEX_DIR / "embeddings.npy"
+
+ DEFAULT_COLOR_BASIS = "Cluster"
+ DEFAULT_PALETTE = "Plotly"
+
+ COLOR_BASIS_OPTIONS: Dict[str, str] = {
+     "Cluster": "cluster",
+     "Primary Category": "primary_category",
+ }
+
+ PALETTE_OPTIONS: Dict[str, List[str]] = {
+     "Plotly": px.colors.qualitative.Plotly,
+     "Bold": px.colors.qualitative.Bold,
+     "Vivid": px.colors.qualitative.Vivid,
+     "Pastel": px.colors.qualitative.Pastel,
+     "Safe": px.colors.qualitative.Safe,
+ }
+
+ MAX_EDGE_RENDER = 2000
+
+
+ def _float_rgba_to_plotly(rgba: Tuple[float, float, float, float], alpha: float | None = None) -> str:
+     r, g, b, a = rgba
+     if alpha is not None:
+         a = alpha
+     return f"rgba({int(r * 255)}, {int(g * 255)}, {int(b * 255)}, {a:.2f})"
+
+
+ def _build_cluster_color_map(cluster_ids: Sequence[int], palette: Sequence[Tuple[float, float, float, float]]) -> Dict[int, Tuple[float, float, float, float]]:
+     unique_ids = sorted(set(int(cid) for cid in cluster_ids))
+     color_map: Dict[int, Tuple[float, float, float, float]] = {}
+     for idx, cluster_id in enumerate(unique_ids):
+         color_map[cluster_id] = palette[idx % len(palette)]
+     return color_map
+
+
+ def _build_cluster_overview(papers: Sequence[Dict[str, Any]]) -> pd.DataFrame:
+     clusters: Dict[int, Dict[str, Any]] = defaultdict(lambda: {
+         "cluster_id": None,
+         "size": 0,
+         "categories": Counter(),
+         "sample_titles": [],
+     })
+
+     for paper in papers:
+         cluster_id = int(paper.get("cluster_id", -1))
+         entry = clusters[cluster_id]
+         entry["cluster_id"] = cluster_id
+         entry["size"] += 1
+         category = paper.get("primary_category") or "unknown"
+         entry["categories"][category] += 1
+         if len(entry["sample_titles"]) < 3:
+             entry["sample_titles"].append(paper.get("title", "(untitled)"))
+         entry["major_category"] = category.split(".")[0] if "." in category else category
+
+     overview_rows = []
+     for data in clusters.values():
+         dominant_category = data["categories"].most_common(1)[0][0] if data["categories"] else "unknown"
+         overview_rows.append(
+             {
+                 "cluster_id": data["cluster_id"],
+                 "size": data["size"],
+                 "major_category": data.get("major_category", "unknown"),
+                 "dominant_category": dominant_category,
+                 "sample_titles": " | ".join(data["sample_titles"]),
+             }
+         )
+
+     overview_rows.sort(key=lambda row: row["cluster_id"])
+     return pd.DataFrame(overview_rows)
+
+
+ def _build_cluster_hierarchy_json(papers: Sequence[Dict[str, Any]]) -> Dict[str, Any]:
+     hierarchy: Dict[str, Dict[str, List[Dict[str, Any]]]] = defaultdict(lambda: defaultdict(list))
+     for paper in papers:
+         cluster_id = int(paper.get("cluster_id", -1))
+         category = paper.get("primary_category") or "unknown"
+         major = category.split(".")[0] if "." in category else category
+         hierarchy[major][category].append(
+             {
+                 "cluster_id": cluster_id,
+                 "paper_id": paper.get("paper_id"),
+                 "title": paper.get("title"),
+             }
+         )
+
+     major_payload = []
+     for major, subcategories in hierarchy.items():
+         sub_payload = []
+         for category, clusters in sorted(subcategories.items()):
+             clusters_sorted = sorted(clusters, key=lambda c: c["cluster_id"])
+             sub_payload.append({
+                 "category": category,
+                 "clusters": clusters_sorted,
+                 "cluster_ids": sorted({entry["cluster_id"] for entry in clusters_sorted}),
+             })
+         major_payload.append({
+             "major": major,
+             "subcategories": sub_payload,
+         })
+
+     major_payload.sort(key=lambda entry: entry["major"])
+     return {"major_categories": major_payload}
+
+
+ def _filter_edges(edges: Sequence[Dict[str, Any]], selected: Set[int]) -> List[Dict[str, Any]]:
+     """Return only edges whose endpoints are in the selected set."""
+
+     return [
+         edge
+         for edge in edges
+         if int(edge.get("source", -1)) in selected and int(edge.get("target", -1)) in selected
+     ]
+
+
+ def _normalise_embeddings(vectors: np.ndarray) -> np.ndarray:
+     """Return L2-normalised embeddings, guarding against zero vectors."""
+
+     if vectors.size == 0:
+         return vectors
+     norms = np.linalg.norm(vectors, axis=1, keepdims=True)
+     norms[norms == 0] = 1.0
+     return vectors / norms
+
+
+ @lru_cache(maxsize=1)
+ def load_resources() -> Tuple[
+     Dict[str, Any],
+     List[Dict[str, Any]],
+     np.ndarray,
+     np.ndarray,
+     np.ndarray,
+     np.ndarray,
+     List[Dict[str, Any]],
+     List[Dict[str, Any]],
+ ]:
+     """Load canonical corpus data, embeddings, and graph metadata from disk."""
+
+     if not CORPUS_PATH.exists() or not EMBEDDINGS_PATH.exists():
+         raise FileNotFoundError(
+             "Corpus artifacts not found. Run `python -m pipeline.build_corpus` first."
+         )
+
+     corpus_doc = load_canonical_corpus(CORPUS_PATH)
+     papers = corpus_doc.get("papers", [])
+     embeddings = load_embeddings(EMBEDDINGS_PATH)
+
+     if embeddings.shape[0] != len(papers):
+         raise ValueError(
+             "Mismatch between embeddings and canonical corpus entries. Rebuild the corpus to continue."
+         )
+
+     papers_sorted = sorted(papers, key=lambda entry: entry.get("embedding_idx", 0))
+     if not all(paper.get("embedding_idx") == idx for idx, paper in enumerate(papers_sorted)):
+         raise ValueError("Embedding indices in canonical corpus do not match their positions; rebuild the corpus.")
+
+     umap_2d = np.array([paper.get("umap_2d", [0.0, 0.0]) for paper in papers_sorted], dtype=np.float32)
+     umap_3d = np.array([paper.get("umap_3d", [0.0, 0.0, 0.0]) for paper in papers_sorted], dtype=np.float32)
+
+     normalised = _normalise_embeddings(embeddings.astype(np.float32))
+     graph_edges = corpus_doc.get("graph", {}).get("edges", [])
+     cluster_metadata = corpus_doc.get("clusters", [])
+     return (
+         corpus_doc,
+         papers_sorted,
+         embeddings,
+         normalised,
+         umap_2d,
+         umap_3d,
+         graph_edges,
+         cluster_metadata,
+     )
+
+
+ @lru_cache(maxsize=1)
+ def get_embedder(device: str | None = None) -> Specter2Embedder:
+     """Instantiate the Specter2 embedder once."""
+
+     return Specter2Embedder(device=device)
+
+
+ @lru_cache(maxsize=1)
+ def _cluster_options() -> List[str]:
+     """Return the cluster dropdown options (All + IDs)."""
+
+     (_, papers, *_rest) = load_resources()
+     cluster_ids = sorted({int(paper.get("cluster_id", 0)) for paper in papers})
+     return ["All"] + [str(cluster_id) for cluster_id in cluster_ids]
+
+
+ def _resolve_color_basis(choice: str) -> str:
+     return COLOR_BASIS_OPTIONS.get(choice, COLOR_BASIS_OPTIONS[DEFAULT_COLOR_BASIS])
+
+
+ def _resolve_palette(choice: str) -> List[Tuple[float, float, float, float]]:
+     palette = PALETTE_OPTIONS.get(choice, PALETTE_OPTIONS[DEFAULT_PALETTE])
+     resolved: List[Tuple[float, float, float, float]] = []
+     for color in palette:
+         try:
+             resolved.append(to_rgba(color))
+         except ValueError:
+             if color.startswith("rgb"):
+                 parts = color[color.find("(") + 1 : color.find(")")].split(",")
+                 floats = tuple(float(part.strip()) / 255.0 for part in parts)
+                 resolved.append((*floats, 1.0))
+             else:
+                 raise
+     if not resolved:
+         resolved.append((0.2, 0.4, 0.8, 1.0))
+     return resolved
+
+
+ def _hover_text_for_papers(papers: Sequence[Dict[str, Any]]) -> np.ndarray:
+     """Generate hover text for each paper."""
+
+     hover = []
+     for paper in papers:
+         hover.append(
+             "<br>".join(
+                 [
+                     paper.get("title", "(untitled)"),
+                     f"ID: {paper.get('paper_id', 'n/a')}",
+                     f"Cluster: {paper.get('cluster_id', 'n/a')}",
+                     f"Category: {paper.get('primary_category', 'unknown')}",
+                     f"Authors: {', '.join(paper.get('authors', [])[:3])}" + ("…" if len(paper.get('authors', [])) > 3 else ""),
+                 ]
+             )
+         )
+     return np.array(hover)
+
+
+ def _group_points(labels: np.ndarray, palette: Sequence[str]) -> List[Tuple[str, np.ndarray, str]]:
+     """Return masking information for each unique label."""
+
+     unique = sorted(np.unique(labels))
+     groups: List[Tuple[str, np.ndarray, str]] = []
+     for idx, label in enumerate(unique):
+         mask = labels == label
+         color = palette[idx % len(palette)]
+         groups.append((label, mask, color))
+     return groups
+
+
+ def _build_2d_plot(
+     coords: np.ndarray,
+     original_indices: Sequence[int],
+     labels: np.ndarray,
+     hover_text: np.ndarray,
+     edges: Sequence[Dict[str, Any]],
+     clusters: Sequence[Dict[str, Any]],
+     cluster_ids_subset: np.ndarray,
+     point_color_map: Dict[str, Tuple[float, float, float, float]],
+     cluster_color_map: Dict[int, Tuple[float, float, float, float]],
+ ) -> plt.Figure:
+     fig, ax = plt.subplots(figsize=(6.8, 6.2), dpi=120)
+
+     if coords.shape[0] < 1:
+         ax.set_title("Corpus Embedding Map (2D)")
+         ax.axis("off")
+         return fig
+
+     label_order = sorted(set(labels))
+
+     for label in label_order:
+         mask = labels == label
+         if not np.any(mask):
+             continue
+         rgba = point_color_map.get(label)
+         if rgba is None:
+             rgba = (0.25, 0.5, 0.85, 1.0)
+         ax.scatter(
+             coords[mask, 0],
+             coords[mask, 1],
+             s=26,
+             c=[rgba],
+             alpha=0.9,
+             linewidths=0.3,
+             edgecolors="#f5f5f5",
+             label=label,
+         )
+
+     if edges:
+         index_map = {orig_idx: pos for pos, orig_idx in enumerate(original_indices)}
+         segment_map: Dict[int, List[List[Tuple[float, float]]]] = defaultdict(list)
+         for edge in edges[:MAX_EDGE_RENDER]:
+             source = int(edge["source"])
+             target = int(edge["target"])
+             if source not in index_map or target not in index_map:
+                 continue
+             src_idx = index_map[source]
+             tgt_idx = index_map[target]
+             cluster_id = int(cluster_ids_subset[src_idx]) if src_idx < len(cluster_ids_subset) else -1
+             segment_map[cluster_id].append(
+                 [
+                     (coords[src_idx, 0], coords[src_idx, 1]),
+                     (coords[tgt_idx, 0], coords[tgt_idx, 1]),
+                 ]
+             )
+
+         for cluster_id, segments in segment_map.items():
+             base = cluster_color_map.get(cluster_id, (0.55, 0.55, 0.55, 1.0))
+             lc = LineCollection(
+                 segments,
+                 colors=[(base[0], base[1], base[2], 0.22)],
+                 linewidths=0.55,
+             )
+             ax.add_collection(lc)
+
+     for cluster in clusters:
+         centroid = cluster.get("centroid_2d")
+         if not centroid:
+             continue
+         cluster_id = int(cluster.get("cluster_id", -1))
+         rgba = cluster_color_map.get(cluster_id, (0.1, 0.1, 0.1, 1.0))
+         ax.scatter(
+             centroid[0],
+             centroid[1],
+             s=150,
+             marker="D",
+             c=[rgba],
+             edgecolors="#222222",
+             linewidths=0.6,
+             alpha=0.95,
+         )
+         ax.text(
+             centroid[0],
+             centroid[1],
+             f"C{cluster['cluster_id']}",
+             fontsize=9,
+             ha="center",
+             va="bottom",
+             color="#222222",
+         )
+
+     ax.set_title("Corpus Embedding Map (2D)")
+     ax.set_xlabel("UMAP 1")
+     ax.set_ylabel("UMAP 2")
+     ax.tick_params(labelsize=8)
+     ax.set_aspect("equal", adjustable="datalim")
+     ax.grid(alpha=0.15, linestyle="--", linewidth=0.45)
+     ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.16), ncol=4, fontsize=7, frameon=False)
+     fig.tight_layout()
+     return fig
+
+
+ def _build_3d_figure(
+     coords: np.ndarray,
+     original_indices: Sequence[int],
+     labels: np.ndarray,
+     hover_text: np.ndarray,
+     edges: Sequence[Dict[str, Any]],
+     clusters: Sequence[Dict[str, Any]],
+     cluster_ids_subset: np.ndarray,
+     embedding_indices_subset: np.ndarray,
+     point_color_map: Dict[str, Tuple[float, float, float, float]],
+     cluster_color_map: Dict[int, Tuple[float, float, float, float]],
+ ) -> go.Figure:
+     """Generate a 3D Plotly figure for the embedding map."""
+
+     fig = go.Figure()
+
+     if coords.shape[0] < 1:
+         fig.update_layout(title="Corpus Embedding Map (3D)")
+         return fig
+
+     label_order = sorted(set(labels))
+     for label in label_order:
+         mask = labels == label
+         if not np.any(mask):
+             continue
+         rgba = point_color_map.get(label)
+         rgba_str = _float_rgba_to_plotly(rgba) if rgba else "rgba(52, 120, 198, 0.9)"
+         fig.add_trace(
+             go.Scatter3d(
+                 x=coords[mask, 0],
+                 y=coords[mask, 1],
+                 z=coords[mask, 2],
+                 mode="markers",
+                 marker=dict(color=rgba_str, size=4.8, opacity=0.9, line=dict(width=0.6, color="#101010"), symbol="circle"),
+                 name=str(label),
+                 hovertext=hover_text[mask],
+                 hoverinfo="text",
+                 customdata=embedding_indices_subset[mask][:, None],
+             )
+         )
+
+     if edges:
+         index_map = {orig_idx: pos for pos, orig_idx in enumerate(original_indices)}
+         edge_segments: Dict[int, Dict[str, List[float]]] = defaultdict(lambda: {"x": [], "y": [], "z": []})
+         for edge in edges[:MAX_EDGE_RENDER]:
+             source = int(edge["source"])
+             target = int(edge["target"])
+             if source not in index_map or target not in index_map:
+                 continue
+             src_idx = index_map[source]
+             tgt_idx = index_map[target]
+             cluster_id = int(cluster_ids_subset[src_idx]) if src_idx < len(cluster_ids_subset) else -1
+             seg = edge_segments[cluster_id]
+             seg["x"].extend([coords[src_idx, 0], coords[tgt_idx, 0], None])
+             seg["y"].extend([coords[src_idx, 1], coords[tgt_idx, 1], None])
+             seg["z"].extend([coords[src_idx, 2], coords[tgt_idx, 2], None])
+
+         for cluster_id, seg in edge_segments.items():
+             cluster_color = cluster_color_map.get(cluster_id, (0.4, 0.4, 0.4, 1.0))
+             fig.add_trace(
+                 go.Scatter3d(
+                     x=seg["x"],
+                     y=seg["y"],
+                     z=seg["z"],
+                     mode="lines",
+                     line=dict(color=_float_rgba_to_plotly(cluster_color, alpha=0.18), width=1.3),
+                     hoverinfo="none",
+                     name=f"Cluster {cluster_id} edges",
+                     showlegend=False,
+                 )
+             )
+
+     if clusters:
+         fig.add_trace(
+             go.Scatter3d(
+                 x=[c["centroid_3d"][0] for c in clusters],
+                 y=[c["centroid_3d"][1] for c in clusters],
+                 z=[c["centroid_3d"][2] for c in clusters],
+                 mode="markers+text",
+                 marker=dict(
+                     symbol="diamond",
+                     size=12,
+                     color=[_float_rgba_to_plotly(cluster_color_map.get(int(c["cluster_id"]), (0.3, 0.3, 0.3, 1.0))) for c in clusters],
+                     line=dict(width=1.5, color="#222222"),
+                 ),
+                 text=[f"C{c['cluster_id']}" for c in clusters],
+                 textposition="top center",
+                 hovertext=[f"Cluster {c['cluster_id']}<br>Size: {c['size']}" for c in clusters],
+                 hoverinfo="text",
+                 name="Centroids",
+                 showlegend=False,
+             )
+         )
+
+     fig.update_layout(
+         title="Corpus Embedding Map (3D)",
+         scene=dict(
+             xaxis_title="UMAP 1",
+             yaxis_title="UMAP 2",
+             zaxis_title="UMAP 3",
+             xaxis=dict(showgrid=True, zeroline=False, showbackground=False),
+             yaxis=dict(showgrid=True, zeroline=False, showbackground=False),
+             zaxis=dict(showgrid=True, zeroline=False, showbackground=False),
+         ),
+         legend=dict(orientation="h", y=-0.1),
+         margin=dict(l=10, r=10, t=60, b=10),
+         template="plotly_white",
+         scene_camera=dict(eye=dict(x=1.6, y=1.6, z=0.9)),
+         hovermode="closest",
+     )
+     return fig
+
+
+ def render_plots(
+     show_edges: bool,
+     cluster_choice: str,
+     color_choice: str,
+     palette_choice: str,
+ ) -> Tuple[Figure, go.Figure, pd.DataFrame, Dict[str, Any], Dict[str, Dict[str, Any]], List[Tuple[str, str]], Dict[str, Any]]:
+     """Render the 2D and 3D figures with the requested options."""
+
+     (
+         _corpus,
+         papers,
+         _embeddings,
+         _normalised,
+         umap_2d,
+         umap_3d,
+         graph_edges,
+         cluster_metadata,
+     ) = load_resources()
+
+     cluster_ids = np.array([paper.get("cluster_id", 0) for paper in papers], dtype=int)
+     if cluster_choice != "All":
+         cluster_value = int(cluster_choice)
+         mask = cluster_ids == cluster_value
+         clusters_for_plot = [c for c in cluster_metadata if int(c.get("cluster_id", -1)) == cluster_value]
+     else:
+         mask = np.ones(len(papers), dtype=bool)
+         clusters_for_plot = cluster_metadata
+
+     selected_indices = np.where(mask)[0]
+     if selected_indices.size == 0:
+         metrics_empty = {
+             "clusters": 0,
+             "points": 0,
+             "edges": 0,
+             "render_ms": {"2d": 0.0, "3d": 0.0},
+         }
+         return go.Figure(), go.Figure(), pd.DataFrame(), {}, {}, [], metrics_empty
+
+     filtered_papers = [papers[idx] for idx in selected_indices]
+     coords_2d = umap_2d[selected_indices]
+     coords_3d = umap_3d[selected_indices]
+     cluster_ids_subset = cluster_ids[selected_indices]
+     embedding_indices_subset = np.array([int(filtered_papers[i].get("embedding_idx", selected_indices[i])) for i in range(len(filtered_papers))])
+
+     selected_set = {int(idx) for idx in selected_indices.tolist()}
+     filtered_edges = _filter_edges(graph_edges, selected_set) if show_edges else []
+
+     color_basis_key = _resolve_color_basis(color_choice)
+     palette = _resolve_palette(palette_choice)
+     cluster_palette = _resolve_palette(DEFAULT_PALETTE)
+     cluster_color_map = _build_cluster_color_map(cluster_ids, cluster_palette)
+
+     if color_basis_key == "cluster":
+         label_values = np.array([str(paper.get("cluster_id", "unknown")) for paper in filtered_papers])
+         point_color_map = {str(cluster_id): cluster_color_map.get(int(cluster_id), (0.2, 0.4, 0.8, 1.0)) for cluster_id in label_values}
+     else:
+         label_values = np.array([paper.get("primary_category") or "unknown" for paper in filtered_papers])
+         unique_labels = sorted(set(label_values))
+         point_color_map = {label: palette[idx % len(palette)] for idx, label in enumerate(unique_labels)}
+
+     hover_text = _hover_text_for_papers(filtered_papers)
+
+     start_2d = time.perf_counter()
+     fig2d = _build_2d_plot(
+         coords_2d,
+         selected_indices,
+         label_values,
+         hover_text,
+         filtered_edges,
+         clusters_for_plot,
+         cluster_ids_subset,
+         point_color_map,
+         cluster_color_map,
+     )
+     render_2d_ms = (time.perf_counter() - start_2d) * 1000.0
+
+     start_3d = time.perf_counter()
+     fig3d = _build_3d_figure(
+         coords_3d,
+         selected_indices,
+         label_values,
+         hover_text,
+         filtered_edges,
+         clusters_for_plot,
+         cluster_ids_subset,
+         embedding_indices_subset,
+         point_color_map,
+         cluster_color_map,
+     )
+     render_3d_ms = (time.perf_counter() - start_3d) * 1000.0
+
+     overview_df = _build_cluster_overview(filtered_papers)
+     hierarchy_json = _build_cluster_hierarchy_json(filtered_papers)
+
+     paper_lookup = {
+         str(int(embedding_indices_subset[i])): {
+             "title": paper.get("title", "(untitled)"),
+             "paper_id": paper.get("paper_id"),
+             "cluster_id": paper.get("cluster_id"),
+             "primary_category": paper.get("primary_category"),
+             "authors": paper.get("authors", []),
+             "abstract": paper.get("abstract", ""),
+             "published": paper.get("published"),
+             "url": paper.get("meta", {}).get("url") if isinstance(paper.get("meta"), dict) else paper.get("url"),
+         }
+         for i, paper in enumerate(filtered_papers)
+     }
+
+     paper_options = [
+         (f"{details['title']} (C{details['cluster_id']})", str(idx))
+         for idx, details in paper_lookup.items()
+     ]
+     metrics = {
+         "clusters": int(len(set(cluster_ids_subset))),
+         "points": int(len(selected_indices)),
+         "edges": int(len(filtered_edges)),
+         "render_ms": {
+             "2d": round(render_2d_ms, 2),
+             "3d": round(render_3d_ms, 2),
+         },
+     }
+
+     return fig2d, fig3d, overview_df, hierarchy_json, paper_lookup, paper_options, metrics
+
+
+ def refresh_embedding_plot() -> None:
+     """Clear caches to force plot regeneration on next render."""
+
+     load_resources.cache_clear()
+     get_embedding_plots.cache_clear()
+
+
+ @lru_cache(maxsize=1)
+ def get_embedding_plots() -> Tuple[Figure, go.Figure, pd.DataFrame, Dict[str, Any], Dict[str, Dict[str, Any]], List[Tuple[str, str]], Dict[str, Any]]:
+     """Return cached 2D and 3D plots plus cluster summaries using default settings."""
+     return render_plots(
+         show_edges=True,
+         cluster_choice="All",
+         color_choice=DEFAULT_COLOR_BASIS,
+         palette_choice=DEFAULT_PALETTE,
+     )
+
+
+ def _format_results(indices: np.ndarray, scores: np.ndarray, papers: Sequence[Dict[str, Any]]) -> List[List[Any]]:
+     """Convert ranked results into display-friendly rows."""
+
+     formatted: List[List[Any]] = []
+     for rank, (idx, score) in enumerate(zip(indices, scores), start=1):
+         paper = papers[int(idx)]
+         abstract = str(paper.get("abstract", "")).strip()
+         summary = abstract[:220] + ("…" if len(abstract) > 220 else "")
+         formatted.append(
+             [
+                 rank,
+                 round(float(score), 4),
+                 paper.get("title", "(untitled)"),
+                 paper.get("paper_id", "N/A"),
+                 summary,
+             ]
+         )
+     return formatted
+
+
+ def search_corpus(query: str, top_k: int) -> List[List[Any]]:
+     """Perform a cosine-similarity search over the local corpus."""
+
+     query = (query or "").strip()
+     if not query:
+         return []
+
+     _, papers, embeddings, normalised, _, _, _, _ = load_resources()
+     embedder = get_embedder(None)
+
+     query_vector = embedder.embed_query(query)
+     query_norm = query_vector / np.linalg.norm(query_vector)
+
+     scores = normalised @ query_norm
+     top_k = max(1, min(int(top_k), len(papers)))
+     ranked_indices = np.argsort(scores)[::-1][:top_k]
+     ranked_scores = scores[ranked_indices]
+
+     return _format_results(ranked_indices, ranked_scores, papers)
+
+
+ def _refresh_and_render(
+     show_edges: bool,
+     cluster_choice: str,
+     color_choice: str,
+     palette_choice: str,
+ ) -> Tuple[Figure, go.Figure, pd.DataFrame, Dict[str, Any], Dict[str, Dict[str, Any]], List[Tuple[str, str]], Dict[str, Any]]:
+     refresh_embedding_plot()
+     return render_plots(show_edges, cluster_choice, color_choice, palette_choice)
+
+
+ def build_interface() -> gr.Blocks:
+     """Assemble and return the Gradio Blocks interface."""
+
+     with gr.Blocks(title="NexaSci Mini Corpus Search") as demo:
+         gr.Markdown(
+             """
+             # NexaSci Corpus Explorer
+             Enter a short description or paper title to retrieve the closest papers from the locally built corpus.
+             """
+         )
+
+         with gr.Accordion("Corpus Builder", open=False):
+             categories_box = gr.Textbox(
+                 label="Categories",
+                 value="cs.AI cs.LG cs.CL stat.ML",
+                 placeholder="Space-separated arXiv categories",
+             )
+             max_papers_slider = gr.Slider(label="Max papers", minimum=100, maximum=1000, step=50, value=500)
+             num_clusters_slider = gr.Slider(label="KMeans clusters", minimum=5, maximum=60, step=5, value=30)
+             batch_size_slider = gr.Slider(label="Embedding batch size", minimum=4, maximum=64, step=4, value=16)
+             build_button = gr.Button("Build Corpus", variant="primary")
+             build_status = gr.Markdown()
+
+         with gr.Row():
+             show_edges_checkbox = gr.Checkbox(label="Show graph edges", value=True)
+             cluster_dropdown = gr.Dropdown(
+                 label="Cluster filter",
+                 value="All",
+                 choices=_cluster_options(),
+             )
+             color_basis_dropdown = gr.Radio(
+                 label="Color by",
+                 choices=list(COLOR_BASIS_OPTIONS.keys()),
+                 value=DEFAULT_COLOR_BASIS,
+             )
+             palette_dropdown = gr.Dropdown(
+                 label="Color palette",
+                 choices=list(PALETTE_OPTIONS.keys()),
+                 value=DEFAULT_PALETTE,
+             )
+
+         initial_2d, initial_3d, initial_overview, initial_hierarchy, initial_lookup, initial_options, initial_metrics = get_embedding_plots()
+
+         view_selector = gr.Radio(
+             label="Visualization",
+             choices=["2D", "3D"],
+             value="2D",
+             interactive=True,
+         )
+         embedding_plot = gr.Plot(label="Embedding", value=initial_2d, elem_id="embedding-plot")
+         controls_row = gr.Row()
+         with controls_row:
+             orbit_button = gr.Button("Toggle Orbit", variant="secondary")
+             fullscreen_button = gr.Button("Fullscreen", variant="secondary")
+
+         cluster_overview_table = gr.Dataframe(
+             value=initial_overview,
+             label="Cluster Overview",
+             interactive=False,
+         )
+         cluster_hierarchy_json = gr.JSON(value=initial_hierarchy, label="Cluster Hierarchy")
+         paper_state = gr.State(initial_lookup)
+         gr.Markdown("## Paper Details")
+         paper_selector = gr.Dropdown(
+             choices=initial_options,
+             label="Select Paper",
+             value=None,
+         )
+         paper_detail_display = gr.Markdown("Select a paper from the dropdown.")
+         metrics_json = gr.JSON(value=initial_metrics, label="Render Metrics")
+
+         def _build_corpus(max_papers: int, categories: str, num_clusters: int, batch_size: int,
+                           show_edges: bool, cluster_choice: str, color_choice: str, palette_choice: str, view: str):
+             cat_list = [c.strip() for c in categories.split() if c.strip()]
+             if not cat_list:
+                 cat_list = ["cs.AI"]
+             cmd = [
+                 sys.executable,
+                 "-m",
+                 "pipeline.build_corpus",
+                 "--categories",
+                 *cat_list,
+                 "--max-papers",
+                 str(int(max_papers)),
+                 "--num-clusters",
+                 str(int(num_clusters)),
+                 "--batch-size",
+                 str(int(batch_size)),
+             ]
+             start = time.perf_counter()
+             result = subprocess.run(cmd, capture_output=True, text=True)
+             elapsed = time.perf_counter() - start
+             if result.returncode != 0:
+                 logs = (result.stderr or result.stdout or "").strip()
+                 if len(logs) > 800:
+                     logs = "..." + logs[-800:]
+                 status = f"❌ Corpus build failed in {elapsed:.1f}s\n```\n{logs}\n```"
+             else:
+                 logs = (result.stdout or "Success").strip()
+                 if len(logs) > 800:
+                     logs = "..." + logs[-800:]
+                 status = f"✅ Corpus rebuilt with {int(max_papers)} papers in {elapsed:.1f}s\n```\n{logs}\n```"
+
+             fig2d, fig3d, overview, hierarchy, lookup, options, metrics = _refresh_and_render(
+                 show_edges, cluster_choice, color_choice, palette_choice
+             )
+             return (
+                 status,
+                 fig2d if view == "2D" else fig3d,
+                 overview,
+                 hierarchy,
+                 lookup,
+                 gr.update(choices=options, value=None),
+                 "Select a paper from the dropdown.",
+                 metrics,
+             )
+
+         def _update_plots(show_edges: bool, cluster_choice: str, color_choice: str, palette_choice: str):
+             return render_plots(show_edges, cluster_choice, color_choice, palette_choice)
+
+         refresh_button = gr.Button("Refresh Data")
+
+         def _refresh_and_update(show_edges: bool, cluster_choice: str, color_choice: str, palette_choice: str, view: str):
+             fig2d, fig3d, overview, hierarchy, lookup, options, metrics = _refresh_and_render(
+                 show_edges, cluster_choice, color_choice, palette_choice
+             )
+             if view == "3D":
+                 fig3d.update_layout(margin=dict(l=10, r=10, t=60, b=10))
+             return (
+                 fig2d if view == "2D" else fig3d,
+                 overview,
+                 hierarchy,
+                 lookup,
+                 gr.update(choices=options, value=None),
+                 "Select a paper from the dropdown.",
+                 metrics,
+             )
+
+         refresh_button.click(
+             _refresh_and_update,
+             inputs=[show_edges_checkbox, cluster_dropdown, color_basis_dropdown, palette_dropdown, view_selector],
+             outputs=[embedding_plot, cluster_overview_table, cluster_hierarchy_json, paper_state, paper_selector, paper_detail_display, metrics_json],
+         )
+
+         def _update_visual(show_edges: bool, cluster_choice: str, color_choice: str, palette_choice: str, view: str):
+             fig2d, fig3d, overview, hierarchy, lookup, options, metrics = _update_plots(
+                 show_edges, cluster_choice, color_choice, palette_choice
+             )
+             return (
+                 fig2d if view == "2D" else fig3d,
+                 overview,
+                 hierarchy,
+                 lookup,
+                 gr.update(choices=options, value=None),
+                 "Select a paper from the dropdown.",
+                 metrics,
+             )
+
+         view_selector.change(
+             _update_visual,
+             inputs=[show_edges_checkbox, cluster_dropdown, color_basis_dropdown, palette_dropdown, view_selector],
+             outputs=[embedding_plot, cluster_overview_table, cluster_hierarchy_json, paper_state, paper_selector, paper_detail_display, metrics_json],
+         )
+
+         for control in [show_edges_checkbox, cluster_dropdown, color_basis_dropdown, palette_dropdown]:
+             control.change(
+                 _update_visual,
+                 inputs=[show_edges_checkbox, cluster_dropdown, color_basis_dropdown, palette_dropdown, view_selector],
+                 outputs=[embedding_plot, cluster_overview_table, cluster_hierarchy_json, paper_state, paper_selector, paper_detail_display, metrics_json],
+             )
+
+         orbit_button.click(None, inputs=None, outputs=None, js=ORBIT_JS)
+         fullscreen_button.click(None, inputs=None, outputs=None, js=FULLSCREEN_JS)
+
+         build_button.click(
+             _build_corpus,
+             inputs=[
+                 max_papers_slider,
+                 categories_box,
+                 num_clusters_slider,
+                 batch_size_slider,
+                 show_edges_checkbox,
+                 cluster_dropdown,
+                 color_basis_dropdown,
+                 palette_dropdown,
+                 view_selector,
+             ],
+             outputs=[
+                 build_status,
+                 embedding_plot,
+                 cluster_overview_table,
+                 cluster_hierarchy_json,
+                 paper_state,
+                 paper_selector,
+                 paper_detail_display,
+                 metrics_json,
+             ],
+         )
+
+         gr.Markdown("## Semantic Search")
+
+         with gr.Row():
+             query_input = gr.Textbox(
+                 label="Query",
+                 placeholder="e.g. graph neural networks for chemistry",
+                 lines=2,
+             )
+             topk_slider = gr.Slider(
+                 label="Top K Results",
+                 minimum=1,
+                 maximum=20,
+                 step=1,
+                 value=5,
+             )
+
+         results_table = gr.Dataframe(
+             headers=["rank", "score", "title", "paper_id", "summary"],
+             label="Results",
+             datatype=["number", "number", "str", "str", "str"],
+             interactive=False,
+         )
+
+         submit_btn = gr.Button("Search")
+         submit_btn.click(search_corpus, inputs=[query_input, topk_slider], outputs=[results_table])
+
+         def _format_details(selection: str | None, paper_map: Dict[str, Dict[str, Any]]):
+             if not selection:
+                 return "Select a paper from the dropdown."
+             details = paper_map.get(selection)
+             if not details:
+                 return "No details available for this paper."
+             authors = ", ".join(details.get("authors", [])) or "Unknown"
+             lines = [
+                 f"### {details.get('title', '(untitled)')}",
+                 f"**Paper ID:** {details.get('paper_id', 'N/A')}",
+                 f"**Cluster:** {details.get('cluster_id', 'N/A')} | **Category:** {details.get('primary_category', 'unknown')}",
+                 f"**Authors:** {authors}",
+                 f"**Published:** {details.get('published', 'N/A')}",
+                 "",
+                 details.get("abstract", "No abstract available."),
+             ]
+             url = details.get("url")
+             if url:
+                 lines.append(f"\n[View paper]({url})")
+             return "\n\n".join(lines)
+
+         paper_selector.change(_format_details, inputs=[paper_selector, paper_state], outputs=paper_detail_display)
+
+     return demo
+
+
+ def main() -> None:
+     """Launch the Gradio demo."""
+
+     interface = build_interface()
+     interface.launch()


+ if __name__ == "__main__":  # pragma: no cover - manual launch helper
+     main()