fhueni commited on
Commit
bfaf968
·
1 Parent(s): a811362

feat: setup base code structure for pipeline with todos

Browse files
.gitignore CHANGED
@@ -1,5 +1,5 @@
1
  # Project specific
2
- drugs.csv
3
 
4
  # macOS system files
5
  .DS_Store
 
1
  # Project specific
2
+ medicationlist/drugs.csv
3
 
4
  # macOS system files
5
  .DS_Store
archive/index.js ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ AutoProcessor,
3
+ AutoModelForVision2Seq,
4
+ AutoModelForQuestionAnswering,
5
+ RawImage,
6
+ TextStreamer,
7
+ pipeline
8
+ } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers";
9
+
10
// Overlay that is hidden by initializeModel() once the model has loaded.
const modelLoaderOverlay = document.getElementById("model-loader-overlay");
// Indicator toggled by setUiState() while a request is being processed.
const processingIndicator = document.getElementById("processing-indicator");

const promptInput = document.getElementById("prompt-input");
const generateBtn = document.getElementById("process-btn");

// `model` is populated by initializeModel(); `processor` is not assigned anywhere yet.
let model;
let processor;
let currentImage = null;
17
+
18
+
19
/**
 * Loads the translation pipeline and hides the loading overlay once it is ready.
 *
 * The pipeline is stored in the module-level `model` variable and is only
 * created when `model` is still unset. While files are downloading, the
 * `#model-progress` bar and the `#progress-text` label are updated with the
 * aggregated percentage. If loading fails, the overlay content is replaced
 * with an error message and the error is logged to the console.
 *
 * @returns {Promise<void>}
 */
async function initializeModel() {
  try {
    // Latest progress event per downloaded file, keyed by file name.
    const progress = {};

    // `pipeline()` returns a promise. Awaiting it keeps the overlay visible
    // until the model is really usable and routes load errors to the catch below.
    model ??= await pipeline("translation", "Xenova/nllb-200-distilled-600M", {
      progress_callback: (data) => {
        if (data.status !== "progress" || !data.file?.endsWith?.("onnx_data")) return;

        progress[data.file] = data;
        // NOTE(review): this waits for exactly three `*onnx_data` files before
        // reporting anything - confirm that matches the files of this checkpoint.
        if (Object.keys(progress).length !== 3) return;

        let loaded = 0;
        let total = 0;
        for (const entry of Object.values(progress)) {
          loaded += entry.loaded;
          total += entry.total;
        }

        const overallPercent = Math.round((loaded / total) * 100);
        document.getElementById("model-progress").value = overallPercent;
        document.getElementById("progress-text").textContent = overallPercent + "%";
      },
    });

    modelLoaderOverlay.style.display = "none";
    console.log("Model loaded successfully.");
  } catch (error) {
    console.error("Failed to load model:", error);
    modelLoaderOverlay.innerHTML = `
 <h2 class="text-center text-red-500 text-xl font-semibold">Failed to Load Model</h2>
 <p class="text-center text-white text-md mt-2">Please refresh the page to try again. Check the console for errors.</p>
 `;
  }
}
81
+
82
/**
 * Placeholder for the image processing step that should produce Docling text.
 *
 * @param {ImageBitmap|HTMLImageElement} imageObject Image that should be processed.
 * @returns {Promise<void>} Resolves without a value; the image is currently ignored.
 */
async function process(imageObject) {
  // No processing is implemented yet.
}
89
+
90
+
91
/**
 * Switches the UI between its states.
 *
 * The processing indicator is hidden first and only shown again for the
 * "processing" state. The generate button is enabled only in the "result"
 * state. Unknown states just hide the indicator.
 *
 * @param {'initial'|'processing'|'result'} state The state to display.
 */
function setUiState(state) {
  processingIndicator.classList.add("hidden");

  switch (state) {
    case "initial":
      generateBtn.disabled = true;
      break;
    case "processing":
      processingIndicator.classList.remove("hidden");
      generateBtn.disabled = true;
      break;
    case "result":
      generateBtn.disabled = false;
      break;
    default:
      break;
  }
}
112
+
113
+
114
// Event listeners
generateBtn.addEventListener("click", () => {
  // `process` is the handler defined in this file. The previously referenced
  // `processImage` is not defined and would raise a ReferenceError as soon as
  // an image is selected.
  if (currentImage) {
    process(currentImage);
  }
});

// Reset the UI and start loading the model once the document is parsed.
document.addEventListener("DOMContentLoaded", () => {
  setUiState("initial");
  initializeModel();
});
index.html CHANGED
@@ -1,189 +1,68 @@
1
- <!DOCTYPE html>
2
  <html lang="en">
3
  <head>
4
- <meta charset="UTF-8">
5
- <title>Title</title>
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
7
- <title>Medication List App</title>
8
-
9
- <script src="./index.js" type="module"></script>
10
- <script src="https://cdn.tailwindcss.com"></script>
11
- <link rel="stylesheet" href="./style.css">
12
- <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet"/>
13
-
14
  </head>
 
 
 
15
 
16
- <body class="bg-gray-100 text-gray-800 antialiased">
17
- <div id="model-loader-overlay"
18
- class="fixed inset-0 bg-black bg-opacity-60 flex flex-col items-center justify-center z-50">
19
- <div class="loader-large ease-linear rounded-full h-24 w-24 mb-4"></div>
20
- <h2 class="text-center text-white text-xl font-semibold">Loading Model...</h2>
21
- <p class="text-center text-white text-md mt-2">This may take a moment. The model is being downloaded to your
22
- browser.</p>
23
- <progress id="model-progress" value="0" max="100" class="w-64 mt-4 bg-gray-200 rounded-full h-2"></progress>
24
- <p id="progress-text" class="text-center text-white text-sm mt-2">0%</p>
25
- </div>
26
 
27
- <main class="container mx-auto p-4 md:p-8">
28
- <header class="text-center mb-8">
29
- <h1 class="text-4xl font-bold text-gray-900">Granite Docling WebGPU</h1>
30
- <p class="text-lg text-gray-600 mt-2">Convert document images to HTML using 🤗 Transformers.js!</p>
31
- </header>
 
32
 
33
- <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
34
- <!-- Left Panel: Image Input -->
35
- <div class="bg-white p-6 rounded-lg shadow-md">
36
- <h2 class="text-2xl font-semibold mb-4">1. Select an Image</h2>
37
 
38
- <div
39
- id="image-drop-area"
40
- class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center cursor-pointer transition-colors duration-200 hover:border-indigo-500 hover:bg-indigo-50"
41
- >
42
- <div id="image-placeholder">
43
- <svg class="mx-auto h-12 w-12 text-gray-400" stroke="currentColor" fill="none" viewBox="0 0 48 48"
44
- aria-hidden="true">
45
- <path
46
- d="M28 8H12a4 4 0 00-4 4v20m32-12v8m0 0v8a4 4 0 01-4 4H12a4 4 0 01-4-4v-4m32-4l-3.172-3.172a4 4 0 00-5.656 0L28 28M8 32l9.172-9.172a4 4 0 015.656 0L28 28m0 0l4 4m4-24h8m-4-4v8"
47
- stroke-width="2"
48
- stroke-linecap="round"
49
- stroke-linejoin="round"
50
- />
51
- </svg>
52
- <p class="mt-2 text-sm text-gray-600">
53
- <span class="font-semibold text-indigo-600">Drag and drop</span>
54
- or click to select a file
55
- </p>
56
- <p class="text-xs text-gray-500">PNG, JPG, WEBP</p>
57
- <input type="file" id="file-input" class="hidden" accept="image/*"/>
58
- </div>
59
- <div id="image-preview-container" class="hidden relative">
60
- <img id="image-preview" src="" alt="Selected image" class="mx-auto rounded-md shadow-sm"/>
61
- <button
62
- id="remove-image-btn"
63
- class="absolute top-2 right-2 z-10 bg-red-500 text-white rounded-full p-2 hover:bg-red-600 transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-red-500"
64
- >
65
- <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
66
- <path
67
- fill-rule="evenodd"
68
- d="M4.293 4.293a1 1 0 011.414 0L10 8.586l4.293-4.293a1 1 0 111.414 1.414L11.414 10l4.293 4.293a1 1 0 01-1.414 1.414L10 11.414l-4.293 4.293a1 1 0 01-1.414-1.414L8.586 10 4.293 5.707a1 1 0 010-1.414z"
69
- clip-rule="evenodd"
70
- />
71
- </svg>
72
- </button>
73
- </div>
74
- </div>
75
 
76
- <div class="mt-4 flex">
77
- <input
78
- type="text"
79
- id="prompt-input"
80
- class="flex-1 px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm"
81
- value="Convert this page to docling."
82
- />
83
- <button
84
- id="generate-btn"
85
- class="ml-2 px-4 py-2 bg-indigo-600 text-white rounded-md hover:bg-indigo-700 disabled:bg-gray-400 disabled:cursor-not-allowed"
86
- >
87
- Generate
88
- </button>
89
- </div>
90
 
91
- <h3 class="text-lg font-semibold mt-6 mb-3" id="examples-title">Or try an example:</h3>
92
- <div class="flex space-x-4 overflow-x-auto" id="examples-container">
93
- <img
94
- src="./assets/document.png"
95
- class="example-image h-36 w-auto border-2 border-gray-200 rounded-md cursor-pointer hover:border-indigo-500 transition-colors"
96
- alt="Example document"
97
- data-prompt="Convert this page to docling."
98
- title="Document parsing"
99
- />
100
- <img
101
- src="./assets/chart.png"
102
- class="example-image h-36 w-auto border-2 border-gray-200 rounded-md cursor-pointer hover:border-indigo-500 transition-colors"
103
- alt="Example chart"
104
- data-prompt="Convert chart to OTSL."
105
- title="Chart parsing"
106
- />
107
- <img
108
- src="./assets/table.jpg"
109
- class="example-image h-36 w-auto border-2 border-gray-200 rounded-md cursor-pointer hover:border-indigo-500 transition-colors"
110
- alt="Example table"
111
- data-prompt="Convert this table to OTSL."
112
- title="Table parsing"
113
- />
114
- <img
115
- src="./assets/code.jpg"
116
- class="example-image h-36 w-auto border-2 border-gray-200 rounded-md cursor-pointer hover:border-indigo-500 transition-colors"
117
- alt="Example code"
118
- data-prompt="Convert code to text."
119
- title="Code parsing"
120
- />
121
  </div>
122
  </div>
123
 
124
- <!-- Middle Panel: Output -->
125
- <div class="bg-white p-6 rounded-lg shadow-md flex flex-col">
126
- <div class="flex justify-between items-center mb-4">
127
- <h2 class="text-2xl font-semibold">2. View Result</h2>
128
- <div id="processing-indicator" class="flex items-center space-x-2 text-gray-500 hidden">
129
- <div class="loader-small ease-linear rounded-full h-6 w-6"></div>
130
- <p class="text-sm">Processing image...</p>
131
- </div>
132
- <div class="flex items-center space-x-2">
133
- <span class="text-sm font-medium">Docling</span>
134
- <div class="relative inline-block w-10 mr-2 align-middle select-none transition duration-200 ease-in">
135
- <input
136
- type="checkbox"
137
- name="toggle"
138
- id="view-toggle"
139
- class="toggle-checkbox absolute block w-6 h-6 rounded-full bg-white border-4 appearance-none cursor-pointer"
140
- checked
141
- />
142
- <label for="view-toggle"
143
- class="toggle-label block overflow-hidden h-6 rounded-full bg-gray-300 cursor-pointer"></label>
144
- </div>
145
- <span class="text-sm font-medium text-indigo-600">HTML</span>
146
- </div>
147
- </div>
148
-
149
- <div id="output-container" class="flex-1 border border-gray-200 rounded-lg overflow-hidden bg-gray-50">
150
- <div id="welcome-message" class="h-full flex items-center justify-center text-center text-gray-500">
151
- <p>Select an image to see the result here.</p>
152
- </div>
153
-
154
- <!-- Docling Output -->
155
- <div id="docling-view" class="h-full p-4 hidden">
156
- <pre class="h-full whitespace-pre-wrap text-sm overflow-auto"><code
157
- id="docling-output"></code></pre>
158
- </div>
159
 
160
- <!-- HTML Output -->
161
- <div id="html-view" class="h-full w-full">
162
- <iframe id="html-iframe" sandbox="allow-scripts" class="w-full h-full border-0"></iframe>
163
- </div>
164
- </div>
165
  </div>
 
166
 
167
- <!-- Right Panel: Statistics and Drug Matches -->
168
- <div class="bg-white p-6 rounded-lg shadow-md flex flex-col min-h-[600px]">
169
- <h2 class="text-2xl font-semibold mb-4">3. Analysis Results</h2>
170
-
171
- <!-- Detection Statistics -->
172
- <div class="mb-6">
173
- <h3 class="text-lg font-semibold mb-3 text-gray-800">Detection Statistics</h3>
174
- <div id="detection-stats" class="bg-gray-50 p-4 rounded-lg"></div>
175
- </div>
176
 
177
- <!-- Drug Matches -->
178
- <div class="flex-1 min-h-[400px] flex flex-col">
179
- <h3 class="text-lg font-semibold mb-3 text-gray-800">Medication Matches</h3>
180
- <div id="drug-matches" class="bg-gray-50 p-4 rounded-lg flex-1 overflow-auto"></div>
181
- </div>
182
- </div>
183
- </div>
184
  </main>
185
 
186
- <!-- Hidden canvas for image processing -->
187
- <canvas id="hidden-canvas" class="hidden"></canvas>
188
  </body>
189
  </html>
 
1
+ <!doctype html>
2
  <html lang="en">
3
  <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
6
+ <title>Browser LLM Evaluation</title>
7
+ <link rel="stylesheet" href="styles.css">
8
+ <!-- transformers.js CDN - adjust to latest release if needed -->
9
+ <script src="https://cdn.jsdelivr.net/npm/@huggingface/transformers"></script>
 
 
 
 
10
  </head>
11
+ <body>
12
+ <main class="container">
13
+ <h1>Browser LLM Evaluation</h1>
14
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ <section class="grid">
17
+ <div class="card">
18
+ <h2>Cloud (OpenRouter)</h2>
19
+ <label>API Key <input id="apiKey" type="password" placeholder="sk-..." /></label>
20
+ <label>Model <input id="cloudModel" value="gpt-4o-mini" /></label>
21
+ </div>
22
 
 
 
 
 
23
 
24
+ <div class="card">
25
+ <h2>On-Device</h2>
26
+ <label>Model (transformers.js) <input id="deviceModel" value="distilgpt2" /></label>
27
+ <div id="deviceStatus">Not loaded</div>
28
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ <div class="card">
32
+ <h2>Request Pattern</h2>
33
+ <select id="patternSelect">
34
+ <option value="once-per-sec">1 request / sec</option>
35
+ <option value="ten-per-sec">10 requests / sec</option>
36
+ <option value="batch-10-every-5s">Batch: 10 every 5s</option>
37
+ <option value="burst">Burst: 50 then idle</option>
38
+ </select>
39
+ <label>Route strategy
40
+ <select id="routeStrategy">
41
+ <option value="roundrobin">Round Robin</option>
42
+ <option value="probabilistic">Probabilistic (p to cloud)</option>
43
+ <option value="always_cloud">Always cloud</option>
44
+ <option value="always_device">Always device</option>
45
+ </select>
46
+ </label>
47
+ <label>Cloud probability (for probabilistic) <input id="cloudProb" type="number" min="0" max="1" step="0.1" value="0.5"/></label>
48
+ <div class="buttons">
49
+ <button id="startBtn">Start</button>
50
+ <button id="stopBtn" disabled>Stop</button>
 
 
 
 
 
 
 
 
 
 
51
  </div>
52
  </div>
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ <div class="card wide">
56
+ <h2>Live Log & Results</h2>
57
+ <div id="log" class="log"></div>
58
+ <div id="stats"></div>
 
59
  </div>
60
+ </section>
61
 
 
 
 
 
 
 
 
 
 
62
 
 
 
 
 
 
 
 
63
  </main>
64
 
65
+
66
+ <script type="module" src="./src/main.js"></script>
67
  </body>
68
  </html>
src/evaluator.js ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Evaluator runs a set of evaluation metrics (such as exact text matching) for a task.
 */
export class Evaluator {
  /**
   * Run all metrics for a prediction against the ground truth and return the results.
   *
   * @param pred - Predicted value (converted to a string before comparison)
   * @param truth - Ground truth value (converted to a string before comparison)
   * @returns {{exact: number, f1: number}} `exact` is 1 for a match and 0 otherwise;
   *   `f1` is the result of the custom metric, which is still a stub returning 0
   */
  evaluate(pred, truth) {
    return { exact: this._exactMatch(pred, truth), f1: this._myMetric(pred, truth) };
  }

  /**
   * Check the prediction for an exact match against the ground truth.
   * Both values are normalized first, so surrounding whitespace and
   * capitalization are ignored.
   *
   * @param pred - Predicted value
   * @param truth - Ground truth value
   * @returns {number} 1 on match, 0 otherwise
   * @private
   */
  _exactMatch(pred, truth) {
    return this._normalize(pred) === this._normalize(truth) ? 1 : 0;
  }

  /**
   * TODO: Implement custom metric for classification or NER task.
   *
   * @param pred - Predicted value
   * @param truth - Ground truth value
   * @returns {number} Always 0 until the metric is implemented
   * @private
   */
  _myMetric(pred, truth) {
    return 0;
  }

  /**
   * Normalize a value to avoid false negatives due to spaces or capitalization.
   * Non-string input is converted to a string. Only `null` and `undefined`
   * become the empty string, so falsy but meaningful values such as the
   * number 0 keep their text ("0").
   *
   * @param s - Input value
   * @returns {string}
   * @private
   */
  _normalize(s) {
    return String(s ?? '').trim().toLowerCase();
  }
}
src/main.js ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {JobScheduler} from './scheduler.js';
2
+ import {RequestManager} from './requestManager.js';
3
+ import {OnDeviceService} from './services/onDeviceService.js';
4
+ import {CloudService} from './services/cloudService.js';
5
+ import {Evaluator} from './evaluator.js';
6
+ import {logTo} from './utils.js';
7
+
8
+
9
// DOM elements that display output
const logEl = document.getElementById('log');
const statsEl = document.getElementById('stats');
const deviceStatusEl = document.getElementById('deviceStatus');

// configuration inputs
const deviceModelInput = document.getElementById('deviceModel');
const cloudModelInput = document.getElementById('cloudModel');
const apiKeyInput = document.getElementById('apiKey');

// inference services and evaluator, configured from the current input values
const onDeviceInferenceService = new OnDeviceService({modelName: deviceModelInput.value});
const cloudInferenceService = new CloudService({apiKey: '', model: cloudModelInput.value});
const evaluator = new Evaluator();

/**
 * Logger callback for the request manager: writes one line per handled job
 * to the log element and refreshes the statistics panel.
 *
 * @param evt - Event emitted by the request manager ({job, route, latency, evalRes, text})
 */
function reportJobResult({job, route, latency, evalRes}) {
    const {exact, f1} = evalRes;
    logTo(logEl, `${job.id} -> ${route} | latency=${latency}ms | exact=${exact} f1=${f1.toFixed(2)}`);
    updateStats();
}

const requestManager = new RequestManager({
    deviceService: onDeviceInferenceService,
    cloudService: cloudInferenceService,
    evaluator,
    logger: reportJobResult,
});

// mock prompts for the job scheduler TODO: replace with real prompts
const mockPrompts = [
    {prompt: 'Translate to German: Hello world', groundTruth: 'Hallo Welt'},
    {prompt: 'What is 3*6?', groundTruth: '18'},
    {prompt: 'Answer: What is 2+2?', groundTruth: '4'},
    {prompt: 'What is the capital of switzerland?', groundTruth: 'Bern'},
];
const scheduler = new JobScheduler(mockPrompts);

// every emitted job is handed to the request manager
scheduler.onJob(async (job) => {
    await requestManager.handle(job);
});

// keep the service configuration in sync with the inputs
deviceModelInput.addEventListener('change', ({target}) =>
    onDeviceInferenceService.updateConfig({modelName: target.value})
);
cloudModelInput.addEventListener('change', ({target}) =>
    cloudInferenceService.updateConfig({model: target.value})
);
apiKeyInput.addEventListener('input', ({target}) =>
    cloudInferenceService.updateConfig({apiKey: target.value})
);
56
+
57
// start / stop controls
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');

// Incremented on every start and stop click. A start handler only acts on
// behalf of the run whose number it captured, so a run that was stopped while
// the model was still loading cannot start or reset the buttons afterwards.
let activeRun = 0;

/**
 * Toggle the start and stop buttons.
 *
 * @param {boolean} isRunning - true while an evaluation run is active
 */
function setRunningUi(isRunning) {
    startBtn.disabled = isRunning;
    stopBtn.disabled = !isRunning;
}

// add event listener for run button
startBtn.addEventListener('click', async () => {
    const run = ++activeRun;
    setRunningUi(true);

    // get configuration values from UI
    const pattern = document.getElementById('patternSelect').value;
    const routeStrategy = document.getElementById('routeStrategy').value;

    // An empty or invalid field parses to NaN. Pass undefined in that case so
    // the request manager keeps its current probability.
    const parsedProb = Number.parseFloat(document.getElementById('cloudProb').value);
    const cloudProb = Number.isFinite(parsedProb) ? Math.min(1, Math.max(0, parsedProb)) : undefined;

    // update request manager routing strategy
    requestManager.updateRouting({routeStrategy, cloudProb});

    try {
        // TODO Adjust that the model is loaded with a button such that user can see loading status and trigger loading before starting
        // starting is only available when model is loaded
        if (routeStrategy !== 'always_cloud' && !onDeviceInferenceService.isReady()) {
            await loadDeviceModel();
        }

        // Stop (or another Start) was clicked while the model was loading.
        if (run !== activeRun) return;

        // start the job scheduler with the selected pattern; the promise settles when the run ends
        await scheduler.startPattern(pattern);
    } catch (err) {
        logTo(logEl, `Run failed: ${err?.message ?? err}`);
    } finally {
        // Patterns such as "burst" end on their own: give the Start button back.
        if (run === activeRun && !scheduler.running) setRunningUi(false);
    }
});

stopBtn.addEventListener('click', () => {
    activeRun++;
    scheduler.stop();
    setRunningUi(false);
});
89
+
90
+
91
/**
 * Load the on-device model and mirror the loading state in the device status element.
 * Shows "Loading...", then whatever the service reports through its progress
 * callback, and finally "Ready" or the error message.
 *
 * @returns {Promise<void>}
 */
async function loadDeviceModel() {
    const showStatus = (message) => {
        deviceStatusEl.textContent = message;
        return message;
    };

    showStatus('Loading...');
    try {
        await onDeviceInferenceService.load((progressText) => showStatus(progressText));
    } catch (err) {
        showStatus(`Error: ${err.message}`);
        return;
    }
    showStatus('Ready');
}
100
+
101
+
102
/**
 * Render the current request statistics into the stats element.
 *
 * The average latency is computed over the requests that produced a latency
 * measurement. Failed requests are recorded with a latency of -1 and add
 * nothing to `totalLatencyMs`, so dividing by the total request count would
 * understate the average.
 */
function updateStats() {
    const s = requestManager.stats;
    const measured = s.evaluations.filter((entry) => entry.latency >= 0).length;
    const avgLatency = measured ? (s.totalLatencyMs / measured).toFixed(1) : 0;
    statsEl.innerHTML = `<pre>Processed: ${s.count}\nCloud: ${s.cloud}\nDevice: ${s.device}\nAvg latency (ms): ${avgLatency}\nRecent evaluations: ${Math.min(10, s.evaluations.length)}</pre>`;
}
src/requestManager.js ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {measureAsync} from './utils.js';
2
+
3
+
4
/**
 * RequestManager routes inference requests to the on-device or the cloud service based on a routing strategy.
 * It orchestrates the inference requests, evaluates the results and collects statistics about the run.
 *
 * Available routing strategies:
 * - always_cloud: all requests go to cloud
 * - always_device: all requests go to device
 * - probabilistic: each request goes to cloud with a defined probability
 * - roundrobin: requests alternate between device and cloud (any other value falls back to this)
 */
export class RequestManager {
    /**
     * @param deviceService - On-device inference service exposing `infer(prompt)`
     * @param cloudService - Cloud inference service exposing `infer(prompt)`
     * @param evaluator - Evaluator exposing `evaluate(prediction, groundTruth)`
     * @param {?function} logger - Optional callback invoked with `{job, route, latency, evalRes, text}` after each job
     * @param {string} routeStrategy - Initial routing strategy
     * @param {number} cloudProb - Probability of routing to cloud for the 'probabilistic' strategy
     */
    constructor({
                    deviceService,
                    cloudService,
                    evaluator,
                    logger = null,
                    routeStrategy = 'roundrobin',
                    cloudProb = 0.5
                } = {}) {

        /**
         * On-device inference service
         */
        this.device = deviceService;

        /**
         * Cloud inference service
         */
        this.cloud = cloudService;

        /**
         * Evaluator instance for evaluating inference results
         */
        this.evaluator = evaluator;

        /**
         * Optional logger callback function
         * @type {?function}
         */
        this.logger = logger;

        /**
         * Routing strategy (always_cloud, always_device, probabilistic, roundrobin)
         * @type {string}
         */
        this.routeStrategy = routeStrategy;

        /**
         * Probability of routing to cloud when using 'probabilistic' strategy
         * @type {number}
         */
        this.cloudProb = cloudProb;

        /**
         * Internal round robin counter, incremented before each decision (even = cloud, odd = device)
         * @type {number}
         * @private
         */
        this._rrCounter = 0;

        /**
         * Statistics about routing and evaluations of this job run
         * @type {{cloud: number, evaluations: Object[], count: number, device: number, totalLatencyMs: number}}
         */
        this.stats = {count: 0, cloud: 0, device: 0, totalLatencyMs: 0, evaluations: []};
    }


    /**
     * Update routing configuration. Omitted values keep their current setting.
     *
     * @param routeStrategy - New routing strategy
     * @param cloudProb - New cloud probability for 'probabilistic' strategy. Values that are
     *   not finite numbers (e.g. NaN parsed from an empty input field) are ignored, because
     *   `Math.random() < NaN` is always false and would silently send every request to the
     *   device. Finite values are clamped to the range [0, 1].
     */
    updateRouting({routeStrategy, cloudProb} = {}) {
        if (routeStrategy) this.routeStrategy = routeStrategy;
        if (cloudProb !== undefined) {
            const prob = Number(cloudProb);
            if (Number.isFinite(prob)) this.cloudProb = Math.min(1, Math.max(0, prob));
        }
    }


    /**
     * Handle a single inference job by routing it to the appropriate service,
     * performing inference, evaluating the result, and recording statistics.
     * Inference errors do not reject: they are reported as a text starting with
     * `__error__:` and a latency of -1.
     *
     * @param job - Job with `id`, `prompt` and `groundTruth`
     * @returns {Promise<{job, route: string, latency: number, evalRes: Object, text: string}>}
     */
    async handle(job) {
        // get routing strategy and inference service
        const route = this._choose(job);
        const service = this._getInferenceService(route);

        let text, latency;
        try {
            const {res, ms} = await measureAsync(() => service.infer(job.prompt));
            text = res;
            latency = ms;
        } catch (err) {
            // err may be any thrown value, not necessarily an Error with a message
            text = `__error__:${err?.message ?? err}`;
            latency = -1;
        }

        // evaluate result and store results
        const evalRes = this.evaluator.evaluate(text, job.groundTruth);
        this._record(route, latency, evalRes, job, text);

        // logging the result
        if (this.logger) this.logger({job, route, latency, evalRes, text});

        return {job, route, latency, evalRes, text};
    }


    /**
     * Get the inference service based on the selected route.
     * Could be extended with more services in the future.
     *
     * @param route - The selected route ('cloud' or 'device')
     * @returns The cloud service for 'cloud', the device service otherwise
     * @private
     */
    _getInferenceService(route) {
        return route === 'cloud' ? this.cloud : this.device;
    }

    /**
     * Choose the route for the given job based on the routing strategy.
     *
     * TODO: extend routing to be based on the job characteristics (e.g., prompt length, expected latency, etc.)
     *
     * @param job - The job object (not used in current strategies, could be used for more advanced routing)
     * @returns {string} 'cloud' or 'device'
     * @private
     */
    _choose(job) {
        if (this.routeStrategy === 'always_cloud') return 'cloud';
        if (this.routeStrategy === 'always_device') return 'device';
        if (this.routeStrategy === 'probabilistic') return Math.random() < this.cloudProb ? 'cloud' : 'device';
        // default round robin: the counter is incremented first, so the first request goes to the device
        this._rrCounter++;
        return (this._rrCounter % 2 === 0) ? 'cloud' : 'device';
    }


    /**
     * Record statistics for the given job evaluation.
     * Increases counters for total requests and cloud/device usage.
     * Updates the total latency; failed requests (latency -1) add nothing to it.
     *
     * @param route - The route taken ('cloud' or 'device')
     * @param latency - Latency in milliseconds
     * @param evalRes - Evaluation result object
     * @param job - The job object
     * @param text - The inference result text
     * @private
     */
    _record(route, latency, evalRes, job, text) {
        this.stats.count++;
        if (route === 'cloud') this.stats.cloud++; else this.stats.device++;
        if (latency > 0) this.stats.totalLatencyMs += latency;
        this.stats.evaluations.push({jobId: job.id, route, latency, evalRes, text});
    }
}
src/scheduler.js ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {sleep} from './utils.js';
2
+
3
+
4
/**
 * Emission patterns: how many jobs are emitted per round, how long to wait
 * between rounds, and whether rounds repeat until the scheduler is stopped.
 */
const JOB_PATTERNS = new Map([
    ['once-per-sec', {batchSize: 1, intervalMs: 1000, repeat: true}],
    ['ten-per-sec', {batchSize: 1, intervalMs: 100, repeat: true}],
    ['batch-10-every-5s', {batchSize: 10, intervalMs: 5000, repeat: true}],
    // single burst, then idle
    ['burst', {batchSize: 50, intervalMs: 0, repeat: false}],
]);

/**
 * JobScheduler emits jobs based on predefined patterns.
 * Can be used to simulate different load scenarios like batch processing or one request per second.
 */
export class JobScheduler {
    /**
     * @param {Array<{prompt: string, groundTruth: string}>} promptSource - Prompts that are cycled through when emitting jobs
     */
    constructor(promptSource = []) {
        this.promptSource = promptSource;
        this.running = false;
        this._onJob = null; // callback
        this._runId = 0; // identifies the most recent startPattern() call
    }


    /**
     * Register the callback that receives every emitted job.
     *
     * @param cb - Function called with `{id, prompt, groundTruth}`; may be async
     */
    onJob(cb) {
        this._onJob = cb;
    }


    /**
     * Start emitting jobs based on the selected pattern.
     * The returned promise resolves when this run ends: after the burst, after `stop()`,
     * or once a newer `startPattern()` call has taken over. An unknown pattern name emits
     * nothing and leaves the scheduler state untouched.
     *
     * TODO: Implement different patterns to simulate
     * TODO: Run different datasets instead of just simple prompts
     * @param patternName
     * @returns {Promise<void>}
     */
    async startPattern(patternName) {
        const pattern = JOB_PATTERNS.get(patternName);
        if (!pattern) return;

        // A stopped loop only notices the stop after its sleep. If the scheduler was restarted
        // in the meantime, `running` is true again, so the run id is needed to end the old loop.
        const runId = ++this._runId;
        this.running = true;
        const isActive = () => this.running && this._runId === runId;

        let nextId = 0;
        do {
            for (let j = 0; j < pattern.batchSize && isActive(); j++) this._emit(nextId++);
            if (!pattern.repeat) break;
            await sleep(pattern.intervalMs);
        } while (isActive());

        // only the most recent run may mark the scheduler as idle
        if (this._runId === runId) this.running = false;
    }


    /**
     * Stop emitting jobs. A repeating pattern ends after its current wait.
     */
    stop() {
        this.running = false;
    }

    /**
     * Pick the prompt for a job index by cycling through the prompt source.
     * Falls back to a generated "Hello world" prompt when no prompts are configured.
     *
     * @param id - Running job index
     * @returns {{prompt: string, groundTruth: string}}
     * @private
     */
    _pickPrompt(id) {
        if (this.promptSource.length === 0) return {prompt: `Hello world ${id}`, groundTruth: `Hello world ${id}`};
        return this.promptSource[id % this.promptSource.length];
    }


    /**
     * Build a job for the given index and pass it to the registered callback.
     * The callback is not awaited, so slow jobs do not delay the emission pattern.
     * Errors thrown by the callback, or rejections of the promise it returns,
     * are logged so that one failing job does not end the run.
     *
     * @param id - Running job index
     * @private
     */
    _emit(id) {
        if (!this._onJob) return;

        const p = this._pickPrompt(id);
        const job = {id: `job-${Date.now()}-${id}`, prompt: p.prompt, groundTruth: p.groundTruth};
        const report = (err) => console.error(`Job ${job.id} failed:`, err);

        try {
            const result = this._onJob(job);
            if (result && typeof result.catch === 'function') result.catch(report);
        } catch (err) {
            report(err);
        }
    }
}
src/services/cloudService.js ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // CloudService: example OpenRouter integration. Replace endpoint/payload per provider.
2
+
3
/**
 * OpenRouter chat completions endpoint (OpenAI-compatible request and response format).
 */
const OPENROUTER_CHAT_URL = 'https://openrouter.ai/api/v1/chat/completions';

/**
 * Cloud inference service using a remote API from OpenRouter to access different models over one API.
 *
 * NOTE(review): OpenRouter model ids are usually vendor-prefixed (e.g. "openai/gpt-4o-mini");
 * confirm that the default model name below is accepted by the API.
 */
export class CloudService {
    /**
     * @param apiKey - The API key for authentication
     * @param model - The name of the model to use (defaults to 'gpt-4o-mini')
     */
    constructor({apiKey, model} = {}) {
        this.apiKey = apiKey;
        this.model = model || 'gpt-4o-mini';
    }


    /**
     * Update configuration with new values. Omitted values keep their current setting.
     *
     * @param apiKey - The API key for authentication. An empty string clears the key, so that
     *   emptying the input field does not leave a stale key in use.
     * @param model - The name of the model to use (empty values are ignored)
     */
    updateConfig({apiKey, model} = {}) {
        if (apiKey !== undefined) this.apiKey = apiKey;
        if (model) this.model = model;
    }


    /**
     * Perform inference on the cloud service
     *
     * @param prompt - The input prompt string
     * @returns {Promise<string>} The generated text, or an empty string if the response contains none
     * @throws {Error} If no API key is set or the API answers with a non-2xx status
     */
    async infer(prompt) {
        if (!this.apiKey) throw new Error('No API key set for CloudService');

        const payload = {
            model: this.model,
            messages: [{role: 'user', content: prompt}]
        };

        // call the api
        const resp = await fetch(OPENROUTER_CHAT_URL, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${this.apiKey}`
            },
            body: JSON.stringify(payload)
        });

        // check for errors during request
        if (!resp.ok) {
            const text = await resp.text();
            throw new Error(`Cloud inference failed: ${resp.status} ${text}`);
        }

        const json = await resp.json();

        // TODO check parsing of response for model provider
        // chat-style ({message: {content}}) and completion-style ({text}) choices are both accepted
        const choice = json?.choices?.[0];
        if (choice) return choice.message?.content || choice.text || '';
        if (json?.output) return Array.isArray(json.output) ? json.output.join('\n') : json.output;
        return '';
    }
}
src/services/onDeviceService.js ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // OnDeviceService: uses transformers.js to run a small causal LM in browser
2
+ // Requires the transformers.js script loaded in index.html (cdn).
3
+
4
+
5
/**
 * Inference service that is meant to run an LLM on the device via transformers.js.
 * TODO Implement this class! Loading is a no-op and inference returns a fixed placeholder.
 */
export class OnDeviceService {
    /**
     * @param modelName - Name of the model to use (defaults to 'distilgpt2')
     */
    constructor({modelName = 'distilgpt2'} = {}) {
        this.modelName = modelName;
        this._ready = false;
        this._model = null;
    }


    /**
     * Load the model so that it is ready for inference.
     * Intended behaviour: download the model if it is not cached yet and cache it for later use.
     * TODO Download models from a model hub like HuggingFace using transformers.js
     *
     * Currently does nothing; in particular the ready flag is not changed.
     *
     * @param progressCb - Progress callback (not used yet)
     * @returns {Promise<void>}
     */
    async load(progressCb) {
        // not implemented yet
    }


    /**
     * Tells whether the model is loaded and ready for inference.
     * @returns {boolean}
     */
    isReady() {
        const ready = this._ready;
        return ready;
    }


    /**
     * Perform inference on the on-device model
     * TODO Implement inference
     *
     * @param prompt - The input prompt string (ignored by the placeholder)
     * @param maxNewTokens - Maximum number of new tokens to generate (ignored by the placeholder)
     * @returns {Promise<string>} A fixed placeholder answer
     */
    async infer(prompt, {maxNewTokens = 50} = {}) {
        const placeholderAnswer = "The Answer is 42!";
        return placeholderAnswer;
    }

    /**
     * Update configuration with new values
     *
     * @param modelName - The name of the model to use; empty values are ignored
     */
    updateConfig({modelName}) {
        if (!modelName) return;
        this.modelName = modelName;
    }
}
src/utils.js ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // helpers: timing and small utilities
2
/**
 * Current high-resolution timestamp in milliseconds, as reported by `performance.now()`.
 *
 * @returns {number}
 */
export function nowMs() {
    const timestamp = performance.now();
    return timestamp;
}
5
+
6
/**
 * Wait for the given time.
 *
 * @param ms - Delay in milliseconds
 * @returns {Promise<void>} Resolves (without a value) once the timer has fired
 */
export function sleep(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
9
+
10
+
11
/**
 * Run a (possibly async) function and measure how long it takes to settle.
 * The function is invoked asynchronously, so an error it throws synchronously
 * rejects the returned promise as well.
 *
 * @param fn - Function to execute; may return a value or a promise
 * @returns {Promise<{res: *, ms: number}>} The result of `fn` and the elapsed time in milliseconds
 */
export function measureAsync(fn) {
    const startedAt = nowMs();
    const invoke = () => fn();
    const withTiming = (res) => ({res, ms: nowMs() - startedAt});

    return Promise.resolve().then(invoke).then(withTiming);
}
17
+
18
+
19
/**
 * Append a timestamped line to an HTML element so the log is visible in the UI,
 * then scroll the element to its end. Does nothing when no element is given.
 *
 * @param el - HTML element to log to
 * @param text - Text to log
 */
export function logTo(el, text) {
    if (el) {
        const entry = document.createElement('div');
        const timestamp = new Date().toLocaleTimeString();
        entry.textContent = `[${timestamp}] ${text}`;
        el.appendChild(entry);
        el.scrollTop = el.scrollHeight;
    }
}
styles.css ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ font-family: system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial
3
+ }
4
+
5
+ body {
6
+ margin: 0;
7
+ padding: 16px;
8
+ background: #f5f7fb
9
+ }
10
+
11
+ .container {
12
+ max-width: 1100px;
13
+ margin: 0 auto
14
+ }
15
+
16
+ .grid {
17
+ display: grid;
18
+ grid-template-columns:repeat(3, 1fr);
19
+ gap: 12px
20
+ }
21
+
22
+ .card {
23
+ background: white;
24
+ padding: 12px;
25
+ border-radius: 8px;
26
+ box-shadow: 0 2px 6px rgba(0, 0, 0, 0.06)
27
+ }
28
+
29
+ .card.wide {
30
+ grid-column: 1/4
31
+ }
32
+
33
+ .log {
34
+ height: 220px;
35
+ overflow: auto;
36
+ background: #111;
37
+ color: #0f0;
38
+ padding: 8px;
39
+ font-family: monospace
40
+ }
41
+
42
+ .buttons {
43
+ margin-top: 8px
44
+ }
45
+
46
+ label {
47
+ display: block;
48
+ margin: 6px 0
49
+ }
50
+
51
+ input, select {
52
+ width: 100%;
53
+ padding: 6px;
54
+ margin-top: 4px
55
+ }