Spaces:

fhueni
/

on-device-vs-cloud-llm-inference

Running

App Files Files Community

fhueni committed on Nov 11

Commit

bfaf968

1 Parent(s): a811362

feat: setup base code structure for pipeline with todos

Browse files

Files changed (11) hide show

.gitignore +1 -1
archive/index.js +124 -0
index.html +48 -169
src/evaluator.js +55 -0
src/main.js +105 -0
src/requestManager.js +168 -0
src/scheduler.js +79 -0
src/services/cloudService.js +71 -0
src/services/onDeviceService.js +59 -0
src/utils.js +31 -0
styles.css +55 -0

.gitignore CHANGED Viewed

@@ -1,5 +1,5 @@
 # Project specific
-drugs.csv
 # macOS system files
 .DS_Store

 # Project specific
+medicationlist/drugs.csv
 # macOS system files
 .DS_Store

archive/index.js ADDED Viewed

	@@ -0,0 +1,124 @@

+import {
+    AutoProcessor,
+    AutoModelForVision2Seq,
+    AutoModelForQuestionAnswering,
+    RawImage,
+    TextStreamer,
+    pipeline
+} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers";
+// References to UI elements, looked up by id once when the module is evaluated.
+const modelLoaderOverlay = document.getElementById("model-loader-overlay");
+const processingIndicator = document.getElementById("processing-indicator");
+const promptInput = document.getElementById("prompt-input");
+const generateBtn = document.getElementById("process-btn");
+// Loaded model handle and processor; `processor` is only referenced from commented-out code in this file.
+let model, processor;
+// Image selected for processing; nothing in this file assigns it yet.
+let currentImage = null;
+/**
+ * Loads the translation pipeline and hides the loading overlay once it is ready.
+ *
+ * The pipeline is created only once (`model ??=`) and is awaited, so the overlay stays
+ * visible until loading has actually finished and load failures reach the `catch` block,
+ * which replaces the overlay content with an error message.
+ * While loading, the progress bar shows the combined progress of the `onnx_data` files;
+ * it is only updated once exactly three such files have reported progress.
+ *
+ * @returns {Promise<void>}
+ */
+async function initializeModel() {
+    try {
+        const model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct";
+        // processor = await AutoProcessor.from_pretrained(model_id);
+        const progress = {};
+        // pipeline() returns a promise; without `await` the overlay would be hidden immediately
+        // and a rejected load would bypass the error handling below.
+        model ??= await pipeline('translation', 'Xenova/nllb-200-distilled-600M', {
+            progress_callback: (data) => {
+                if (data.status === "progress" && data.file?.endsWith?.("onnx_data")) {
+                    progress[data.file] = data;
+                    if (Object.keys(progress).length !== 3) return;
+                    let sum = 0;
+                    let total = 0;
+                    for (const val of Object.values(progress)) {
+                        sum += val.loaded;
+                        total += val.total;
+                    }
+                    const overallPercent = Math.round((sum / total) * 100);
+                    document.getElementById("model-progress").value = overallPercent;
+                    document.getElementById("progress-text").textContent = overallPercent + "%";
+                }
+            }
+        });
+        /*
+        model = await AutoModelForQuestionAnswering.from_pretrained(model_id, {
+            dtype: {
+                embed_tokens: "fp16",
+                decoder_model_merged: "fp32",
+            },
+            device: "webgpu",
+            progress_callback: (data) => {
+                if (data.status === "progress" && data.file?.endsWith?.("onnx_data")) {
+                    progress[data.file] = data;
+                    if (Object.keys(progress).length !== 3) return;
+                    let sum = 0;
+                    let total = 0;
+                    for (const [key, val] of Object.entries(progress)) {
+                        sum += val.loaded;
+                        total += val.total;
+                    }
+                    const overallPercent = Math.round((sum / total) * 100);
+                    document.getElementById("model-progress").value = overallPercent;
+                    document.getElementById("progress-text").textContent = overallPercent + "%";
+                }
+            },
+        });
+        */
+        modelLoaderOverlay.style.display = "none";
+        console.log("Model loaded successfully.");
+    } catch (error) {
+        console.error("Failed to load model:", error);
+        modelLoaderOverlay.innerHTML = `
+            <h2 class="text-center text-red-500 text-xl font-semibold">Failed to Load Model</h2>
+            <p class="text-center text-white text-md mt-2">Please refresh the page to try again. Check the console for errors.</p>
+        `;
+    }
+}
+/**
+ * Placeholder for processing an image. The body is currently empty, so calling it
+ * performs no work and resolves to undefined.
+ * @param {ImageBitmap|HTMLImageElement} imageObject An image object to process.
+ */
+async function process(imageObject) {
+}
+/**
+ * Switches the UI between its states: the processing indicator is visible only while
+ * processing, and the generate button is enabled only once a result is available.
+ * Unknown states only hide the indicator.
+ * @param {'initial'|'processing'|'result'} state The current state.
+ */
+function setUiState(state) {
+    // Start from a hidden indicator; only the processing state reveals it again.
+    processingIndicator.classList.add("hidden");
+    switch (state) {
+        case "initial":
+            generateBtn.disabled = true;
+            break;
+        case "processing":
+            processingIndicator.classList.remove("hidden");
+            generateBtn.disabled = true;
+            break;
+        case "result":
+            generateBtn.disabled = false;
+            break;
+        default:
+            break;
+    }
+}
+// Event Listeners
+// Run processing for the currently selected image, if there is one.
+generateBtn.addEventListener("click", () => {
+    if (currentImage) {
+        // The handler defined in this file is `process`; `processImage` does not exist
+        // and calling it would throw a ReferenceError.
+        process(currentImage);
+    }
+});
+// Once the DOM is ready, put the UI into its initial state and start loading the model.
+document.addEventListener("DOMContentLoaded", () => {
+    setUiState("initial");
+    initializeModel();
+});

index.html CHANGED Viewed

@@ -1,189 +1,68 @@
-<!DOCTYPE html>
 <html lang="en">
 <head>
-    <meta charset="UTF-8">
-    <title>Title</title>
-    <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
-    <title>Medication List App</title>
-    <script src="./index.js" type="module"></script>
-    <script src="https://cdn.tailwindcss.com"></script>
-    <link rel="stylesheet" href="./style.css">
-    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet"/>
 </head>
-<body class="bg-gray-100 text-gray-800 antialiased">
-<div id="model-loader-overlay"
-     class="fixed inset-0 bg-black bg-opacity-60 flex flex-col items-center justify-center z-50">
-    <div class="loader-large ease-linear rounded-full h-24 w-24 mb-4"></div>
-    <h2 class="text-center text-white text-xl font-semibold">Loading Model...</h2>
-    <p class="text-center text-white text-md mt-2">This may take a moment. The model is being downloaded to your
-        browser.</p>
-    <progress id="model-progress" value="0" max="100" class="w-64 mt-4 bg-gray-200 rounded-full h-2"></progress>
-    <p id="progress-text" class="text-center text-white text-sm mt-2">0%</p>
-</div>
-<main class="container mx-auto p-4 md:p-8">
-    <header class="text-center mb-8">
-        <h1 class="text-4xl font-bold text-gray-900">Granite Docling WebGPU</h1>
-        <p class="text-lg text-gray-600 mt-2">Convert document images to HTML using 🤗 Transformers.js!</p>
-    </header>
-    <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
-        <!-- Left Panel: Image Input -->
-        <div class="bg-white p-6 rounded-lg shadow-md">
-            <h2 class="text-2xl font-semibold mb-4">1. Select an Image</h2>
-            <div
-                    id="image-drop-area"
-                    class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center cursor-pointer transition-colors duration-200 hover:border-indigo-500 hover:bg-indigo-50"
-            >
-                <div id="image-placeholder">
-                    <svg class="mx-auto h-12 w-12 text-gray-400" stroke="currentColor" fill="none" viewBox="0 0 48 48"
-                         aria-hidden="true">
-                        <path
-                                d="M28 8H12a4 4 0 00-4 4v20m32-12v8m0 0v8a4 4 0 01-4 4H12a4 4 0 01-4-4v-4m32-4l-3.172-3.172a4 4 0 00-5.656 0L28 28M8 32l9.172-9.172a4 4 0 015.656 0L28 28m0 0l4 4m4-24h8m-4-4v8"
-                                stroke-width="2"
-                                stroke-linecap="round"
-                                stroke-linejoin="round"
-                        />
-                    </svg>
-                    <p class="mt-2 text-sm text-gray-600">
-                        <span class="font-semibold text-indigo-600">Drag and drop</span>
-                        or click to select a file
-                    </p>
-                    <p class="text-xs text-gray-500">PNG, JPG, WEBP</p>
-                    <input type="file" id="file-input" class="hidden" accept="image/*"/>
-                </div>
-                <div id="image-preview-container" class="hidden relative">
-                    <img id="image-preview" src="" alt="Selected image" class="mx-auto rounded-md shadow-sm"/>
-                    <button
-                            id="remove-image-btn"
-                            class="absolute top-2 right-2 z-10 bg-red-500 text-white rounded-full p-2 hover:bg-red-600 transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-red-500"
-                    >
-                        <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
-                            <path
-                                    fill-rule="evenodd"
-                                    d="M4.293 4.293a1 1 0 011.414 0L10 8.586l4.293-4.293a1 1 0 111.414 1.414L11.414 10l4.293 4.293a1 1 0 01-1.414 1.414L10 11.414l-4.293 4.293a1 1 0 01-1.414-1.414L8.586 10 4.293 5.707a1 1 0 010-1.414z"
-                                    clip-rule="evenodd"
-                            />
-                        </svg>
-                    </button>
-                </div>
-            </div>
-            <div class="mt-4 flex">
-                <input
-                        type="text"
-                        id="prompt-input"
-                        class="flex-1 px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm"
-                        value="Convert this page to docling."
-                />
-                <button
-                        id="generate-btn"
-                        class="ml-2 px-4 py-2 bg-indigo-600 text-white rounded-md hover:bg-indigo-700 disabled:bg-gray-400 disabled:cursor-not-allowed"
-                >
-                    Generate
-                </button>
-            </div>
-            <h3 class="text-lg font-semibold mt-6 mb-3" id="examples-title">Or try an example:</h3>
-            <div class="flex space-x-4 overflow-x-auto" id="examples-container">
-                <img
-                        src="./assets/document.png"
-                        class="example-image h-36 w-auto border-2 border-gray-200 rounded-md cursor-pointer hover:border-indigo-500 transition-colors"
-                        alt="Example document"
-                        data-prompt="Convert this page to docling."
-                        title="Document parsing"
-                />
-                <img
-                        src="./assets/chart.png"
-                        class="example-image h-36 w-auto border-2 border-gray-200 rounded-md cursor-pointer hover:border-indigo-500 transition-colors"
-                        alt="Example chart"
-                        data-prompt="Convert chart to OTSL."
-                        title="Chart parsing"
-                />
-                <img
-                        src="./assets/table.jpg"
-                        class="example-image h-36 w-auto border-2 border-gray-200 rounded-md cursor-pointer hover:border-indigo-500 transition-colors"
-                        alt="Example table"
-                        data-prompt="Convert this table to OTSL."
-                        title="Table parsing"
-                />
-                <img
-                        src="./assets/code.jpg"
-                        class="example-image h-36 w-auto border-2 border-gray-200 rounded-md cursor-pointer hover:border-indigo-500 transition-colors"
-                        alt="Example code"
-                        data-prompt="Convert code to text."
-                        title="Code parsing"
-                />
             </div>
         </div>
-        <!-- Middle Panel: Output -->
-        <div class="bg-white p-6 rounded-lg shadow-md flex flex-col">
-            <div class="flex justify-between items-center mb-4">
-                <h2 class="text-2xl font-semibold">2. View Result</h2>
-                <div id="processing-indicator" class="flex items-center space-x-2 text-gray-500 hidden">
-                    <div class="loader-small ease-linear rounded-full h-6 w-6"></div>
-                    <p class="text-sm">Processing image...</p>
-                </div>
-                <div class="flex items-center space-x-2">
-                    <span class="text-sm font-medium">Docling</span>
-                    <div class="relative inline-block w-10 mr-2 align-middle select-none transition duration-200 ease-in">
-                        <input
-                                type="checkbox"
-                                name="toggle"
-                                id="view-toggle"
-                                class="toggle-checkbox absolute block w-6 h-6 rounded-full bg-white border-4 appearance-none cursor-pointer"
-                                checked
-                        />
-                        <label for="view-toggle"
-                               class="toggle-label block overflow-hidden h-6 rounded-full bg-gray-300 cursor-pointer"></label>
-                    </div>
-                    <span class="text-sm font-medium text-indigo-600">HTML</span>
-                </div>
-            </div>
-            <div id="output-container" class="flex-1 border border-gray-200 rounded-lg overflow-hidden bg-gray-50">
-                <div id="welcome-message" class="h-full flex items-center justify-center text-center text-gray-500">
-                    <p>Select an image to see the result here.</p>
-                </div>
-                <!-- Docling Output -->
-                <div id="docling-view" class="h-full p-4 hidden">
-                    <pre class="h-full whitespace-pre-wrap text-sm overflow-auto"><code
-                            id="docling-output"></code></pre>
-                </div>
-                <!-- HTML Output -->
-                <div id="html-view" class="h-full w-full">
-                    <iframe id="html-iframe" sandbox="allow-scripts" class="w-full h-full border-0"></iframe>
-                </div>
-            </div>
         </div>
-        <!-- Right Panel: Statistics and Drug Matches -->
-        <div class="bg-white p-6 rounded-lg shadow-md flex flex-col min-h-[600px]">
-            <h2 class="text-2xl font-semibold mb-4">3. Analysis Results</h2>
-            <!-- Detection Statistics -->
-            <div class="mb-6">
-                <h3 class="text-lg font-semibold mb-3 text-gray-800">Detection Statistics</h3>
-                <div id="detection-stats" class="bg-gray-50 p-4 rounded-lg"></div>
-            </div>
-            <!-- Drug Matches -->
-            <div class="flex-1 min-h-[400px] flex flex-col">
-                <h3 class="text-lg font-semibold mb-3 text-gray-800">Medication Matches</h3>
-                <div id="drug-matches" class="bg-gray-50 p-4 rounded-lg flex-1 overflow-auto"></div>
-            </div>
-        </div>
-    </div>
 </main>
-<!-- Hidden canvas for image processing -->
-<canvas id="hidden-canvas" class="hidden"></canvas>
 </body>
 </html>

+<!doctype html>
 <html lang="en">
 <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>Browser LLM Evaluation</title>
+    <link rel="stylesheet" href="styles.css">
+    <!-- transformers.js CDN - adjust to latest release if needed -->
+    <script src="https://cdn.jsdelivr.net/npm/@huggingface/transformers"></script>
 </head>
+<body>
+<main class="container">
+    <h1>Browser LLM Evaluation</h1>
+    <section class="grid">
+        <div class="card">
+            <h2>Cloud (OpenRouter)</h2>
+            <label>API Key <input id="apiKey" type="password" placeholder="sk-..." /></label>
+            <label>Model <input id="cloudModel" value="gpt-4o-mini" /></label>
+        </div>
+        <div class="card">
+            <h2>On-Device</h2>
+            <label>Model (transformers.js) <input id="deviceModel" value="distilgpt2" /></label>
+            <div id="deviceStatus">Not loaded</div>
+        </div>
+        <div class="card">
+            <h2>Request Pattern</h2>
+            <select id="patternSelect">
+                <option value="once-per-sec">1 request / sec</option>
+                <option value="ten-per-sec">10 requests / sec</option>
+                <option value="batch-10-every-5s">Batch: 10 every 5s</option>
+                <option value="burst">Burst: 50 then idle</option>
+            </select>
+            <label>Route strategy
+                <select id="routeStrategy">
+                    <option value="roundrobin">Round Robin</option>
+                    <option value="probabilistic">Probabilistic (p to cloud)</option>
+                    <option value="always_cloud">Always cloud</option>
+                    <option value="always_device">Always device</option>
+                </select>
+            </label>
+            <label>Cloud probability (for probabilistic) <input id="cloudProb" type="number" min="0" max="1" step="0.1" value="0.5"/></label>
+            <div class="buttons">
+                <button id="startBtn">Start</button>
+                <button id="stopBtn" disabled>Stop</button>
             </div>
         </div>
+        <div class="card wide">
+            <h2>Live Log & Results</h2>
+            <div id="log" class="log"></div>
+            <div id="stats"></div>
         </div>
+    </section>
 </main>
+<script type="module" src="./src/main.js"></script>
 </body>
 </html>

src/evaluator.js ADDED Viewed

	@@ -0,0 +1,55 @@

+/**
+ * Evaluator class to run multiple evaluation metrics for a task, such as exact text matching.
+ */
+export class Evaluator {
+    constructor() { }
+    /**
+     * Run all metrics for a prediction against the ground truth and return the results.
+     *
+     * @param pred - Predicted string
+     * @param truth - Ground truth string
+     * @returns {{exact: number, f1: number}} `exact` is 1 on a normalized match and 0 otherwise;
+     *     `f1` holds the result of the placeholder metric, which is currently always 0.
+     */
+    evaluate(pred, truth) {
+        return { exact: this._exactMatch(pred, truth), f1: this._myMetric(pred, truth) };
+    }
+    /**
+     * Check the prediction for an exact match against the ground truth after normalization.
+     *
+     * @param pred - Predicted string
+     * @param truth - Ground truth string
+     * @returns {number} 1 if both normalize to the same string, otherwise 0
+     * @private
+     */
+    _exactMatch(pred, truth) {
+        return this._normalize(pred) === this._normalize(truth) ? 1 : 0;
+    }
+    /**
+     * TODO: Implement custom metric for classification or NER task.
+     *
+     * @param pred - Predicted string
+     * @param truth - Ground truth string
+     * @returns {number} Always 0 until the metric is implemented
+     * @private
+     */
+    _myMetric(pred, truth) {
+        return 0;
+    }
+    /**
+     * Normalize a value to avoid false negatives due to surrounding spaces or capitalization.
+     * Non-string input is converted to a string first; only null and undefined become the
+     * empty string, so falsy values such as 0 or false keep their textual form.
+     *
+     * @param s - Input value
+     * @returns {string}
+     * @private
+     */
+    _normalize(s) {
+        return String(s ?? '').trim().toLowerCase();
+    }
+}

src/main.js ADDED Viewed

	@@ -0,0 +1,105 @@

+import {JobScheduler} from './scheduler.js';
+import {RequestManager} from './requestManager.js';
+import {OnDeviceService} from './services/onDeviceService.js';
+import {CloudService} from './services/cloudService.js';
+import {Evaluator} from './evaluator.js';
+import {logTo} from './utils.js';
+// output elements of the page
+const logEl = document.getElementById('log');
+const statsEl = document.getElementById('stats');
+const deviceStatusEl = document.getElementById('deviceStatus');
+// configuration inputs; their current values seed the services below
+const deviceModelInput = document.getElementById('deviceModel');
+const cloudModelInput = document.getElementById('cloudModel');
+const apiKeyInput = document.getElementById('apiKey');
+// inference services, evaluator and the request manager that ties them together
+const onDeviceInferenceService = new OnDeviceService({modelName: deviceModelInput.value});
+const cloudInferenceService = new CloudService({apiKey: '', model: cloudModelInput.value});
+const evaluator = new Evaluator();
+/**
+ * Write one log line per finished job and refresh the aggregated statistics.
+ */
+function logJobResult(evt) {
+    const {job, route, latency, evalRes} = evt;
+    logTo(logEl, `${job.id} -> ${route} | latency=${latency}ms | exact=${evalRes.exact} f1=${evalRes.f1.toFixed(2)}`);
+    updateStats();
+}
+const requestManager = new RequestManager({
+    deviceService: onDeviceInferenceService,
+    cloudService: cloudInferenceService,
+    evaluator,
+    logger: logJobResult
+});
+// mock prompts for the job scheduler TODO: replace with real prompts
+const mockPrompts = [
+    {prompt: 'Translate to German: Hello world', groundTruth: 'Hallo Welt'},
+    {prompt: 'What is 3*6?', groundTruth: '18'},
+    {prompt: 'Answer: What is 2+2?', groundTruth: '4'},
+    {prompt: 'What is the capital of switzerland?', groundTruth: 'Bern'}
+];
+const scheduler = new JobScheduler(mockPrompts);
+// every emitted job is handed to the request manager
+scheduler.onJob(async (job) => {
+    await requestManager.handle(job);
+});
+// keep the service configuration in sync with the inputs
+deviceModelInput.addEventListener('change', ({target}) =>
+    onDeviceInferenceService.updateConfig({modelName: target.value})
+);
+cloudModelInput.addEventListener('change', ({target}) =>
+    cloudInferenceService.updateConfig({model: target.value})
+);
+apiKeyInput.addEventListener('input', ({target}) =>
+    cloudInferenceService.updateConfig({apiKey: target.value})
+);
+// add event listener for run button
+document.getElementById('startBtn').addEventListener('click', async () => {
+    const startBtn = document.getElementById('startBtn');
+    const stopBtn = document.getElementById('stopBtn');
+    // put the buttons back into the idle state (start enabled, stop disabled)
+    const showIdle = () => {
+        startBtn.disabled = false;
+        stopBtn.disabled = true;
+    };
+    // toggle start and stop buttons
+    startBtn.disabled = true;
+    stopBtn.disabled = false;
+    // get configuration values from UI
+    const pattern = document.getElementById('patternSelect').value;
+    const routeStrategy = document.getElementById('routeStrategy').value;
+    const parsedProb = parseFloat(document.getElementById('cloudProb').value);
+    // an empty or invalid field parses to NaN; pass undefined so the previous probability is kept
+    const cloudProb = Number.isNaN(parsedProb) ? undefined : parsedProb;
+    // update request manager routing strategy
+    requestManager.updateRouting({routeStrategy, cloudProb});
+    // TODO Adjust that the model is loaded with a button such that user can see loading status and trigger loading before starting
+    // starting is only available when model is loaded
+    if (routeStrategy !== 'always_cloud' && !onDeviceInferenceService.isReady()) {
+        await loadDeviceModel();
+        // loadDeviceModel reports failures in the status element instead of throwing,
+        // so check readiness again before sending jobs to a model that did not load
+        if (!onDeviceInferenceService.isReady()) {
+            showIdle();
+            return;
+        }
+    }
+    // start the job scheduler with the selected pattern and wait until it finishes
+    try {
+        await scheduler.startPattern(pattern);
+    } catch (e) {
+        logTo(logEl, `Scheduler error: ${e.message}`);
+    } finally {
+        // finite patterns (e.g. burst) end on their own; only reset when no run is active
+        if (!scheduler.running) showIdle();
+    }
+});
+// stop emitting jobs and return the buttons to the idle state
+document.getElementById('stopBtn').addEventListener('click', () => {
+    scheduler.stop();
+    document.getElementById('startBtn').disabled = false;
+    document.getElementById('stopBtn').disabled = true;
+});
+/**
+ * Load the on-device model and mirror the loading state in the device status element.
+ * Failures are shown as status text and are not rethrown.
+ *
+ * @returns {Promise<void>}
+ */
+async function loadDeviceModel() {
+    const showStatus = (statusText) => deviceStatusEl.textContent = statusText;
+    showStatus('Loading...');
+    try {
+        await onDeviceInferenceService.load(showStatus);
+        showStatus('Ready');
+    } catch (loadError) {
+        showStatus(`Error: ${loadError.message}`);
+    }
+}
+/**
+ * Render the request manager's aggregated statistics into the stats element.
+ *
+ * The average latency is taken over completed requests only: failed requests are recorded
+ * with a negative latency and add nothing to totalLatencyMs, so counting them in the
+ * divisor would understate the average.
+ */
+function updateStats() {
+    const s = requestManager.stats;
+    const completed = s.evaluations.filter((e) => e.latency >= 0).length;
+    const avgLatency = completed ? (s.totalLatencyMs / completed).toFixed(1) : 0;
+    statsEl.innerHTML = `<pre>Processed: ${s.count}\nCloud: ${s.cloud}\nDevice: ${s.device}\nAvg latency (ms): ${avgLatency}\nRecent evaluations: ${Math.min(10, s.evaluations.length)}</pre>`;
+}

src/requestManager.js ADDED Viewed

	@@ -0,0 +1,168 @@

+import {measureAsync} from './utils.js';
+/**
+ * RequestManager routes inference requests to on-device or cloud services based on a routing strategy and configuration.
+ * The manager orchestrates the inference requests, collects statistics, evaluates the results and keeps the aggregated statistics.
+ *
+ * We provide different routing strategies:
+ * - always_cloud: all requests go to cloud
+ * - always_device: all requests go to device
+ * - probabilistic: each request goes to cloud with a defined probability
+ * - roundrobin: requests alternate between cloud and device
+ */
+export class RequestManager {
+    constructor({
+                    deviceService,
+                    cloudService,
+                    evaluator,
+                    logger = null,
+                    routeStrategy = 'roundrobin',
+                    cloudProb = 0.5
+                } = {}) {
+        /**
+         * On-device inference service
+         */
+        this.device = deviceService;
+        /**
+         * Cloud inference service
+         */
+        this.cloud = cloudService;
+        /**
+         * Evaluator instance for evaluating inference results
+         */
+        this.evaluator = evaluator;
+        /**
+         * Optional logger callback, invoked with the result of every handled job
+         * @type {?function}
+         */
+        this.logger = logger;
+        /**
+         * Routing strategy (always_cloud, always_device, probabilistic, roundrobin)
+         * @type {string}
+         */
+        this.routeStrategy = routeStrategy;
+        /**
+         * Probability of routing to cloud when using 'probabilistic' strategy
+         * @type {number}
+         */
+        this.cloudProb = cloudProb;
+        /**
+         * Internal round robin counter (even = cloud, odd = device).
+         * It is incremented before the parity check, so the first request goes to the device.
+         * @type {number}
+         * @private
+         */
+        this._rrCounter = 0;
+        /**
+         * Statistics about routing and evaluations of this job run
+         * @type {{cloud: number, evaluations: *[], count: number, device: number, totalLatencyMs: number}}
+         */
+        this.stats = {count: 0, cloud: 0, device: 0, totalLatencyMs: 0, evaluations: []};
+    }
+    /**
+     * Update routing configuration. Values that are missing or invalid leave the current
+     * setting untouched.
+     *
+     * @param routeStrategy - New routing strategy
+     * @param cloudProb - New cloud probability for 'probabilistic' strategy; non-finite values
+     *     (e.g. NaN from an empty input field) are ignored, finite values are clamped to [0, 1]
+     */
+    updateRouting({routeStrategy, cloudProb} = {}) {
+        if (routeStrategy) this.routeStrategy = routeStrategy;
+        // NaN would silently route every probabilistic request to the device
+        if (typeof cloudProb === 'number' && Number.isFinite(cloudProb)) {
+            this.cloudProb = Math.min(1, Math.max(0, cloudProb));
+        }
+    }
+    /**
+     * Handle a single inference job by routing it to the appropriate service,
+     * performing inference, evaluating the result, and recording statistics.
+     * A failing inference does not reject: the text becomes `__error__:<message>` and the
+     * latency is reported as -1.
+     *
+     * @param job - Job with `id`, `prompt` and `groundTruth`
+     * @returns {Promise<{route: string, latency: number, text: string, job, evalRes: *}>}
+     */
+    async handle(job) {
+        // get routing strategy and inference service
+        const route = this._choose(job);
+        const service = this._getInferenceService(route);
+        let text, latency;
+        try {
+            const {res, ms} = await measureAsync(() => service.infer(job.prompt));
+            text = res;
+            latency = ms;
+        } catch (err) {
+            // fall back to the thrown value itself when it is not an Error instance
+            text = `__error__:${err?.message ?? err}`;
+            latency = -1;
+        }
+        // evaluate result and store results
+        const evalRes = this.evaluator.evaluate(text, job.groundTruth);
+        this._record(route, latency, evalRes, job, text);
+        // logging the result
+        if (this.logger) this.logger({job, route, latency, evalRes, text});
+        return {job, route, latency, evalRes, text};
+    }
+    /**
+     * Get the inference service based on the selected route.
+     * Could be extended with more services in the future.
+     *
+     * @param route - The selected route ('cloud' or 'device')
+     * @returns {*} The cloud service for 'cloud', the device service for anything else
+     * @private
+     */
+    _getInferenceService(route) {
+        return route === 'cloud' ? this.cloud : this.device;
+    }
+    /**
+     * Choose the route for the given job based on the routing strategy.
+     * Any strategy name other than the three explicit ones falls back to round robin.
+     *
+     * TODO: extend routing to be based on the job characteristics (e.g., prompt length, expected latency, etc.)
+     *
+     * @param job - The job object (not used in current strategies, could be used for more advanced routing)
+     * @returns {string} 'cloud' or 'device'
+     * @private
+     */
+    _choose(job) {
+        if (this.routeStrategy === 'always_cloud') return 'cloud';
+        if (this.routeStrategy === 'always_device') return 'device';
+        if (this.routeStrategy === 'probabilistic') return Math.random() < this.cloudProb ? 'cloud' : 'device';
+        // default round robin
+        this._rrCounter++;
+        return (this._rrCounter % 2 === 0) ? 'cloud' : 'device';
+    }
+    /**
+     * Record statistics for the given job evaluation.
+     * Increases counters for total requests and cloud/device usage.
+     * Adds the latency to the total only when it is positive, so failed requests
+     * (latency -1) do not distort the sum.
+     *
+     * @param route - The route taken ('cloud' or 'device')
+     * @param latency - Latency in milliseconds
+     * @param evalRes - Evaluation result object
+     * @param job - The job object
+     * @param text - The inference result text
+     * @private
+     */
+    _record(route, latency, evalRes, job, text) {
+        this.stats.count++;
+        if (route === 'cloud') this.stats.cloud++; else this.stats.device++;
+        if (latency > 0) this.stats.totalLatencyMs += latency;
+        this.stats.evaluations.push({jobId: job.id, route, latency, evalRes, text});
+    }
+}

src/scheduler.js ADDED Viewed

	@@ -0,0 +1,79 @@

+import {sleep} from './utils.js';
+/**
+ * JobScheduler emits jobs based on predefined patterns.
+ * Can be used to simulate different load scenarios like batch processing or one request per second.
+ */
+export class JobScheduler {
+    /**
+     * @param promptSource - Array of `{prompt, groundTruth}` entries that jobs cycle through
+     */
+    constructor(promptSource = []) {
+        this.promptSource = promptSource;
+        this.running = false;
+        this._onJob = null; // callback
+        // identifies the most recent startPattern call so that older loops can detect
+        // that they have been superseded
+        this._runId = 0;
+    }
+    /**
+     * Register the callback that receives every emitted job.
+     *
+     * @param cb - Function called with the job object; may return a promise
+     */
+    onJob(cb) {
+        this._onJob = cb;
+    }
+    /**
+     * Start emitting jobs based on the selected pattern.
+     * The returned promise resolves when the pattern has finished or was stopped.
+     * Starting a new pattern supersedes a loop that is still winding down from an earlier
+     * run, so stop() followed by a quick restart never leaves two loops emitting jobs.
+     * An unknown pattern name emits nothing and leaves the scheduler stopped.
+     *
+     * TODO: Implement different patterns to simulate
+     * TODO: Run different datasets instead of just simple prompts
+     * @param patternName - 'once-per-sec', 'ten-per-sec', 'batch-10-every-5s' or 'burst'
+     * @returns {Promise<void>}
+     */
+    async startPattern(patternName) {
+        const runId = ++this._runId;
+        this.running = true;
+        if (patternName === 'burst') {
+            // single burst of 50 jobs, then idle
+            for (let i = 0; i < 50; i++) this._emit(i);
+            this.running = false;
+            return;
+        }
+        // repeating patterns: emit `batchSize` jobs, then wait `intervalMs`
+        const repeating = {
+            'once-per-sec': {batchSize: 1, intervalMs: 1000},
+            'ten-per-sec': {batchSize: 1, intervalMs: 100},
+            'batch-10-every-5s': {batchSize: 10, intervalMs: 5000}
+        };
+        const pattern = Object.hasOwn(repeating, patternName) ? repeating[patternName] : null;
+        if (!pattern) {
+            console.warn(`Unknown job pattern: ${patternName}`);
+            this.running = false;
+            return;
+        }
+        let i = 0;
+        // keep going until the user stops the evaluation or a newer run takes over
+        while (this.running && this._runId === runId) {
+            for (let j = 0; j < pattern.batchSize; j++) this._emit(i++);
+            await sleep(pattern.intervalMs);
+        }
+    }
+    /**
+     * Stop emitting jobs
+     */
+    stop() {
+        this.running = false;
+    }
+    /**
+     * Pick the prompt for a job index, cycling through the prompt source.
+     * Without any configured prompts a generated placeholder prompt is returned.
+     *
+     * @param id - Sequential job index
+     * @returns {{prompt: string, groundTruth: string}}
+     * @private
+     */
+    _pickPrompt(id) {
+        if (this.promptSource.length === 0) return {prompt: `Hello world ${id}`, groundTruth: `Hello world ${id}`};
+        return this.promptSource[id % this.promptSource.length];
+    }
+    /**
+     * Build a job for the given index and hand it to the registered callback.
+     * The callback is not awaited so that slow jobs do not delay the emission pattern;
+     * errors it throws or rejects with are logged instead of breaking the loop or
+     * surfacing as unhandled rejections.
+     *
+     * @param id - Sequential job index
+     * @private
+     */
+    _emit(id) {
+        if (!this._onJob) return;
+        const p = this._pickPrompt(id);
+        const job = {id: `job-${Date.now()}-${id}`, prompt: p.prompt, groundTruth: p.groundTruth};
+        const report = (err) => console.error(`Job ${job.id} failed:`, err);
+        try {
+            Promise.resolve(this._onJob(job)).catch(report);
+        } catch (err) {
+            report(err);
+        }
+    }
+}

src/services/cloudService.js ADDED Viewed

	@@ -0,0 +1,71 @@

+// CloudService: example OpenRouter integration. Replace endpoint/payload per provider.
+/**
+ * Cloud inference service using a remote API from OpenRouter to access different models over one API.
+ */
+export class CloudService {
+    /**
+     * @param apiKey - The API key for authentication
+     * @param model - The name of the model to use; defaults to 'gpt-4o-mini'
+     */
+    constructor({apiKey, model} = {}) {
+        this.apiKey = apiKey;
+        this.model = model || 'gpt-4o-mini';
+    }
+    /**
+     * Update configuration with new values. Properties that are not passed stay unchanged.
+     *
+     * @param apiKey - The API key for authentication; an empty string clears the stored key,
+     *     so emptying the key field really disables cloud requests
+     * @param model - The name of the model to use; empty values are ignored
+     */
+    updateConfig({apiKey, model} = {}) {
+        if (apiKey !== undefined) this.apiKey = apiKey;
+        if (model) this.model = model;
+    }
+    /**
+     * Perform inference on the cloud service
+     *
+     * @param prompt - The input prompt string
+     * @returns {Promise<string>} The generated text, or an empty string when the response
+     *     contains no recognizable output
+     * @throws {Error} When no API key is set or the API answers with a non-OK status
+     */
+    async infer(prompt) {
+        if (!this.apiKey) throw new Error('No API key set for CloudService');
+        const payload = {
+            model: this.model,
+            messages: [{role: 'user', content: prompt}]
+        };
+        // call the api (OpenRouter serves its OpenAI-compatible API under openrouter.ai/api/v1)
+        const resp = await fetch('https://openrouter.ai/api/v1/chat/completions', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+                'Authorization': `Bearer ${this.apiKey}`
+            },
+            body: JSON.stringify(payload)
+        });
+        // check for errors during request
+        if (!resp.ok) {
+            const text = await resp.text();
+            throw new Error(`Cloud inference failed: ${resp.status} ${text}`);
+        }
+        const json = await resp.json();
+        // TODO check parsing of response for model provider
+        let text = '';
+        try {
+            if (json.choices && json.choices[0]) {
+                // chat responses carry message.content, completion-style responses carry text
+                text = json.choices[0].message?.content || json.choices[0].text || '';
+            } else if (json.output) {
+                text = Array.isArray(json.output) ? json.output.join('\n') : json.output;
+            }
+        } catch (e) {
+            // unexpected response shape: fall back to a truncated dump of the raw payload
+            text = JSON.stringify(json).slice(0, 200);
+        }
+        return text;
+    }
+}

src/services/onDeviceService.js ADDED Viewed

	@@ -0,0 +1,59 @@

+// OnDeviceService: uses transformers.js to run a small causal LM in browser
+// Requires the transformers.js script loaded in index.html (cdn).
+/**
+ * On device llm inference service using transformers.js
+ * TODO Implement this class!
+ */
+export class OnDeviceService {
+    constructor({modelName = 'distilgpt2'} = {}) {
+        this.modelName = modelName;
+        this._ready = false;
+        this._model = null;
+    }
+    /**
+     * Load the model into memory to be ready for inference.
+     * Download the model if not already cached. Cache the model for future use.
+     * TODO Download models from a model hub like HuggingFace using transformers.js
+     *
+     * @param progressCb
+     * @returns {Promise<void>}
+     */
+    async load(progressCb) {
+    }
+    /**
+     * Returns if the model is loaded and ready for inference
+     * @returns {boolean}
+     */
+    isReady() {
+        return this._ready;
+    }
+    /**
+     * Perform inference on the on-device model
+     * TODO Implement inference
+     *
+     * @param prompt - The input prompt string
+     * @param maxNewTokens - Maximum number of new tokens to generate
+     * @returns {Promise<string>}
+     */
+    async infer(prompt, {maxNewTokens = 50} = {}) {
+        return "The Answer is 42!";
+    }
+    /**
+     * Update configuration with new values
+     *
+     * @param modelName - The name of the model to use
+     */
+    updateConfig({modelName}) {
+        if (modelName) this.modelName = modelName;
+    }
+}

src/utils.js ADDED Viewed

	@@ -0,0 +1,31 @@

+// helpers: timing and small utilities
+/**
+ * Read the current timestamp from the high-resolution timer.
+ *
+ * @returns {number} The value of `performance.now()`, in milliseconds
+ */
+export function nowMs() {
+    return performance.now();
+}
+export function sleep(ms) {
+    return new Promise(res => setTimeout(res, ms));
+}
+export function measureAsync(fn) {
+    const start = nowMs();
+    return Promise.resolve()
+        .then(() => fn())
+        .then(res => ({res, ms: nowMs() - start}));
+}
+/**
+ * Log text to a given HTML element with timestamp to show the log in the UI
+ *
+ * @param el - HTML element to log to
+ * @param text - Text to log
+ */
+export function logTo(el, text) {
+    if (!el) return;
+    const p = document.createElement('div');
+    p.textContent = `[${new Date().toLocaleTimeString()}] ${text}`;
+    el.appendChild(p);
+    el.scrollTop = el.scrollHeight;
+}

styles.css ADDED Viewed

	@@ -0,0 +1,55 @@

+/* Base typography: system UI font stack, ending in a generic family so the
+   browser still has a fallback when none of the named fonts is available. */
+:root {
+    font-family: system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
+}
+/* Page shell */
+body {
+    margin: 0;
+    padding: 16px;
+    background: #f5f7fb;
+}
+/* Centered content column */
+.container {
+    max-width: 1100px;
+    margin: 0 auto;
+}
+/* Three equal columns for the cards */
+.grid {
+    display: grid;
+    grid-template-columns: repeat(3, 1fr);
+    gap: 12px;
+}
+.card {
+    background: #fff;
+    padding: 12px;
+    border-radius: 8px;
+    box-shadow: 0 2px 6px rgba(0, 0, 0, 0.06);
+}
+/* A wide card spans all three grid columns */
+.card.wide {
+    grid-column: 1 / 4;
+}
+/* Terminal-like, scrollable log panel */
+.log {
+    height: 220px;
+    overflow: auto;
+    background: #111;
+    color: #0f0;
+    padding: 8px;
+    font-family: monospace;
+}
+.buttons {
+    margin-top: 8px;
+}
+label {
+    display: block;
+    margin: 6px 0;
+}
+/* Form controls fill their container; border-box keeps the padding inside
+   the 100% width so the controls do not overflow their parent. */
+input, select {
+    box-sizing: border-box;
+    width: 100%;
+    padding: 6px;
+    margin-top: 4px;
+}