Spaces:

fhueni
/

on-device-vs-cloud-llm-inference

Running

App Files Files Community

Philip Kehl committed 16 days ago

Commit

6b11ecf

2 Parent(s): b9991b1 6c50dd9

Merge branch 'main' of github.com:fabianHueni/UNIBE_MSGAI_HS25_Project

Browse files

Files changed (5) hide show

index.html +2 -5
src/main.js +36 -59
src/requestManager.js +3 -3
src/scheduler.js +41 -23
src/utils.js +13 -1

index.html CHANGED Viewed

@@ -129,7 +129,8 @@
                        class="w-full h-64 overflow-scroll bg-gray-50 p-3 rounded-lg border border-gray-200 text-sm">
                     <thead>
                     <tr>
-                        <th class="text-left">Timestamp</th>
                         <th class="text-left">Route</th>
                         <th class="text-left">Total Latency (ms)</th>
                         <th class="text-left">Queue (ms)</th>
@@ -145,10 +146,6 @@
             <div id="stats" class="mt-4 text-sm text-gray-800"></div>
             <div class="flex flex-col md:flex-row gap-4 mt-4">
-                <button id="downloadStatsJson"
-                        class="mt-4 w-full bg-purple-600 text-white py-2 rounded-lg hover:bg-purple-700 transition">
-                    Download Statistics as JSON
-                </button>
                 <button id="downloadStatsCsv"
                         class="mt-4 w-full bg-purple-600 text-white py-2 rounded-lg hover:bg-purple-700 transition">
                     Download Statistics as CSV

                        class="w-full h-64 overflow-scroll bg-gray-50 p-3 rounded-lg border border-gray-200 text-sm">
                     <thead>
                     <tr>
+                        <th class="text-left">ID</th>
+                        <th class="text-left">Time</th>
                         <th class="text-left">Route</th>
                         <th class="text-left">Total Latency (ms)</th>
                         <th class="text-left">Queue (ms)</th>
             <div id="stats" class="mt-4 text-sm text-gray-800"></div>
             <div class="flex flex-col md:flex-row gap-4 mt-4">
                 <button id="downloadStatsCsv"
                         class="mt-4 w-full bg-purple-600 text-white py-2 rounded-lg hover:bg-purple-700 transition">
                     Download Statistics as CSV

src/main.js CHANGED Viewed

@@ -3,7 +3,7 @@ import {RequestManager} from './requestManager.js';
 import {OnDeviceService} from './services/onDeviceService.js';
 import {CloudService} from './services/cloudService.js';
 import {Evaluator} from './evaluator.js';
-import {logTo, sleep} from './utils.js';
 // get references to html elements
@@ -13,7 +13,6 @@ const deviceStatusEl = document.getElementById('deviceStatus');
 // instantiate services and components
-console.log(getModelSelection())
 const onDeviceInferenceService = new OnDeviceService(getModelSelection());
 const cloudInferenceService = new CloudService({
     apiKey: document.getElementById('cloudApiKey').value,
@@ -30,7 +29,7 @@ const requestManager = new RequestManager({
 });
-// instantiate the job scheduler with some mock prompts TODO: replace with real prompts
 const scheduler = new JobScheduler('boolq_validation');
@@ -78,13 +77,11 @@ document.getElementById('startBtn').addEventListener('click', async () => {
 document.getElementById('stopBtn').addEventListener('click', () => {
     scheduler.stop();
     document.getElementById('startBtn').disabled = false;
     document.getElementById('stopBtn').disabled = true;
 });
-document.getElementById('downloadStatsJson').addEventListener('click', () => {
-    downloadStatsAsJson();
-});
 document.getElementById('downloadStatsCsv').addEventListener('click', () => {
     downloadStatsAsCSV();
 });
@@ -103,7 +100,7 @@ let currentExperiment = null;
 let experimentJobCount = 0;
 let experimentTargetJobs = 0;
 let isExperimentRunning = false;
-const TARGET_JOBS = 1000;
 document.getElementById('start1000Btn').addEventListener('click', async () => {
@@ -134,7 +131,7 @@ document.getElementById('start1000Btn').addEventListener('click', async () => {
         cloudModel,
         routeStrategy,
         pattern,
-        startTime: new Date().toISOString()
     };
     experimentJobCount = 0;
@@ -183,8 +180,6 @@ document.getElementById('start1000Btn').addEventListener('click', async () => {
 });
 function finishExperiment() {
-    if (!isExperimentRunning) return;
     isExperimentRunning = false;
     console.log('✅ Experiment complete!');
@@ -222,7 +217,7 @@ function downloadExperimentResults() {
     const stats = {
         experiment: {
             ...currentExperiment,
-            endTime: new Date().toISOString(),
             completedJobs: requestManager.stats.count
         },
         stats: requestManager.stats
@@ -257,12 +252,12 @@ function buildExperimentCSV(stats) {
     const lines = [];
     // Header
-    lines.push('job_id,route,latency_ms,total_latency_ms,queueing_time_ms,inference_time_ms,exact_match,f1_score,ground_truth,answer');
     // Data rows
     stats.stats.results.forEach((result, index) => {
         const row = [
-            index,
             result.route || '',
             (result.latency || 0).toFixed(2),
             (result.totalLatency || 0).toFixed(2),
@@ -271,7 +266,19 @@ function buildExperimentCSV(stats) {
             result.evalRes?.exactMatch || false,
             (result.evalRes?.f1WordLevel || 0).toFixed(4),
             `"${(result.job?.groundTruth || '').replace(/"/g, '""')}"`,
-            `"${(result.text?.answer || '').replace(/"/g, '""')}"`
         ];
         lines.push(row.join(','));
     });
@@ -403,53 +410,23 @@ async function loadDeviceModel() {
     }
 }
-function downloadStatsAsJson() {
-    const s = requestManager.stats;
-    // add average latency to stats for device and cloud
-    s.avgLatencyMs = s.count ? (s.totalLatencyMs / s.count) : 0;
-    s.avgDeviceLatencyMs = s.device ? (s.results.filter(e => e.route === 'device').reduce((a, b) => a + b.latency, 0) / s.device) : 0;
-    s.avgCloudLatencyMs = s.cloud ? (s.results.filter(e => e.route === 'cloud').reduce((a, b) => a + b.latency, 0) / s.cloud) : 0;
-    const dataStr = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(s, null, 2));
-    const dlAnchorElem = document.createElement('a');
-    dlAnchorElem.setAttribute("href", dataStr);
-    dlAnchorElem.setAttribute("download", "stats.json");
-    dlAnchorElem.click();
-}
 function downloadStatsAsCSV() {
-    const s = requestManager.stats;
-    const flattened_evals = s.results.map(evaluation => ({
-            route: evaluation.route,
-            latency: evaluation.latency,
-            totalLatency: evaluation.totalLatency || 0,
-            queueingTime: evaluation.queueingTime || 0,
-            inferenceTime: evaluation.inferenceTime || 0,
-            prompt: evaluation.job.prompt,
-            // job details
-            groundTruth: evaluation.job.groundTruth,
-            answer: evaluation.text.answer,
-            // evaluation results
-            exactMatch: evaluation.evalRes.exactMatch,
-            f1: evaluation.evalRes.f1WordLevel,
-            tokensPerSecond: evaluation.evalRes.tokensPerSecond,
-            totalTokens: evaluation.evalRes.totalTokens,
-            // further stats
-            input_tokens: evaluation.text.stats.input_tokens,
-            output_tokens: evaluation.text.stats.output_tokens,
-        })
-    );
-    // Convert stats to CSV format
-    const headers = Object.keys(flattened_evals[0] || {}).join(',');
-    const rows = flattened_evals.map(evaluation =>
-        Object.values(evaluation).map(value => `"${value}"`).join(',')
-    );
-    const csvContent = [headers, ...rows].join('\n');
     const dataStr = "data:text/csv;charset=utf-8," + encodeURIComponent(csvContent);
     const dlAnchorElem = document.createElement('a');

 import {OnDeviceService} from './services/onDeviceService.js';
 import {CloudService} from './services/cloudService.js';
 import {Evaluator} from './evaluator.js';
+import {getNumberOfWords, logTo, sleep} from './utils.js';
 // get references to html elements
 // instantiate services and components
 const onDeviceInferenceService = new OnDeviceService(getModelSelection());
 const cloudInferenceService = new CloudService({
     apiKey: document.getElementById('cloudApiKey').value,
 });
+// instantiate the job scheduler with some mock prompts
 const scheduler = new JobScheduler('boolq_validation');
 document.getElementById('stopBtn').addEventListener('click', () => {
     scheduler.stop();
+    isExperimentRunning = false;
     document.getElementById('startBtn').disabled = false;
     document.getElementById('stopBtn').disabled = true;
 });
 document.getElementById('downloadStatsCsv').addEventListener('click', () => {
     downloadStatsAsCSV();
 });
 let experimentJobCount = 0;
 let experimentTargetJobs = 0;
 let isExperimentRunning = false;
+const TARGET_JOBS = 500;
 document.getElementById('start1000Btn').addEventListener('click', async () => {
         cloudModel,
         routeStrategy,
         pattern,
+        startTime: Date.now()
     };
     experimentJobCount = 0;
 });
 function finishExperiment() {
     isExperimentRunning = false;
     console.log('✅ Experiment complete!');
     const stats = {
         experiment: {
             ...currentExperiment,
+            endTime: Date.now(),
             completedJobs: requestManager.stats.count
         },
         stats: requestManager.stats
     const lines = [];
     // Header
+    lines.push('dataset_item_id,route,latency_ms,total_latency_ms,queueing_time_ms,inference_time_ms,exact_match,f1_score,ground_truth,answer,job_start_ts,inference_start_ts,inference_end_ts,prompt,number_of_words,number_of_characters,experiment_start_time_ms,experiment_end_time_ms,route_strategy,pattern,device_model,cloud_model');
     // Data rows
     stats.stats.results.forEach((result, index) => {
         const row = [
+            result.job.id,
             result.route || '',
             (result.latency || 0).toFixed(2),
             (result.totalLatency || 0).toFixed(2),
             result.evalRes?.exactMatch || false,
             (result.evalRes?.f1WordLevel || 0).toFixed(4),
             `"${(result.job?.groundTruth || '').replace(/"/g, '""')}"`,
+            `"${(result.text?.answer || '').replace(/"/g, '""')}"`,
+            result.job.timestamps.jobStart || 0,
+            result.job.timestamps.inferenceStart || 0,
+            result.job.timestamps.inferenceEnd || 0,
+            `"${(result.job.prompt || '').replace(/"/g, '""')}"`,
+            getNumberOfWords(result.job.prompt || ''),
+            result.job.prompt.length,
+            stats.experiment.startTime || 0,
+            stats.experiment.endTime || 0,
+            stats.experiment.routeStrategy,
+            stats.experiment.pattern,
+            stats.experiment.deviceModel,
+            stats.experiment.cloudModel
         ];
         lines.push(row.join(','));
     });
     }
 }
 function downloadStatsAsCSV() {
+    // make the stats compatible with buildExperimentCSV method for reuse
+    const stats = {
+        experiment: {
+            deviceModel: getModelSelection().modelName,
+            cloudModel: document.getElementById('cloudModel').value,
+            routeStrategy: document.getElementById('routeStrategy').value,
+            pattern: document.getElementById('patternSelect').value,
+            startTime: null,
+            endTime: Date.now(),
+            completedJobs: requestManager.stats.count
+        },
+        stats: requestManager.stats
+    };
+    const csvContent = buildExperimentCSV(stats);
     const dataStr = "data:text/csv;charset=utf-8," + encodeURIComponent(csvContent);
     const dlAnchorElem = document.createElement('a');

src/requestManager.js CHANGED Viewed

@@ -179,18 +179,18 @@ export class RequestManager {
         let response, latencyMs, cleanedResponse; // response is object with .answer and .stats
         try {
             // Mark inference start
-            job.timestamps.inferenceStart = performance.now();
             const {res, ms} = await measureAsync(() => service.infer(full_prompt));
             response = res;
             latencyMs = ms;
             // Mark inference end
-            job.timestamps.inferenceEnd = performance.now();
         } catch (err) {
             response = `__error__:${err.message}`;
             latencyMs = -1;
-            job.timestamps.inferenceEnd = performance.now();
         }
         // Calculate timing metrics

         let response, latencyMs, cleanedResponse; // response is object with .answer and .stats
         try {
             // Mark inference start
+            job.timestamps.inferenceStart = Date.now();
             const {res, ms} = await measureAsync(() => service.infer(full_prompt));
             response = res;
             latencyMs = ms;
             // Mark inference end
+            job.timestamps.inferenceEnd = Date.now();
         } catch (err) {
             response = `__error__:${err.message}`;
             latencyMs = -1;
+            job.timestamps.inferenceEnd = Date.now();
         }
         // Calculate timing metrics

src/scheduler.js CHANGED Viewed

@@ -24,38 +24,55 @@ export class JobScheduler {
     /**
      * Start emitting jobs based on the selected pattern
-     * TODO: Implement different patterns to simulate
-     * TODO: Run different datasets instead of just simple prompts
-     * @param patternName
-     * @returns {Promise<void>}
      */
-    async startPattern(patternName) {
         this.running = true;
-        // once per second until user stopp evaluation
         if (patternName === 'once-per-sec') {
-            let i = 0;
-            while (this._dataset.length > 0 && this.running) {
-                const item = this._dataset.shift(); //shift instead of pop for FIFO
                 this._emit(item);
-                await sleep(1000);
             }
         } else if (patternName === 'every-ten-sec') {
-            let i = 0;
-            while (this._dataset.length > 0 && this.running) {
                 const item = this._dataset.shift();
                 this._emit(item);
-                await sleep(10000);
             }
         } else if (patternName === 'exponential-arrival') {
-            let i = 0;
-            while (this._dataset.length > 0 && this.running) {
                 const item = this._dataset.shift();
                 this._emit(item);
-                const timeToNextArrival = this._generateInterarrivalTime(this._interArrivalTimeLambda);
-                await sleep(timeToNextArrival);
             }
         }
     }
@@ -98,10 +115,11 @@ export class JobScheduler {
     _emit(item) {
         if (this._onJob) {
             const job = {
                 prompt: item.prompt,
                 groundTruth: item.groundTruth,
                 timestamps: {
-                    jobStart: performance.now(),
                     inferenceStart: null,
                     inferenceEnd: null
                 }
@@ -141,15 +159,15 @@ export class JobScheduler {
                         }
                         fields.push(field);
                     }
-                    const [question, answer, context] = fields;
                     // More explicit prompt to get concise answers
                     const full_prompt = `Question: ${question}
                                         Context: ${context}
                                         Instructions: Answer with ONLY the word "true" or "false". Do not provide any explanation or additional text.
                                         Answer:`;
-                    return {prompt: full_prompt, groundTruth: answer};
                 });
                 console.log(`✅ Dataset '${name}' loaded with ${this._dataset.length} items.`);
             })
@@ -160,13 +178,13 @@ export class JobScheduler {
     /**
-     * Generate interarrival time based on exponential distribution
      *
      * @param lambda - rate parameter (requests per second)
      * @returns {number} - interarrival time in milliseconds
      * @private
      */
-    _generateInterarrivalTime(lambda) {
         const u = Math.random(); // uniform random number between 0 and 1
         return -Math.log(u) / lambda * 1000; // convert to milliseconds
     }

     /**
      * Start emitting jobs based on the selected pattern
      *
      * Items are taken from the front of `this._dataset` (and removed from it) and
      * handed to `this._emit` one at a time. Supported pattern names:
      *   - 'once-per-sec':        fixed 1000 ms pause between jobs
      *   - 'every-ten-sec':       fixed 10000 ms pause between jobs
      *   - 'exponential-arrival': random pause drawn from
      *                            `_generateExponentialInterarrivalTime(this._interArrivalTimeLambda)`
      * Any other pattern name matches no branch: nothing is emitted and 0 is returned.
      *
      * Emission ends when the dataset is empty, `this.running` is falsy, or
      * `maxJobs` jobs have been emitted, whichever happens first.
      *
+     * @param {string} patternName - The pattern to use
+     * @param {number} maxJobs - Maximum number of jobs to emit (defaults to Infinity)
+     * @returns {Promise<number>} - Number of jobs emitted
      */
+    async startPattern(patternName, maxJobs = Infinity) {
         // Checked by every loop below; presumably cleared by stop() (not visible here).
         this.running = true;
+        let jobsEmitted = 0;
+        if (maxJobs !== Infinity) {
+            console.log(`🚀 Starting limited run: ${maxJobs} jobs with pattern '${patternName}'`);
+        }
         if (patternName === 'once-per-sec') {
+            while (this._dataset.length > 0 && this.running && jobsEmitted < maxJobs) {
+                const item = this._dataset.shift();
                 this._emit(item);
+                jobsEmitted++;
                 // Pause only if another job will follow, so the call returns right after the last job.
+                if (jobsEmitted < maxJobs && this._dataset.length > 0 && this.running) {
+                    await sleep(1000);
+                }
             }
         } else if (patternName === 'every-ten-sec') {
+            while (this._dataset.length > 0 && this.running && jobsEmitted < maxJobs) {
                 const item = this._dataset.shift();
                 this._emit(item);
+                jobsEmitted++;
                 // Same skip-the-trailing-pause rule as above, with a 10 s gap.
+                if (jobsEmitted < maxJobs && this._dataset.length > 0 && this.running) {
+                    await sleep(10000);
+                }
             }
         } else if (patternName === 'exponential-arrival') {
+            while (this._dataset.length > 0 && this.running && jobsEmitted < maxJobs) {
                 const item = this._dataset.shift();
                 this._emit(item);
+                jobsEmitted++;
                 // A fresh random gap is drawn before each pause; no pause after the last job.
+                if (jobsEmitted < maxJobs && this._dataset.length > 0 && this.running) {
+                    const timeToNextArrival = this._generateExponentialInterarrivalTime(this._interArrivalTimeLambda);
+                    await sleep(timeToNextArrival);
+                }
             }
         }
         // Log which kind of run ended: a capped run, or an uncapped one that
         // ended because the dataset ran out or `this.running` became falsy.
+        if (maxJobs !== Infinity) {
+            console.log(`✅ Limited run complete: ${jobsEmitted} jobs emitted.`);
+        } else {
+            console.log(`🛑 Job emission stopped. Total jobs emitted: ${jobsEmitted}`);
+        }
+        return jobsEmitted;
     }
     _emit(item) {
         if (this._onJob) {
             const job = {
+                id: item.id,
                 prompt: item.prompt,
                 groundTruth: item.groundTruth,
                 timestamps: {
+                    jobStart: Date.now(),
                     inferenceStart: null,
                     inferenceEnd: null
                 }
                         }
                         fields.push(field);
                     }
+                    const [id, question, answer, context] = fields;
                     // More explicit prompt to get concise answers
                     const full_prompt = `Question: ${question}
                                         Context: ${context}
                                         Instructions: Answer with ONLY the word "true" or "false". Do not provide any explanation or additional text.
                                         Answer:`;
+                    return {id: id, prompt: full_prompt, groundTruth: answer};
                 });
                 console.log(`✅ Dataset '${name}' loaded with ${this._dataset.length} items.`);
             })
     /**
+     * Generate interarrival time based on exponential interarrival distribution (equals a poisson process)
      *
      * @param lambda - rate parameter (requests per second)
      * @returns {number} - interarrival time in milliseconds
      * @private
      */
+    _generateExponentialInterarrivalTime(lambda) {
         const u = Math.random(); // uniform random number between 0 and 1
         return -Math.log(u) / lambda * 1000; // convert to milliseconds
     }

src/utils.js CHANGED Viewed

@@ -26,6 +26,7 @@ export function logTo(el, evt) {
     if (!el) return;
     const row = document.createElement('tr');
     row.innerHTML = `
         <td>${new Date().toLocaleTimeString()}</td>
         <td>${evt.route}</td>
         <td>${evt.totalLatency?.toFixed(2) || evt.latency?.toFixed(2) || 0}ms</td>
@@ -37,4 +38,15 @@ export function logTo(el, evt) {
     `;
     el.appendChild(row);
     el.scrollTop = el.scrollHeight;
-}

     if (!el) return;
     const row = document.createElement('tr');
     row.innerHTML = `
+        <td>${evt.job.id}</td>
         <td>${new Date().toLocaleTimeString()}</td>
         <td>${evt.route}</td>
         <td>${evt.totalLatency?.toFixed(2) || evt.latency?.toFixed(2) || 0}ms</td>
     `;
     el.appendChild(row);
     el.scrollTop = el.scrollHeight;
+}
+/**
+ * Approximates the number of words in a given text string
+ *
+ * @param text - Input text string
+ * @returns {number} - Approximate number of words
+ */
+export function getNumberOfWords(text) {
+    return text.trim().split(/\s+/).length;
+}