fhueni committed on
Commit
7f224da
·
1 Parent(s): 8b3b676

feat: added additional models

Browse files
Files changed (4) hide show
  1. index.html +8 -2
  2. src/main.js +1 -1
  3. src/services/onDeviceService.js +1 -1
  4. src/utils.js +1 -1
index.html CHANGED
@@ -18,6 +18,10 @@
18
  <select id="cloudModel"
19
  class="w-full mb-4 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
20
  <option value="openai/gpt-4o-mini">openai/gpt-4o-mini</option>
 
 
 
 
21
  </select>
22
  </label>
23
 
@@ -38,8 +42,10 @@
38
  <select id="deviceModel"
39
  class="w-full mb-2 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
40
  <option value="onnx-community/gemma-3-270m-it-ONNX">gemma-3-270m-it-ONNX</option>
41
- <option value="onnx-community/Llama-3.2-1B-Instruct">Llama-3.2-1B-Instruct</option>
42
- <option value="Xenova/distilgpt2">distilgpt2</option>
 
 
43
  </select>
44
  </label>
45
 
 
18
  <select id="cloudModel"
19
  class="w-full mb-4 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
20
  <option value="openai/gpt-4o-mini">openai/gpt-4o-mini</option>
21
+ <option value="meta-llama/llama-3.2-1b-instruct">meta-llama/llama-3.2-1b-instruct</option>
22
+ <option value="google/gemma-3n-e2b-it:free">google/gemma-3n-e2b-it:free</option>
23
+ <option value="meta-llama/llama-3.2-1b-instruct">meta-llama/llama-3.2-1b-instruct</option>
24
+ <option value="meta-llama/llama-3.2-3b-instruct">meta-llama/llama-3.2-3b-instruct</option>
25
  </select>
26
  </label>
27
 
 
42
  <select id="deviceModel"
43
  class="w-full mb-2 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
44
  <option value="onnx-community/gemma-3-270m-it-ONNX">gemma-3-270m-it-ONNX</option>
45
+ <option value="onnx-community/gemma-3-1b-it-ONNX">gemma-3-1b-it-ONNX</option>
46
+ <option value="onnx-community/Llama-3.2-1B-Instruct-ONNX">Llama-3.2-1B-Instruct-ONNX</option>
47
+ <option value="onnx-community/Llama-3.2-3B-Instruct-ONNX">Llama-3.2-3B-Instruct-ONNX</option>
48
+ <option disabled value="onnx-community/gemma-3n-E2B-it-ONNX">gemma-3n-E2B-it-ONNX (not working)</option>
49
  </select>
50
  </label>
51
 
src/main.js CHANGED
@@ -107,7 +107,7 @@ async function loadDeviceModel() {
107
  if (progress && progress.loaded && progress.total) {
108
  const percent = ((progress.loaded / progress.total) * 100).toFixed(1);
109
  loadingBar.style.width = percent + '%';
110
- loadingText.textContent = `Loading: ${percent}% (${progress.loaded}/${progress.total} bytes)`;
111
  } else if (progress && progress.status) {
112
  loadingText.textContent = progress.status;
113
  } else if (typeof progress === 'string') {
 
107
  if (progress && progress.loaded && progress.total) {
108
  const percent = ((progress.loaded / progress.total) * 100).toFixed(1);
109
  loadingBar.style.width = percent + '%';
110
+ loadingText.textContent = `Loading: ${percent}% (${(progress.loaded / (1024 ** 3)).toFixed(2)} GB / ${(progress.total / (1024 ** 3)).toFixed(2)} GB)`;
111
  } else if (progress && progress.status) {
112
  loadingText.textContent = progress.status;
113
  } else if (typeof progress === 'string') {
src/services/onDeviceService.js CHANGED
@@ -38,7 +38,7 @@ export class OnDeviceService {
38
  console.log(`[Model Loading] Progress:`, progress);
39
  }
40
  };
41
- // Xenova's pipeline API (ES module)
42
  this._model = await pipeline('text-generation', this.modelName, {
43
  progress_callback: progressCb || defaultProgressCb,
44
  device: 'webgpu', // run on WebGPU if available
 
38
  console.log(`[Model Loading] Progress:`, progress);
39
  }
40
  };
41
+
42
  this._model = await pipeline('text-generation', this.modelName, {
43
  progress_callback: progressCb || defaultProgressCb,
44
  device: 'webgpu', // run on WebGPU if available
src/utils.js CHANGED
@@ -30,7 +30,7 @@ export function logTo(el, evt) {
30
  <td>${evt.route}</td>
31
  <td>${evt.latency.toFixed(2)}ms</td>
32
  <td title="${evt.job.prompt}">${evt.job.prompt.substring(0, 30)}...</td>
33
- <td title="${evt.response.answer}">${evt.response.answer.substring(0, 30)}...</td>
34
  <td>${evt.evalRes.exactMatch}</td>
35
  `;
36
  el.appendChild(row);
 
30
  <td>${evt.route}</td>
31
  <td>${evt.latency.toFixed(2)}ms</td>
32
  <td title="${evt.job.prompt}">${evt.job.prompt.substring(0, 30)}...</td>
33
+ <td title="${evt.response.answer}">${evt.response.answer.substring(0, 30)}</td>
34
  <td>${evt.evalRes.exactMatch}</td>
35
  `;
36
  el.appendChild(row);