File size: 5,615 Bytes
bfaf968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c8fb6b
bfaf968
 
 
 
 
 
5c8fb6b
bfaf968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c8fb6b
 
 
 
 
 
 
bfaf968
 
 
5c8fb6b
bfaf968
 
5c8fb6b
bfaf968
 
5c8fb6b
bfaf968
 
 
5c8fb6b
 
 
 
 
 
 
 
 
 
 
 
 
bfaf968
5c8fb6b
 
 
bfaf968
 
5c8fb6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfaf968
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import {JobScheduler} from './scheduler.js';
import {RequestManager} from './requestManager.js';
import {OnDeviceService} from './services/onDeviceService.js';
import {CloudService} from './services/cloudService.js';
import {Evaluator} from './evaluator.js';
import {logTo} from './utils.js';


// DOM nodes this module writes log lines, statistics and model status into
const logEl = document.getElementById('log');
const statsEl = document.getElementById('stats');
const deviceStatusEl = document.getElementById('deviceStatus');


// inference services, configured from the model inputs' current values
const onDeviceInferenceService = new OnDeviceService({
    modelName: document.getElementById('deviceModel').value,
});
const cloudInferenceService = new CloudService({
    apiKey: '',
    model: document.getElementById('cloudModel').value,
});
const evaluator = new Evaluator();


// request manager; its logger callback appends one line to the log element
// and then refreshes the statistics panel
const requestManager = new RequestManager({
    deviceService: onDeviceInferenceService,
    cloudService: cloudInferenceService,
    evaluator,
    logger: (evt) => {
        const {job, route, latency, evalRes} = evt;
        logTo(logEl, `${job.id} -> ${route} | latency=${latency}ms | exact=${evalRes.exact} | question="${job.prompt.substring(0, 30)}..."`);
        updateStats();
    },
});


// instantiate the job scheduler with some mock prompts TODO: replace with real prompts
const scheduler = new JobScheduler('boolq_validation');


// every job emitted by the scheduler is passed to the request manager
scheduler.onJob(async (job) => {
    await requestManager.handle(job);
});


// keep the services' configuration in sync with the configuration inputs:
// each entry is [element id, DOM event type, handler receiving the input's value]
for (const [inputId, eventType, applyValue] of [
    ['deviceModel', 'change', (value) => onDeviceInferenceService.updateConfig({modelName: value})],
    ['cloudModel', 'change', (value) => cloudInferenceService.updateConfig({model: value})],
    ['apiKey', 'input', (value) => cloudInferenceService.updateConfig({apiKey: value})],
]) {
    document.getElementById(inputId).addEventListener(eventType, (e) => applyValue(e.target.value));
}

// add event listener for run button
document.getElementById('startBtn').addEventListener('click', async () => {
    const startBtn = document.getElementById('startBtn');
    const stopBtn = document.getElementById('stopBtn');

    // toggle start and stop buttons
    startBtn.disabled = true;
    stopBtn.disabled = false;

    // get configuration values from UI
    const pattern = document.getElementById('patternSelect').value;
    const routeStrategy = document.getElementById('routeStrategy').value;
    const cloudProb = parseFloat(document.getElementById('cloudProb').value);

    // update request manager routing strategy
    requestManager.updateRouting({routeStrategy, cloudProb});


    // TODO Adjust that the model is loaded with a button such that user can see loading status and trigger loading before starting
    // starting is only available when model is loaded
    if (routeStrategy !== 'always_cloud' && !onDeviceInferenceService.isReady()) {
        await loadDeviceModel();

        // the stop handler disables the stop button; if that happened while the
        // model was loading, the user cancelled this run, so do not start it
        if (stopBtn.disabled) {
            return;
        }

        // loadDeviceModel() shows load errors in the status element instead of
        // throwing, so check readiness again and return the buttons to their
        // idle state rather than starting jobs without a usable device model
        if (!onDeviceInferenceService.isReady()) {
            startBtn.disabled = false;
            stopBtn.disabled = true;
            return;
        }
    }

    // start the job scheduler with the selected pattern
    scheduler.startPattern(pattern);
});


// stop button: halt the scheduler, then re-enable "start" and disable "stop"
document.getElementById('stopBtn').addEventListener('click', () => {
    scheduler.stop();
    for (const [buttonId, isDisabled] of [['startBtn', false], ['stopBtn', true]]) {
        document.getElementById(buttonId).disabled = isDisabled;
    }
});

// buttons whose click simply invokes one function of this module
for (const [elementId, action] of [
    ['downloadStats', downloadStats],
    ['loadDeviceModelBtn', loadDeviceModel],
]) {
    document.getElementById(elementId).addEventListener('click', () => {
        action();
    });
}


/**
 * Load the on-device model and mirror the loading state in the UI.
 *
 * The load button is disabled while loading. Status strings passed to the
 * callback given to `onDeviceInferenceService.load` are written to the device
 * status element. On success the status reads 'Model Ready'; on failure the
 * error message is shown and the load button is enabled again. Errors are
 * handled here and are not rethrown.
 *
 * @returns {Promise<void>}
 */
async function loadDeviceModel() {
    const loadButton = document.getElementById('loadDeviceModelBtn');
    // writes a status string into the device status element
    const showStatus = (text) => (deviceStatusEl.textContent = text);

    showStatus('Loading...');
    loadButton.disabled = true;

    try {
        await onDeviceInferenceService.load(showStatus);
        showStatus('Model Ready');
    } catch (err) {
        showStatus(`Error: ${err.message}`);
        // allow the user to retry after a failed load
        loadButton.disabled = false;
    }
}

/**
 * Download the request manager's statistics as a `stats.json` file.
 *
 * The export contains every field of `requestManager.stats` plus three derived
 * values: `avgLatencyMs`, `avgDeviceLatencyMs` and `avgCloudLatencyMs` (each 0
 * when the corresponding request count is 0). The derived values are added to
 * a copy, so `requestManager.stats` itself is not modified.
 */
function downloadStats() {
    const stats = requestManager.stats;

    // summed latency of all recorded evaluations that took the given route
    const totalLatencyFor = (route) => stats.evaluations
        .filter((evaluation) => evaluation.route === route)
        .reduce((sum, evaluation) => sum + evaluation.latency, 0);

    // shallow copy: the averages belong to the export only, not to the live stats
    const exported = {
        ...stats,
        avgLatencyMs: stats.count ? (stats.totalLatencyMs / stats.count) : 0,
        avgDeviceLatencyMs: stats.device ? (totalLatencyFor('device') / stats.device) : 0,
        avgCloudLatencyMs: stats.cloud ? (totalLatencyFor('cloud') / stats.cloud) : 0,
    };

    // trigger the download through a temporary anchor pointing at a data URL
    const dataStr = `data:text/json;charset=utf-8,${encodeURIComponent(JSON.stringify(exported, null, 2))}`;
    const dlAnchorElem = document.createElement('a');
    dlAnchorElem.setAttribute("href", dataStr);
    dlAnchorElem.setAttribute("download", "stats.json");
    dlAnchorElem.click();
}

/**
 * Update the statistics display in the UI based on the request manager's stats
 *
 * Renders three panels into the stats element: general stats (processed count,
 * overall average latency, number of recent evaluations capped at 10), cloud
 * stats and on-device stats. Each route's average latency is the summed latency
 * of that route's recorded evaluations divided by that route's request count,
 * shown with one decimal place, or 0 when the route has no requests.
 */
function updateStats() {
    const s = requestManager.stats;

    // average latency for one route, divided by that route's own request count
    const avgLatencyFor = (route, requestCount) => {
        if (!requestCount) {
            return 0;
        }
        const totalLatency = s.evaluations
            .filter((evaluation) => evaluation.route === route)
            .reduce((sum, evaluation) => sum + evaluation.latency, 0);
        return (totalLatency / requestCount).toFixed(1);
    };

    statsEl.innerHTML = `
        <div style="display: flex; justify-content: space-between;">
            <div>
                <h3>General Stats</h3>
                <pre>
Processed: ${s.count}
Avg latency (ms): ${s.count ? (s.totalLatencyMs / s.count).toFixed(1) : 0}
Recent evaluations: ${Math.min(10, s.evaluations.length)}
                </pre>
            </div>
            <div>
                <h3>Cloud Stats</h3>
                <pre>
Requests: ${s.cloud}
Avg latency (ms): ${avgLatencyFor('cloud', s.cloud)}
                </pre>
            </div>
            <div>
                <h3>On-Device Stats</h3>
                <pre>
Requests: ${s.device}
Avg latency (ms): ${avgLatencyFor('device', s.device)}
                </pre>
            </div>
        </div>`;
}