marstin committed on
Commit
c0874a8
·
1 Parent(s): cf24c8d

[martin-dev] update layers

Browse files
demo/logs/CohereLabs/aya-vision-8b.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/MBZUAI/GLaMM-FullScope.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
{logs → demo/logs}/Qwen/Qwen2-VL-2B-Instruct.txt RENAMED
@@ -1,730 +1,641 @@
1
-
2
- visual
3
- visual.patch_embed
4
- visual.patch_embed.proj
5
- visual.rotary_pos_emb
6
- visual.blocks
7
- visual.blocks.0
8
- visual.blocks.0.norm1
9
- visual.blocks.0.norm2
10
- visual.blocks.0.attn
11
- visual.blocks.0.attn.qkv
12
- visual.blocks.0.attn.proj
13
- visual.blocks.0.mlp
14
- visual.blocks.0.mlp.fc1
15
- visual.blocks.0.mlp.act
16
- visual.blocks.0.mlp.fc2
17
- visual.blocks.1
18
- visual.blocks.1.norm1
19
- visual.blocks.1.norm2
20
- visual.blocks.1.attn
21
- visual.blocks.1.attn.qkv
22
- visual.blocks.1.attn.proj
23
- visual.blocks.1.mlp
24
- visual.blocks.1.mlp.fc1
25
- visual.blocks.1.mlp.act
26
- visual.blocks.1.mlp.fc2
27
- visual.blocks.2
28
- visual.blocks.2.norm1
29
- visual.blocks.2.norm2
30
- visual.blocks.2.attn
31
- visual.blocks.2.attn.qkv
32
- visual.blocks.2.attn.proj
33
- visual.blocks.2.mlp
34
- visual.blocks.2.mlp.fc1
35
- visual.blocks.2.mlp.act
36
- visual.blocks.2.mlp.fc2
37
- visual.blocks.3
38
- visual.blocks.3.norm1
39
- visual.blocks.3.norm2
40
- visual.blocks.3.attn
41
- visual.blocks.3.attn.qkv
42
- visual.blocks.3.attn.proj
43
- visual.blocks.3.mlp
44
- visual.blocks.3.mlp.fc1
45
- visual.blocks.3.mlp.act
46
- visual.blocks.3.mlp.fc2
47
- visual.blocks.4
48
- visual.blocks.4.norm1
49
- visual.blocks.4.norm2
50
- visual.blocks.4.attn
51
- visual.blocks.4.attn.qkv
52
- visual.blocks.4.attn.proj
53
- visual.blocks.4.mlp
54
- visual.blocks.4.mlp.fc1
55
- visual.blocks.4.mlp.act
56
- visual.blocks.4.mlp.fc2
57
- visual.blocks.5
58
- visual.blocks.5.norm1
59
- visual.blocks.5.norm2
60
- visual.blocks.5.attn
61
- visual.blocks.5.attn.qkv
62
- visual.blocks.5.attn.proj
63
- visual.blocks.5.mlp
64
- visual.blocks.5.mlp.fc1
65
- visual.blocks.5.mlp.act
66
- visual.blocks.5.mlp.fc2
67
- visual.blocks.6
68
- visual.blocks.6.norm1
69
- visual.blocks.6.norm2
70
- visual.blocks.6.attn
71
- visual.blocks.6.attn.qkv
72
- visual.blocks.6.attn.proj
73
- visual.blocks.6.mlp
74
- visual.blocks.6.mlp.fc1
75
- visual.blocks.6.mlp.act
76
- visual.blocks.6.mlp.fc2
77
- visual.blocks.7
78
- visual.blocks.7.norm1
79
- visual.blocks.7.norm2
80
- visual.blocks.7.attn
81
- visual.blocks.7.attn.qkv
82
- visual.blocks.7.attn.proj
83
- visual.blocks.7.mlp
84
- visual.blocks.7.mlp.fc1
85
- visual.blocks.7.mlp.act
86
- visual.blocks.7.mlp.fc2
87
- visual.blocks.8
88
- visual.blocks.8.norm1
89
- visual.blocks.8.norm2
90
- visual.blocks.8.attn
91
- visual.blocks.8.attn.qkv
92
- visual.blocks.8.attn.proj
93
- visual.blocks.8.mlp
94
- visual.blocks.8.mlp.fc1
95
- visual.blocks.8.mlp.act
96
- visual.blocks.8.mlp.fc2
97
- visual.blocks.9
98
- visual.blocks.9.norm1
99
- visual.blocks.9.norm2
100
- visual.blocks.9.attn
101
- visual.blocks.9.attn.qkv
102
- visual.blocks.9.attn.proj
103
- visual.blocks.9.mlp
104
- visual.blocks.9.mlp.fc1
105
- visual.blocks.9.mlp.act
106
- visual.blocks.9.mlp.fc2
107
- visual.blocks.10
108
- visual.blocks.10.norm1
109
- visual.blocks.10.norm2
110
- visual.blocks.10.attn
111
- visual.blocks.10.attn.qkv
112
- visual.blocks.10.attn.proj
113
- visual.blocks.10.mlp
114
- visual.blocks.10.mlp.fc1
115
- visual.blocks.10.mlp.act
116
- visual.blocks.10.mlp.fc2
117
- visual.blocks.11
118
- visual.blocks.11.norm1
119
- visual.blocks.11.norm2
120
- visual.blocks.11.attn
121
- visual.blocks.11.attn.qkv
122
- visual.blocks.11.attn.proj
123
- visual.blocks.11.mlp
124
- visual.blocks.11.mlp.fc1
125
- visual.blocks.11.mlp.act
126
- visual.blocks.11.mlp.fc2
127
- visual.blocks.12
128
- visual.blocks.12.norm1
129
- visual.blocks.12.norm2
130
- visual.blocks.12.attn
131
- visual.blocks.12.attn.qkv
132
- visual.blocks.12.attn.proj
133
- visual.blocks.12.mlp
134
- visual.blocks.12.mlp.fc1
135
- visual.blocks.12.mlp.act
136
- visual.blocks.12.mlp.fc2
137
- visual.blocks.13
138
- visual.blocks.13.norm1
139
- visual.blocks.13.norm2
140
- visual.blocks.13.attn
141
- visual.blocks.13.attn.qkv
142
- visual.blocks.13.attn.proj
143
- visual.blocks.13.mlp
144
- visual.blocks.13.mlp.fc1
145
- visual.blocks.13.mlp.act
146
- visual.blocks.13.mlp.fc2
147
- visual.blocks.14
148
- visual.blocks.14.norm1
149
- visual.blocks.14.norm2
150
- visual.blocks.14.attn
151
- visual.blocks.14.attn.qkv
152
- visual.blocks.14.attn.proj
153
- visual.blocks.14.mlp
154
- visual.blocks.14.mlp.fc1
155
- visual.blocks.14.mlp.act
156
- visual.blocks.14.mlp.fc2
157
- visual.blocks.15
158
- visual.blocks.15.norm1
159
- visual.blocks.15.norm2
160
- visual.blocks.15.attn
161
- visual.blocks.15.attn.qkv
162
- visual.blocks.15.attn.proj
163
- visual.blocks.15.mlp
164
- visual.blocks.15.mlp.fc1
165
- visual.blocks.15.mlp.act
166
- visual.blocks.15.mlp.fc2
167
- visual.blocks.16
168
- visual.blocks.16.norm1
169
- visual.blocks.16.norm2
170
- visual.blocks.16.attn
171
- visual.blocks.16.attn.qkv
172
- visual.blocks.16.attn.proj
173
- visual.blocks.16.mlp
174
- visual.blocks.16.mlp.fc1
175
- visual.blocks.16.mlp.act
176
- visual.blocks.16.mlp.fc2
177
- visual.blocks.17
178
- visual.blocks.17.norm1
179
- visual.blocks.17.norm2
180
- visual.blocks.17.attn
181
- visual.blocks.17.attn.qkv
182
- visual.blocks.17.attn.proj
183
- visual.blocks.17.mlp
184
- visual.blocks.17.mlp.fc1
185
- visual.blocks.17.mlp.act
186
- visual.blocks.17.mlp.fc2
187
- visual.blocks.18
188
- visual.blocks.18.norm1
189
- visual.blocks.18.norm2
190
- visual.blocks.18.attn
191
- visual.blocks.18.attn.qkv
192
- visual.blocks.18.attn.proj
193
- visual.blocks.18.mlp
194
- visual.blocks.18.mlp.fc1
195
- visual.blocks.18.mlp.act
196
- visual.blocks.18.mlp.fc2
197
- visual.blocks.19
198
- visual.blocks.19.norm1
199
- visual.blocks.19.norm2
200
- visual.blocks.19.attn
201
- visual.blocks.19.attn.qkv
202
- visual.blocks.19.attn.proj
203
- visual.blocks.19.mlp
204
- visual.blocks.19.mlp.fc1
205
- visual.blocks.19.mlp.act
206
- visual.blocks.19.mlp.fc2
207
- visual.blocks.20
208
- visual.blocks.20.norm1
209
- visual.blocks.20.norm2
210
- visual.blocks.20.attn
211
- visual.blocks.20.attn.qkv
212
- visual.blocks.20.attn.proj
213
- visual.blocks.20.mlp
214
- visual.blocks.20.mlp.fc1
215
- visual.blocks.20.mlp.act
216
- visual.blocks.20.mlp.fc2
217
- visual.blocks.21
218
- visual.blocks.21.norm1
219
- visual.blocks.21.norm2
220
- visual.blocks.21.attn
221
- visual.blocks.21.attn.qkv
222
- visual.blocks.21.attn.proj
223
- visual.blocks.21.mlp
224
- visual.blocks.21.mlp.fc1
225
- visual.blocks.21.mlp.act
226
- visual.blocks.21.mlp.fc2
227
- visual.blocks.22
228
- visual.blocks.22.norm1
229
- visual.blocks.22.norm2
230
- visual.blocks.22.attn
231
- visual.blocks.22.attn.qkv
232
- visual.blocks.22.attn.proj
233
- visual.blocks.22.mlp
234
- visual.blocks.22.mlp.fc1
235
- visual.blocks.22.mlp.act
236
- visual.blocks.22.mlp.fc2
237
- visual.blocks.23
238
- visual.blocks.23.norm1
239
- visual.blocks.23.norm2
240
- visual.blocks.23.attn
241
- visual.blocks.23.attn.qkv
242
- visual.blocks.23.attn.proj
243
- visual.blocks.23.mlp
244
- visual.blocks.23.mlp.fc1
245
- visual.blocks.23.mlp.act
246
- visual.blocks.23.mlp.fc2
247
- visual.blocks.24
248
- visual.blocks.24.norm1
249
- visual.blocks.24.norm2
250
- visual.blocks.24.attn
251
- visual.blocks.24.attn.qkv
252
- visual.blocks.24.attn.proj
253
- visual.blocks.24.mlp
254
- visual.blocks.24.mlp.fc1
255
- visual.blocks.24.mlp.act
256
- visual.blocks.24.mlp.fc2
257
- visual.blocks.25
258
- visual.blocks.25.norm1
259
- visual.blocks.25.norm2
260
- visual.blocks.25.attn
261
- visual.blocks.25.attn.qkv
262
- visual.blocks.25.attn.proj
263
- visual.blocks.25.mlp
264
- visual.blocks.25.mlp.fc1
265
- visual.blocks.25.mlp.act
266
- visual.blocks.25.mlp.fc2
267
- visual.blocks.26
268
- visual.blocks.26.norm1
269
- visual.blocks.26.norm2
270
- visual.blocks.26.attn
271
- visual.blocks.26.attn.qkv
272
- visual.blocks.26.attn.proj
273
- visual.blocks.26.mlp
274
- visual.blocks.26.mlp.fc1
275
- visual.blocks.26.mlp.act
276
- visual.blocks.26.mlp.fc2
277
- visual.blocks.27
278
- visual.blocks.27.norm1
279
- visual.blocks.27.norm2
280
- visual.blocks.27.attn
281
- visual.blocks.27.attn.qkv
282
- visual.blocks.27.attn.proj
283
- visual.blocks.27.mlp
284
- visual.blocks.27.mlp.fc1
285
- visual.blocks.27.mlp.act
286
- visual.blocks.27.mlp.fc2
287
- visual.blocks.28
288
- visual.blocks.28.norm1
289
- visual.blocks.28.norm2
290
- visual.blocks.28.attn
291
- visual.blocks.28.attn.qkv
292
- visual.blocks.28.attn.proj
293
- visual.blocks.28.mlp
294
- visual.blocks.28.mlp.fc1
295
- visual.blocks.28.mlp.act
296
- visual.blocks.28.mlp.fc2
297
- visual.blocks.29
298
- visual.blocks.29.norm1
299
- visual.blocks.29.norm2
300
- visual.blocks.29.attn
301
- visual.blocks.29.attn.qkv
302
- visual.blocks.29.attn.proj
303
- visual.blocks.29.mlp
304
- visual.blocks.29.mlp.fc1
305
- visual.blocks.29.mlp.act
306
- visual.blocks.29.mlp.fc2
307
- visual.blocks.30
308
- visual.blocks.30.norm1
309
- visual.blocks.30.norm2
310
- visual.blocks.30.attn
311
- visual.blocks.30.attn.qkv
312
- visual.blocks.30.attn.proj
313
- visual.blocks.30.mlp
314
- visual.blocks.30.mlp.fc1
315
- visual.blocks.30.mlp.act
316
- visual.blocks.30.mlp.fc2
317
- visual.blocks.31
318
- visual.blocks.31.norm1
319
- visual.blocks.31.norm2
320
- visual.blocks.31.attn
321
- visual.blocks.31.attn.qkv
322
- visual.blocks.31.attn.proj
323
- visual.blocks.31.mlp
324
- visual.blocks.31.mlp.fc1
325
- visual.blocks.31.mlp.act
326
- visual.blocks.31.mlp.fc2
327
- visual.merger
328
- visual.merger.ln_q
329
- visual.merger.mlp
330
- visual.merger.mlp.0
331
- visual.merger.mlp.1
332
- visual.merger.mlp.2
333
- model
334
  model.embed_tokens
335
- model.layers
336
- model.layers.0
337
- model.layers.0.self_attn
338
- model.layers.0.self_attn.q_proj
339
- model.layers.0.self_attn.k_proj
340
- model.layers.0.self_attn.v_proj
341
- model.layers.0.self_attn.o_proj
342
- model.layers.0.self_attn.rotary_emb
343
  model.layers.0.mlp
 
 
344
  model.layers.0.mlp.gate_proj
345
  model.layers.0.mlp.up_proj
346
- model.layers.0.mlp.down_proj
347
- model.layers.0.mlp.act_fn
348
- model.layers.0.input_layernorm
349
  model.layers.0.post_attention_layernorm
350
- model.layers.1
351
- model.layers.1.self_attn
352
- model.layers.1.self_attn.q_proj
353
- model.layers.1.self_attn.k_proj
354
- model.layers.1.self_attn.v_proj
355
- model.layers.1.self_attn.o_proj
356
- model.layers.1.self_attn.rotary_emb
357
  model.layers.1.mlp
 
 
358
  model.layers.1.mlp.gate_proj
359
  model.layers.1.mlp.up_proj
360
- model.layers.1.mlp.down_proj
361
- model.layers.1.mlp.act_fn
362
- model.layers.1.input_layernorm
363
  model.layers.1.post_attention_layernorm
364
- model.layers.2
365
- model.layers.2.self_attn
366
- model.layers.2.self_attn.q_proj
367
- model.layers.2.self_attn.k_proj
368
- model.layers.2.self_attn.v_proj
369
- model.layers.2.self_attn.o_proj
370
- model.layers.2.self_attn.rotary_emb
371
- model.layers.2.mlp
372
- model.layers.2.mlp.gate_proj
373
- model.layers.2.mlp.up_proj
374
- model.layers.2.mlp.down_proj
375
- model.layers.2.mlp.act_fn
376
- model.layers.2.input_layernorm
377
- model.layers.2.post_attention_layernorm
378
- model.layers.3
379
- model.layers.3.self_attn
380
- model.layers.3.self_attn.q_proj
381
- model.layers.3.self_attn.k_proj
382
- model.layers.3.self_attn.v_proj
383
- model.layers.3.self_attn.o_proj
384
- model.layers.3.self_attn.rotary_emb
385
- model.layers.3.mlp
386
- model.layers.3.mlp.gate_proj
387
- model.layers.3.mlp.up_proj
388
- model.layers.3.mlp.down_proj
389
- model.layers.3.mlp.act_fn
390
- model.layers.3.input_layernorm
391
- model.layers.3.post_attention_layernorm
392
- model.layers.4
393
- model.layers.4.self_attn
394
- model.layers.4.self_attn.q_proj
395
- model.layers.4.self_attn.k_proj
396
- model.layers.4.self_attn.v_proj
397
- model.layers.4.self_attn.o_proj
398
- model.layers.4.self_attn.rotary_emb
399
- model.layers.4.mlp
400
- model.layers.4.mlp.gate_proj
401
- model.layers.4.mlp.up_proj
402
- model.layers.4.mlp.down_proj
403
- model.layers.4.mlp.act_fn
404
- model.layers.4.input_layernorm
405
- model.layers.4.post_attention_layernorm
406
- model.layers.5
407
- model.layers.5.self_attn
408
- model.layers.5.self_attn.q_proj
409
- model.layers.5.self_attn.k_proj
410
- model.layers.5.self_attn.v_proj
411
- model.layers.5.self_attn.o_proj
412
- model.layers.5.self_attn.rotary_emb
413
- model.layers.5.mlp
414
- model.layers.5.mlp.gate_proj
415
- model.layers.5.mlp.up_proj
416
- model.layers.5.mlp.down_proj
417
- model.layers.5.mlp.act_fn
418
- model.layers.5.input_layernorm
419
- model.layers.5.post_attention_layernorm
420
- model.layers.6
421
- model.layers.6.self_attn
422
- model.layers.6.self_attn.q_proj
423
- model.layers.6.self_attn.k_proj
424
- model.layers.6.self_attn.v_proj
425
- model.layers.6.self_attn.o_proj
426
- model.layers.6.self_attn.rotary_emb
427
- model.layers.6.mlp
428
- model.layers.6.mlp.gate_proj
429
- model.layers.6.mlp.up_proj
430
- model.layers.6.mlp.down_proj
431
- model.layers.6.mlp.act_fn
432
- model.layers.6.input_layernorm
433
- model.layers.6.post_attention_layernorm
434
- model.layers.7
435
- model.layers.7.self_attn
436
- model.layers.7.self_attn.q_proj
437
- model.layers.7.self_attn.k_proj
438
- model.layers.7.self_attn.v_proj
439
- model.layers.7.self_attn.o_proj
440
- model.layers.7.self_attn.rotary_emb
441
- model.layers.7.mlp
442
- model.layers.7.mlp.gate_proj
443
- model.layers.7.mlp.up_proj
444
- model.layers.7.mlp.down_proj
445
- model.layers.7.mlp.act_fn
446
- model.layers.7.input_layernorm
447
- model.layers.7.post_attention_layernorm
448
- model.layers.8
449
- model.layers.8.self_attn
450
- model.layers.8.self_attn.q_proj
451
- model.layers.8.self_attn.k_proj
452
- model.layers.8.self_attn.v_proj
453
- model.layers.8.self_attn.o_proj
454
- model.layers.8.self_attn.rotary_emb
455
- model.layers.8.mlp
456
- model.layers.8.mlp.gate_proj
457
- model.layers.8.mlp.up_proj
458
- model.layers.8.mlp.down_proj
459
- model.layers.8.mlp.act_fn
460
- model.layers.8.input_layernorm
461
- model.layers.8.post_attention_layernorm
462
- model.layers.9
463
- model.layers.9.self_attn
464
- model.layers.9.self_attn.q_proj
465
- model.layers.9.self_attn.k_proj
466
- model.layers.9.self_attn.v_proj
467
- model.layers.9.self_attn.o_proj
468
- model.layers.9.self_attn.rotary_emb
469
- model.layers.9.mlp
470
- model.layers.9.mlp.gate_proj
471
- model.layers.9.mlp.up_proj
472
- model.layers.9.mlp.down_proj
473
- model.layers.9.mlp.act_fn
474
- model.layers.9.input_layernorm
475
- model.layers.9.post_attention_layernorm
476
- model.layers.10
477
- model.layers.10.self_attn
478
- model.layers.10.self_attn.q_proj
479
- model.layers.10.self_attn.k_proj
480
- model.layers.10.self_attn.v_proj
481
- model.layers.10.self_attn.o_proj
482
- model.layers.10.self_attn.rotary_emb
483
  model.layers.10.mlp
 
 
484
  model.layers.10.mlp.gate_proj
485
  model.layers.10.mlp.up_proj
486
- model.layers.10.mlp.down_proj
487
- model.layers.10.mlp.act_fn
488
- model.layers.10.input_layernorm
489
  model.layers.10.post_attention_layernorm
490
- model.layers.11
491
- model.layers.11.self_attn
492
- model.layers.11.self_attn.q_proj
493
- model.layers.11.self_attn.k_proj
494
- model.layers.11.self_attn.v_proj
495
- model.layers.11.self_attn.o_proj
496
- model.layers.11.self_attn.rotary_emb
497
  model.layers.11.mlp
 
 
498
  model.layers.11.mlp.gate_proj
499
  model.layers.11.mlp.up_proj
500
- model.layers.11.mlp.down_proj
501
- model.layers.11.mlp.act_fn
502
- model.layers.11.input_layernorm
503
  model.layers.11.post_attention_layernorm
504
- model.layers.12
505
- model.layers.12.self_attn
506
- model.layers.12.self_attn.q_proj
507
- model.layers.12.self_attn.k_proj
508
- model.layers.12.self_attn.v_proj
509
- model.layers.12.self_attn.o_proj
510
- model.layers.12.self_attn.rotary_emb
511
  model.layers.12.mlp
 
 
512
  model.layers.12.mlp.gate_proj
513
  model.layers.12.mlp.up_proj
514
- model.layers.12.mlp.down_proj
515
- model.layers.12.mlp.act_fn
516
- model.layers.12.input_layernorm
517
  model.layers.12.post_attention_layernorm
518
- model.layers.13
519
- model.layers.13.self_attn
520
- model.layers.13.self_attn.q_proj
521
- model.layers.13.self_attn.k_proj
522
- model.layers.13.self_attn.v_proj
523
- model.layers.13.self_attn.o_proj
524
- model.layers.13.self_attn.rotary_emb
525
  model.layers.13.mlp
 
 
526
  model.layers.13.mlp.gate_proj
527
  model.layers.13.mlp.up_proj
528
- model.layers.13.mlp.down_proj
529
- model.layers.13.mlp.act_fn
530
- model.layers.13.input_layernorm
531
  model.layers.13.post_attention_layernorm
532
- model.layers.14
533
- model.layers.14.self_attn
534
- model.layers.14.self_attn.q_proj
535
- model.layers.14.self_attn.k_proj
536
- model.layers.14.self_attn.v_proj
537
- model.layers.14.self_attn.o_proj
538
- model.layers.14.self_attn.rotary_emb
539
  model.layers.14.mlp
 
 
540
  model.layers.14.mlp.gate_proj
541
  model.layers.14.mlp.up_proj
542
- model.layers.14.mlp.down_proj
543
- model.layers.14.mlp.act_fn
544
- model.layers.14.input_layernorm
545
  model.layers.14.post_attention_layernorm
546
- model.layers.15
547
- model.layers.15.self_attn
548
- model.layers.15.self_attn.q_proj
549
- model.layers.15.self_attn.k_proj
550
- model.layers.15.self_attn.v_proj
551
- model.layers.15.self_attn.o_proj
552
- model.layers.15.self_attn.rotary_emb
553
  model.layers.15.mlp
 
 
554
  model.layers.15.mlp.gate_proj
555
  model.layers.15.mlp.up_proj
556
- model.layers.15.mlp.down_proj
557
- model.layers.15.mlp.act_fn
558
- model.layers.15.input_layernorm
559
  model.layers.15.post_attention_layernorm
560
- model.layers.16
561
- model.layers.16.self_attn
562
- model.layers.16.self_attn.q_proj
563
- model.layers.16.self_attn.k_proj
564
- model.layers.16.self_attn.v_proj
565
- model.layers.16.self_attn.o_proj
566
- model.layers.16.self_attn.rotary_emb
567
  model.layers.16.mlp
 
 
568
  model.layers.16.mlp.gate_proj
569
  model.layers.16.mlp.up_proj
570
- model.layers.16.mlp.down_proj
571
- model.layers.16.mlp.act_fn
572
- model.layers.16.input_layernorm
573
  model.layers.16.post_attention_layernorm
574
- model.layers.17
575
- model.layers.17.self_attn
576
- model.layers.17.self_attn.q_proj
577
- model.layers.17.self_attn.k_proj
578
- model.layers.17.self_attn.v_proj
579
- model.layers.17.self_attn.o_proj
580
- model.layers.17.self_attn.rotary_emb
581
  model.layers.17.mlp
 
 
582
  model.layers.17.mlp.gate_proj
583
  model.layers.17.mlp.up_proj
584
- model.layers.17.mlp.down_proj
585
- model.layers.17.mlp.act_fn
586
- model.layers.17.input_layernorm
587
  model.layers.17.post_attention_layernorm
588
- model.layers.18
589
- model.layers.18.self_attn
590
- model.layers.18.self_attn.q_proj
591
- model.layers.18.self_attn.k_proj
592
- model.layers.18.self_attn.v_proj
593
- model.layers.18.self_attn.o_proj
594
- model.layers.18.self_attn.rotary_emb
595
  model.layers.18.mlp
 
 
596
  model.layers.18.mlp.gate_proj
597
  model.layers.18.mlp.up_proj
598
- model.layers.18.mlp.down_proj
599
- model.layers.18.mlp.act_fn
600
- model.layers.18.input_layernorm
601
  model.layers.18.post_attention_layernorm
602
- model.layers.19
603
- model.layers.19.self_attn
604
- model.layers.19.self_attn.q_proj
605
- model.layers.19.self_attn.k_proj
606
- model.layers.19.self_attn.v_proj
607
- model.layers.19.self_attn.o_proj
608
- model.layers.19.self_attn.rotary_emb
609
  model.layers.19.mlp
 
 
610
  model.layers.19.mlp.gate_proj
611
  model.layers.19.mlp.up_proj
612
- model.layers.19.mlp.down_proj
613
- model.layers.19.mlp.act_fn
614
- model.layers.19.input_layernorm
615
  model.layers.19.post_attention_layernorm
616
- model.layers.20
617
- model.layers.20.self_attn
618
- model.layers.20.self_attn.q_proj
619
- model.layers.20.self_attn.k_proj
620
- model.layers.20.self_attn.v_proj
621
- model.layers.20.self_attn.o_proj
622
- model.layers.20.self_attn.rotary_emb
 
 
 
 
 
 
 
 
 
623
  model.layers.20.mlp
624
- model.layers.20.mlp.gate_proj
625
- model.layers.20.mlp.up_proj
626
- model.layers.20.mlp.down_proj
627
  model.layers.20.mlp.act_fn
628
- model.layers.20.input_layernorm
 
 
629
  model.layers.20.post_attention_layernorm
630
- model.layers.21
631
- model.layers.21.self_attn
632
- model.layers.21.self_attn.q_proj
633
- model.layers.21.self_attn.k_proj
634
- model.layers.21.self_attn.v_proj
635
- model.layers.21.self_attn.o_proj
636
- model.layers.21.self_attn.rotary_emb
637
  model.layers.21.mlp
 
 
638
  model.layers.21.mlp.gate_proj
639
  model.layers.21.mlp.up_proj
640
- model.layers.21.mlp.down_proj
641
- model.layers.21.mlp.act_fn
642
- model.layers.21.input_layernorm
643
  model.layers.21.post_attention_layernorm
644
- model.layers.22
645
- model.layers.22.self_attn
646
- model.layers.22.self_attn.q_proj
647
- model.layers.22.self_attn.k_proj
648
- model.layers.22.self_attn.v_proj
649
- model.layers.22.self_attn.o_proj
650
- model.layers.22.self_attn.rotary_emb
651
  model.layers.22.mlp
 
 
652
  model.layers.22.mlp.gate_proj
653
  model.layers.22.mlp.up_proj
654
- model.layers.22.mlp.down_proj
655
- model.layers.22.mlp.act_fn
656
- model.layers.22.input_layernorm
657
  model.layers.22.post_attention_layernorm
658
- model.layers.23
659
- model.layers.23.self_attn
660
- model.layers.23.self_attn.q_proj
661
- model.layers.23.self_attn.k_proj
662
- model.layers.23.self_attn.v_proj
663
- model.layers.23.self_attn.o_proj
664
- model.layers.23.self_attn.rotary_emb
665
  model.layers.23.mlp
 
 
666
  model.layers.23.mlp.gate_proj
667
  model.layers.23.mlp.up_proj
668
- model.layers.23.mlp.down_proj
669
- model.layers.23.mlp.act_fn
670
- model.layers.23.input_layernorm
671
  model.layers.23.post_attention_layernorm
672
- model.layers.24
673
- model.layers.24.self_attn
674
- model.layers.24.self_attn.q_proj
675
- model.layers.24.self_attn.k_proj
676
- model.layers.24.self_attn.v_proj
677
- model.layers.24.self_attn.o_proj
678
- model.layers.24.self_attn.rotary_emb
679
  model.layers.24.mlp
 
 
680
  model.layers.24.mlp.gate_proj
681
  model.layers.24.mlp.up_proj
682
- model.layers.24.mlp.down_proj
683
- model.layers.24.mlp.act_fn
684
- model.layers.24.input_layernorm
685
  model.layers.24.post_attention_layernorm
686
- model.layers.25
687
- model.layers.25.self_attn
688
- model.layers.25.self_attn.q_proj
689
- model.layers.25.self_attn.k_proj
690
- model.layers.25.self_attn.v_proj
691
- model.layers.25.self_attn.o_proj
692
- model.layers.25.self_attn.rotary_emb
693
  model.layers.25.mlp
 
 
694
  model.layers.25.mlp.gate_proj
695
  model.layers.25.mlp.up_proj
696
- model.layers.25.mlp.down_proj
697
- model.layers.25.mlp.act_fn
698
- model.layers.25.input_layernorm
699
  model.layers.25.post_attention_layernorm
700
- model.layers.26
701
- model.layers.26.self_attn
702
- model.layers.26.self_attn.q_proj
703
- model.layers.26.self_attn.k_proj
704
- model.layers.26.self_attn.v_proj
705
- model.layers.26.self_attn.o_proj
706
- model.layers.26.self_attn.rotary_emb
707
  model.layers.26.mlp
 
 
708
  model.layers.26.mlp.gate_proj
709
  model.layers.26.mlp.up_proj
710
- model.layers.26.mlp.down_proj
711
- model.layers.26.mlp.act_fn
712
- model.layers.26.input_layernorm
713
  model.layers.26.post_attention_layernorm
714
- model.layers.27
715
- model.layers.27.self_attn
716
- model.layers.27.self_attn.q_proj
717
- model.layers.27.self_attn.k_proj
718
- model.layers.27.self_attn.v_proj
719
- model.layers.27.self_attn.o_proj
720
- model.layers.27.self_attn.rotary_emb
721
  model.layers.27.mlp
 
 
722
  model.layers.27.mlp.gate_proj
723
  model.layers.27.mlp.up_proj
724
- model.layers.27.mlp.down_proj
725
- model.layers.27.mlp.act_fn
726
- model.layers.27.input_layernorm
727
  model.layers.27.post_attention_layernorm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
  model.norm
729
- model.rotary_emb
730
- lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  model.embed_tokens
3
+ model.layers.0.input_layernorm
 
 
 
 
 
 
 
4
  model.layers.0.mlp
5
+ model.layers.0.mlp.act_fn
6
+ model.layers.0.mlp.down_proj
7
  model.layers.0.mlp.gate_proj
8
  model.layers.0.mlp.up_proj
 
 
 
9
  model.layers.0.post_attention_layernorm
10
+ model.layers.0.self_attn.k_proj
11
+ model.layers.0.self_attn.o_proj
12
+ model.layers.0.self_attn.q_proj
13
+ model.layers.0.self_attn.v_proj
14
+ model.layers.1.input_layernorm
 
 
15
  model.layers.1.mlp
16
+ model.layers.1.mlp.act_fn
17
+ model.layers.1.mlp.down_proj
18
  model.layers.1.mlp.gate_proj
19
  model.layers.1.mlp.up_proj
 
 
 
20
  model.layers.1.post_attention_layernorm
21
+ model.layers.1.self_attn.k_proj
22
+ model.layers.1.self_attn.o_proj
23
+ model.layers.1.self_attn.q_proj
24
+ model.layers.1.self_attn.v_proj
25
+ model.layers.10.input_layernorm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  model.layers.10.mlp
27
+ model.layers.10.mlp.act_fn
28
+ model.layers.10.mlp.down_proj
29
  model.layers.10.mlp.gate_proj
30
  model.layers.10.mlp.up_proj
 
 
 
31
  model.layers.10.post_attention_layernorm
32
+ model.layers.10.self_attn.k_proj
33
+ model.layers.10.self_attn.o_proj
34
+ model.layers.10.self_attn.q_proj
35
+ model.layers.10.self_attn.v_proj
36
+ model.layers.11.input_layernorm
 
 
37
  model.layers.11.mlp
38
+ model.layers.11.mlp.act_fn
39
+ model.layers.11.mlp.down_proj
40
  model.layers.11.mlp.gate_proj
41
  model.layers.11.mlp.up_proj
 
 
 
42
  model.layers.11.post_attention_layernorm
43
+ model.layers.11.self_attn.k_proj
44
+ model.layers.11.self_attn.o_proj
45
+ model.layers.11.self_attn.q_proj
46
+ model.layers.11.self_attn.v_proj
47
+ model.layers.12.input_layernorm
 
 
48
  model.layers.12.mlp
49
+ model.layers.12.mlp.act_fn
50
+ model.layers.12.mlp.down_proj
51
  model.layers.12.mlp.gate_proj
52
  model.layers.12.mlp.up_proj
 
 
 
53
  model.layers.12.post_attention_layernorm
54
+ model.layers.12.self_attn.k_proj
55
+ model.layers.12.self_attn.o_proj
56
+ model.layers.12.self_attn.q_proj
57
+ model.layers.12.self_attn.v_proj
58
+ model.layers.13.input_layernorm
 
 
59
  model.layers.13.mlp
60
+ model.layers.13.mlp.act_fn
61
+ model.layers.13.mlp.down_proj
62
  model.layers.13.mlp.gate_proj
63
  model.layers.13.mlp.up_proj
 
 
 
64
  model.layers.13.post_attention_layernorm
65
+ model.layers.13.self_attn.k_proj
66
+ model.layers.13.self_attn.o_proj
67
+ model.layers.13.self_attn.q_proj
68
+ model.layers.13.self_attn.v_proj
69
+ model.layers.14.input_layernorm
 
 
70
  model.layers.14.mlp
71
+ model.layers.14.mlp.act_fn
72
+ model.layers.14.mlp.down_proj
73
  model.layers.14.mlp.gate_proj
74
  model.layers.14.mlp.up_proj
 
 
 
75
  model.layers.14.post_attention_layernorm
76
+ model.layers.14.self_attn.k_proj
77
+ model.layers.14.self_attn.o_proj
78
+ model.layers.14.self_attn.q_proj
79
+ model.layers.14.self_attn.v_proj
80
+ model.layers.15.input_layernorm
 
 
81
  model.layers.15.mlp
82
+ model.layers.15.mlp.act_fn
83
+ model.layers.15.mlp.down_proj
84
  model.layers.15.mlp.gate_proj
85
  model.layers.15.mlp.up_proj
 
 
 
86
  model.layers.15.post_attention_layernorm
87
+ model.layers.15.self_attn.k_proj
88
+ model.layers.15.self_attn.o_proj
89
+ model.layers.15.self_attn.q_proj
90
+ model.layers.15.self_attn.v_proj
91
+ model.layers.16.input_layernorm
 
 
92
  model.layers.16.mlp
93
+ model.layers.16.mlp.act_fn
94
+ model.layers.16.mlp.down_proj
95
  model.layers.16.mlp.gate_proj
96
  model.layers.16.mlp.up_proj
 
 
 
97
  model.layers.16.post_attention_layernorm
98
+ model.layers.16.self_attn.k_proj
99
+ model.layers.16.self_attn.o_proj
100
+ model.layers.16.self_attn.q_proj
101
+ model.layers.16.self_attn.v_proj
102
+ model.layers.17.input_layernorm
 
 
103
  model.layers.17.mlp
104
+ model.layers.17.mlp.act_fn
105
+ model.layers.17.mlp.down_proj
106
  model.layers.17.mlp.gate_proj
107
  model.layers.17.mlp.up_proj
 
 
 
108
  model.layers.17.post_attention_layernorm
109
+ model.layers.17.self_attn.k_proj
110
+ model.layers.17.self_attn.o_proj
111
+ model.layers.17.self_attn.q_proj
112
+ model.layers.17.self_attn.v_proj
113
+ model.layers.18.input_layernorm
 
 
114
  model.layers.18.mlp
115
+ model.layers.18.mlp.act_fn
116
+ model.layers.18.mlp.down_proj
117
  model.layers.18.mlp.gate_proj
118
  model.layers.18.mlp.up_proj
 
 
 
119
  model.layers.18.post_attention_layernorm
120
+ model.layers.18.self_attn.k_proj
121
+ model.layers.18.self_attn.o_proj
122
+ model.layers.18.self_attn.q_proj
123
+ model.layers.18.self_attn.v_proj
124
+ model.layers.19.input_layernorm
 
 
125
  model.layers.19.mlp
126
+ model.layers.19.mlp.act_fn
127
+ model.layers.19.mlp.down_proj
128
  model.layers.19.mlp.gate_proj
129
  model.layers.19.mlp.up_proj
 
 
 
130
  model.layers.19.post_attention_layernorm
131
+ model.layers.19.self_attn.k_proj
132
+ model.layers.19.self_attn.o_proj
133
+ model.layers.19.self_attn.q_proj
134
+ model.layers.19.self_attn.v_proj
135
+ model.layers.2.input_layernorm
136
+ model.layers.2.mlp
137
+ model.layers.2.mlp.act_fn
138
+ model.layers.2.mlp.down_proj
139
+ model.layers.2.mlp.gate_proj
140
+ model.layers.2.mlp.up_proj
141
+ model.layers.2.post_attention_layernorm
142
+ model.layers.2.self_attn.k_proj
143
+ model.layers.2.self_attn.o_proj
144
+ model.layers.2.self_attn.q_proj
145
+ model.layers.2.self_attn.v_proj
146
+ model.layers.20.input_layernorm
147
  model.layers.20.mlp
 
 
 
148
  model.layers.20.mlp.act_fn
149
+ model.layers.20.mlp.down_proj
150
+ model.layers.20.mlp.gate_proj
151
+ model.layers.20.mlp.up_proj
152
  model.layers.20.post_attention_layernorm
153
+ model.layers.20.self_attn.k_proj
154
+ model.layers.20.self_attn.o_proj
155
+ model.layers.20.self_attn.q_proj
156
+ model.layers.20.self_attn.v_proj
157
+ model.layers.21.input_layernorm
 
 
158
  model.layers.21.mlp
159
+ model.layers.21.mlp.act_fn
160
+ model.layers.21.mlp.down_proj
161
  model.layers.21.mlp.gate_proj
162
  model.layers.21.mlp.up_proj
 
 
 
163
  model.layers.21.post_attention_layernorm
164
+ model.layers.21.self_attn.k_proj
165
+ model.layers.21.self_attn.o_proj
166
+ model.layers.21.self_attn.q_proj
167
+ model.layers.21.self_attn.v_proj
168
+ model.layers.22.input_layernorm
 
 
169
  model.layers.22.mlp
170
+ model.layers.22.mlp.act_fn
171
+ model.layers.22.mlp.down_proj
172
  model.layers.22.mlp.gate_proj
173
  model.layers.22.mlp.up_proj
 
 
 
174
  model.layers.22.post_attention_layernorm
175
+ model.layers.22.self_attn.k_proj
176
+ model.layers.22.self_attn.o_proj
177
+ model.layers.22.self_attn.q_proj
178
+ model.layers.22.self_attn.v_proj
179
+ model.layers.23.input_layernorm
 
 
180
  model.layers.23.mlp
181
+ model.layers.23.mlp.act_fn
182
+ model.layers.23.mlp.down_proj
183
  model.layers.23.mlp.gate_proj
184
  model.layers.23.mlp.up_proj
 
 
 
185
  model.layers.23.post_attention_layernorm
186
+ model.layers.23.self_attn.k_proj
187
+ model.layers.23.self_attn.o_proj
188
+ model.layers.23.self_attn.q_proj
189
+ model.layers.23.self_attn.v_proj
190
+ model.layers.24.input_layernorm
 
 
191
  model.layers.24.mlp
192
+ model.layers.24.mlp.act_fn
193
+ model.layers.24.mlp.down_proj
194
  model.layers.24.mlp.gate_proj
195
  model.layers.24.mlp.up_proj
 
 
 
196
  model.layers.24.post_attention_layernorm
197
+ model.layers.24.self_attn.k_proj
198
+ model.layers.24.self_attn.o_proj
199
+ model.layers.24.self_attn.q_proj
200
+ model.layers.24.self_attn.v_proj
201
+ model.layers.25.input_layernorm
 
 
202
  model.layers.25.mlp
203
+ model.layers.25.mlp.act_fn
204
+ model.layers.25.mlp.down_proj
205
  model.layers.25.mlp.gate_proj
206
  model.layers.25.mlp.up_proj
 
 
 
207
  model.layers.25.post_attention_layernorm
208
+ model.layers.25.self_attn.k_proj
209
+ model.layers.25.self_attn.o_proj
210
+ model.layers.25.self_attn.q_proj
211
+ model.layers.25.self_attn.v_proj
212
+ model.layers.26.input_layernorm
 
 
213
  model.layers.26.mlp
214
+ model.layers.26.mlp.act_fn
215
+ model.layers.26.mlp.down_proj
216
  model.layers.26.mlp.gate_proj
217
  model.layers.26.mlp.up_proj
 
 
 
218
  model.layers.26.post_attention_layernorm
219
+ model.layers.26.self_attn.k_proj
220
+ model.layers.26.self_attn.o_proj
221
+ model.layers.26.self_attn.q_proj
222
+ model.layers.26.self_attn.v_proj
223
+ model.layers.27.input_layernorm
 
 
224
  model.layers.27.mlp
225
+ model.layers.27.mlp.act_fn
226
+ model.layers.27.mlp.down_proj
227
  model.layers.27.mlp.gate_proj
228
  model.layers.27.mlp.up_proj
 
 
 
229
  model.layers.27.post_attention_layernorm
230
+ model.layers.27.self_attn.k_proj
231
+ model.layers.27.self_attn.o_proj
232
+ model.layers.27.self_attn.q_proj
233
+ model.layers.27.self_attn.v_proj
234
+ model.layers.3.input_layernorm
235
+ model.layers.3.mlp
236
+ model.layers.3.mlp.act_fn
237
+ model.layers.3.mlp.down_proj
238
+ model.layers.3.mlp.gate_proj
239
+ model.layers.3.mlp.up_proj
240
+ model.layers.3.post_attention_layernorm
241
+ model.layers.3.self_attn.k_proj
242
+ model.layers.3.self_attn.o_proj
243
+ model.layers.3.self_attn.q_proj
244
+ model.layers.3.self_attn.v_proj
245
+ model.layers.4.input_layernorm
246
+ model.layers.4.mlp
247
+ model.layers.4.mlp.act_fn
248
+ model.layers.4.mlp.down_proj
249
+ model.layers.4.mlp.gate_proj
250
+ model.layers.4.mlp.up_proj
251
+ model.layers.4.post_attention_layernorm
252
+ model.layers.4.self_attn.k_proj
253
+ model.layers.4.self_attn.o_proj
254
+ model.layers.4.self_attn.q_proj
255
+ model.layers.4.self_attn.v_proj
256
+ model.layers.5.input_layernorm
257
+ model.layers.5.mlp
258
+ model.layers.5.mlp.act_fn
259
+ model.layers.5.mlp.down_proj
260
+ model.layers.5.mlp.gate_proj
261
+ model.layers.5.mlp.up_proj
262
+ model.layers.5.post_attention_layernorm
263
+ model.layers.5.self_attn.k_proj
264
+ model.layers.5.self_attn.o_proj
265
+ model.layers.5.self_attn.q_proj
266
+ model.layers.5.self_attn.v_proj
267
+ model.layers.6.input_layernorm
268
+ model.layers.6.mlp
269
+ model.layers.6.mlp.act_fn
270
+ model.layers.6.mlp.down_proj
271
+ model.layers.6.mlp.gate_proj
272
+ model.layers.6.mlp.up_proj
273
+ model.layers.6.post_attention_layernorm
274
+ model.layers.6.self_attn.k_proj
275
+ model.layers.6.self_attn.o_proj
276
+ model.layers.6.self_attn.q_proj
277
+ model.layers.6.self_attn.v_proj
278
+ model.layers.7.input_layernorm
279
+ model.layers.7.mlp
280
+ model.layers.7.mlp.act_fn
281
+ model.layers.7.mlp.down_proj
282
+ model.layers.7.mlp.gate_proj
283
+ model.layers.7.mlp.up_proj
284
+ model.layers.7.post_attention_layernorm
285
+ model.layers.7.self_attn.k_proj
286
+ model.layers.7.self_attn.o_proj
287
+ model.layers.7.self_attn.q_proj
288
+ model.layers.7.self_attn.v_proj
289
+ model.layers.8.input_layernorm
290
+ model.layers.8.mlp
291
+ model.layers.8.mlp.act_fn
292
+ model.layers.8.mlp.down_proj
293
+ model.layers.8.mlp.gate_proj
294
+ model.layers.8.mlp.up_proj
295
+ model.layers.8.post_attention_layernorm
296
+ model.layers.8.self_attn.k_proj
297
+ model.layers.8.self_attn.o_proj
298
+ model.layers.8.self_attn.q_proj
299
+ model.layers.8.self_attn.v_proj
300
+ model.layers.9.input_layernorm
301
+ model.layers.9.mlp
302
+ model.layers.9.mlp.act_fn
303
+ model.layers.9.mlp.down_proj
304
+ model.layers.9.mlp.gate_proj
305
+ model.layers.9.mlp.up_proj
306
+ model.layers.9.post_attention_layernorm
307
+ model.layers.9.self_attn.k_proj
308
+ model.layers.9.self_attn.o_proj
309
+ model.layers.9.self_attn.q_proj
310
+ model.layers.9.self_attn.v_proj
311
  model.norm
312
+ visual
313
+ visual.blocks.0
314
+ visual.blocks.0.attn
315
+ visual.blocks.0.attn.proj
316
+ visual.blocks.0.attn.qkv
317
+ visual.blocks.0.mlp
318
+ visual.blocks.0.mlp.act
319
+ visual.blocks.0.mlp.fc1
320
+ visual.blocks.0.mlp.fc2
321
+ visual.blocks.0.norm1
322
+ visual.blocks.0.norm2
323
+ visual.blocks.1
324
+ visual.blocks.1.attn
325
+ visual.blocks.1.attn.proj
326
+ visual.blocks.1.attn.qkv
327
+ visual.blocks.1.mlp
328
+ visual.blocks.1.mlp.act
329
+ visual.blocks.1.mlp.fc1
330
+ visual.blocks.1.mlp.fc2
331
+ visual.blocks.1.norm1
332
+ visual.blocks.1.norm2
333
+ visual.blocks.10
334
+ visual.blocks.10.attn
335
+ visual.blocks.10.attn.proj
336
+ visual.blocks.10.attn.qkv
337
+ visual.blocks.10.mlp
338
+ visual.blocks.10.mlp.act
339
+ visual.blocks.10.mlp.fc1
340
+ visual.blocks.10.mlp.fc2
341
+ visual.blocks.10.norm1
342
+ visual.blocks.10.norm2
343
+ visual.blocks.11
344
+ visual.blocks.11.attn
345
+ visual.blocks.11.attn.proj
346
+ visual.blocks.11.attn.qkv
347
+ visual.blocks.11.mlp
348
+ visual.blocks.11.mlp.act
349
+ visual.blocks.11.mlp.fc1
350
+ visual.blocks.11.mlp.fc2
351
+ visual.blocks.11.norm1
352
+ visual.blocks.11.norm2
353
+ visual.blocks.12
354
+ visual.blocks.12.attn
355
+ visual.blocks.12.attn.proj
356
+ visual.blocks.12.attn.qkv
357
+ visual.blocks.12.mlp
358
+ visual.blocks.12.mlp.act
359
+ visual.blocks.12.mlp.fc1
360
+ visual.blocks.12.mlp.fc2
361
+ visual.blocks.12.norm1
362
+ visual.blocks.12.norm2
363
+ visual.blocks.13
364
+ visual.blocks.13.attn
365
+ visual.blocks.13.attn.proj
366
+ visual.blocks.13.attn.qkv
367
+ visual.blocks.13.mlp
368
+ visual.blocks.13.mlp.act
369
+ visual.blocks.13.mlp.fc1
370
+ visual.blocks.13.mlp.fc2
371
+ visual.blocks.13.norm1
372
+ visual.blocks.13.norm2
373
+ visual.blocks.14
374
+ visual.blocks.14.attn
375
+ visual.blocks.14.attn.proj
376
+ visual.blocks.14.attn.qkv
377
+ visual.blocks.14.mlp
378
+ visual.blocks.14.mlp.act
379
+ visual.blocks.14.mlp.fc1
380
+ visual.blocks.14.mlp.fc2
381
+ visual.blocks.14.norm1
382
+ visual.blocks.14.norm2
383
+ visual.blocks.15
384
+ visual.blocks.15.attn
385
+ visual.blocks.15.attn.proj
386
+ visual.blocks.15.attn.qkv
387
+ visual.blocks.15.mlp
388
+ visual.blocks.15.mlp.act
389
+ visual.blocks.15.mlp.fc1
390
+ visual.blocks.15.mlp.fc2
391
+ visual.blocks.15.norm1
392
+ visual.blocks.15.norm2
393
+ visual.blocks.16
394
+ visual.blocks.16.attn
395
+ visual.blocks.16.attn.proj
396
+ visual.blocks.16.attn.qkv
397
+ visual.blocks.16.mlp
398
+ visual.blocks.16.mlp.act
399
+ visual.blocks.16.mlp.fc1
400
+ visual.blocks.16.mlp.fc2
401
+ visual.blocks.16.norm1
402
+ visual.blocks.16.norm2
403
+ visual.blocks.17
404
+ visual.blocks.17.attn
405
+ visual.blocks.17.attn.proj
406
+ visual.blocks.17.attn.qkv
407
+ visual.blocks.17.mlp
408
+ visual.blocks.17.mlp.act
409
+ visual.blocks.17.mlp.fc1
410
+ visual.blocks.17.mlp.fc2
411
+ visual.blocks.17.norm1
412
+ visual.blocks.17.norm2
413
+ visual.blocks.18
414
+ visual.blocks.18.attn
415
+ visual.blocks.18.attn.proj
416
+ visual.blocks.18.attn.qkv
417
+ visual.blocks.18.mlp
418
+ visual.blocks.18.mlp.act
419
+ visual.blocks.18.mlp.fc1
420
+ visual.blocks.18.mlp.fc2
421
+ visual.blocks.18.norm1
422
+ visual.blocks.18.norm2
423
+ visual.blocks.19
424
+ visual.blocks.19.attn
425
+ visual.blocks.19.attn.proj
426
+ visual.blocks.19.attn.qkv
427
+ visual.blocks.19.mlp
428
+ visual.blocks.19.mlp.act
429
+ visual.blocks.19.mlp.fc1
430
+ visual.blocks.19.mlp.fc2
431
+ visual.blocks.19.norm1
432
+ visual.blocks.19.norm2
433
+ visual.blocks.2
434
+ visual.blocks.2.attn
435
+ visual.blocks.2.attn.proj
436
+ visual.blocks.2.attn.qkv
437
+ visual.blocks.2.mlp
438
+ visual.blocks.2.mlp.act
439
+ visual.blocks.2.mlp.fc1
440
+ visual.blocks.2.mlp.fc2
441
+ visual.blocks.2.norm1
442
+ visual.blocks.2.norm2
443
+ visual.blocks.20
444
+ visual.blocks.20.attn
445
+ visual.blocks.20.attn.proj
446
+ visual.blocks.20.attn.qkv
447
+ visual.blocks.20.mlp
448
+ visual.blocks.20.mlp.act
449
+ visual.blocks.20.mlp.fc1
450
+ visual.blocks.20.mlp.fc2
451
+ visual.blocks.20.norm1
452
+ visual.blocks.20.norm2
453
+ visual.blocks.21
454
+ visual.blocks.21.attn
455
+ visual.blocks.21.attn.proj
456
+ visual.blocks.21.attn.qkv
457
+ visual.blocks.21.mlp
458
+ visual.blocks.21.mlp.act
459
+ visual.blocks.21.mlp.fc1
460
+ visual.blocks.21.mlp.fc2
461
+ visual.blocks.21.norm1
462
+ visual.blocks.21.norm2
463
+ visual.blocks.22
464
+ visual.blocks.22.attn
465
+ visual.blocks.22.attn.proj
466
+ visual.blocks.22.attn.qkv
467
+ visual.blocks.22.mlp
468
+ visual.blocks.22.mlp.act
469
+ visual.blocks.22.mlp.fc1
470
+ visual.blocks.22.mlp.fc2
471
+ visual.blocks.22.norm1
472
+ visual.blocks.22.norm2
473
+ visual.blocks.23
474
+ visual.blocks.23.attn
475
+ visual.blocks.23.attn.proj
476
+ visual.blocks.23.attn.qkv
477
+ visual.blocks.23.mlp
478
+ visual.blocks.23.mlp.act
479
+ visual.blocks.23.mlp.fc1
480
+ visual.blocks.23.mlp.fc2
481
+ visual.blocks.23.norm1
482
+ visual.blocks.23.norm2
483
+ visual.blocks.24
484
+ visual.blocks.24.attn
485
+ visual.blocks.24.attn.proj
486
+ visual.blocks.24.attn.qkv
487
+ visual.blocks.24.mlp
488
+ visual.blocks.24.mlp.act
489
+ visual.blocks.24.mlp.fc1
490
+ visual.blocks.24.mlp.fc2
491
+ visual.blocks.24.norm1
492
+ visual.blocks.24.norm2
493
+ visual.blocks.25
494
+ visual.blocks.25.attn
495
+ visual.blocks.25.attn.proj
496
+ visual.blocks.25.attn.qkv
497
+ visual.blocks.25.mlp
498
+ visual.blocks.25.mlp.act
499
+ visual.blocks.25.mlp.fc1
500
+ visual.blocks.25.mlp.fc2
501
+ visual.blocks.25.norm1
502
+ visual.blocks.25.norm2
503
+ visual.blocks.26
504
+ visual.blocks.26.attn
505
+ visual.blocks.26.attn.proj
506
+ visual.blocks.26.attn.qkv
507
+ visual.blocks.26.mlp
508
+ visual.blocks.26.mlp.act
509
+ visual.blocks.26.mlp.fc1
510
+ visual.blocks.26.mlp.fc2
511
+ visual.blocks.26.norm1
512
+ visual.blocks.26.norm2
513
+ visual.blocks.27
514
+ visual.blocks.27.attn
515
+ visual.blocks.27.attn.proj
516
+ visual.blocks.27.attn.qkv
517
+ visual.blocks.27.mlp
518
+ visual.blocks.27.mlp.act
519
+ visual.blocks.27.mlp.fc1
520
+ visual.blocks.27.mlp.fc2
521
+ visual.blocks.27.norm1
522
+ visual.blocks.27.norm2
523
+ visual.blocks.28
524
+ visual.blocks.28.attn
525
+ visual.blocks.28.attn.proj
526
+ visual.blocks.28.attn.qkv
527
+ visual.blocks.28.mlp
528
+ visual.blocks.28.mlp.act
529
+ visual.blocks.28.mlp.fc1
530
+ visual.blocks.28.mlp.fc2
531
+ visual.blocks.28.norm1
532
+ visual.blocks.28.norm2
533
+ visual.blocks.29
534
+ visual.blocks.29.attn
535
+ visual.blocks.29.attn.proj
536
+ visual.blocks.29.attn.qkv
537
+ visual.blocks.29.mlp
538
+ visual.blocks.29.mlp.act
539
+ visual.blocks.29.mlp.fc1
540
+ visual.blocks.29.mlp.fc2
541
+ visual.blocks.29.norm1
542
+ visual.blocks.29.norm2
543
+ visual.blocks.3
544
+ visual.blocks.3.attn
545
+ visual.blocks.3.attn.proj
546
+ visual.blocks.3.attn.qkv
547
+ visual.blocks.3.mlp
548
+ visual.blocks.3.mlp.act
549
+ visual.blocks.3.mlp.fc1
550
+ visual.blocks.3.mlp.fc2
551
+ visual.blocks.3.norm1
552
+ visual.blocks.3.norm2
553
+ visual.blocks.30
554
+ visual.blocks.30.attn
555
+ visual.blocks.30.attn.proj
556
+ visual.blocks.30.attn.qkv
557
+ visual.blocks.30.mlp
558
+ visual.blocks.30.mlp.act
559
+ visual.blocks.30.mlp.fc1
560
+ visual.blocks.30.mlp.fc2
561
+ visual.blocks.30.norm1
562
+ visual.blocks.30.norm2
563
+ visual.blocks.31
564
+ visual.blocks.31.attn
565
+ visual.blocks.31.attn.proj
566
+ visual.blocks.31.attn.qkv
567
+ visual.blocks.31.mlp
568
+ visual.blocks.31.mlp.act
569
+ visual.blocks.31.mlp.fc1
570
+ visual.blocks.31.mlp.fc2
571
+ visual.blocks.31.norm1
572
+ visual.blocks.31.norm2
573
+ visual.blocks.4
574
+ visual.blocks.4.attn
575
+ visual.blocks.4.attn.proj
576
+ visual.blocks.4.attn.qkv
577
+ visual.blocks.4.mlp
578
+ visual.blocks.4.mlp.act
579
+ visual.blocks.4.mlp.fc1
580
+ visual.blocks.4.mlp.fc2
581
+ visual.blocks.4.norm1
582
+ visual.blocks.4.norm2
583
+ visual.blocks.5
584
+ visual.blocks.5.attn
585
+ visual.blocks.5.attn.proj
586
+ visual.blocks.5.attn.qkv
587
+ visual.blocks.5.mlp
588
+ visual.blocks.5.mlp.act
589
+ visual.blocks.5.mlp.fc1
590
+ visual.blocks.5.mlp.fc2
591
+ visual.blocks.5.norm1
592
+ visual.blocks.5.norm2
593
+ visual.blocks.6
594
+ visual.blocks.6.attn
595
+ visual.blocks.6.attn.proj
596
+ visual.blocks.6.attn.qkv
597
+ visual.blocks.6.mlp
598
+ visual.blocks.6.mlp.act
599
+ visual.blocks.6.mlp.fc1
600
+ visual.blocks.6.mlp.fc2
601
+ visual.blocks.6.norm1
602
+ visual.blocks.6.norm2
603
+ visual.blocks.7
604
+ visual.blocks.7.attn
605
+ visual.blocks.7.attn.proj
606
+ visual.blocks.7.attn.qkv
607
+ visual.blocks.7.mlp
608
+ visual.blocks.7.mlp.act
609
+ visual.blocks.7.mlp.fc1
610
+ visual.blocks.7.mlp.fc2
611
+ visual.blocks.7.norm1
612
+ visual.blocks.7.norm2
613
+ visual.blocks.8
614
+ visual.blocks.8.attn
615
+ visual.blocks.8.attn.proj
616
+ visual.blocks.8.attn.qkv
617
+ visual.blocks.8.mlp
618
+ visual.blocks.8.mlp.act
619
+ visual.blocks.8.mlp.fc1
620
+ visual.blocks.8.mlp.fc2
621
+ visual.blocks.8.norm1
622
+ visual.blocks.8.norm2
623
+ visual.blocks.9
624
+ visual.blocks.9.attn
625
+ visual.blocks.9.attn.proj
626
+ visual.blocks.9.attn.qkv
627
+ visual.blocks.9.mlp
628
+ visual.blocks.9.mlp.act
629
+ visual.blocks.9.mlp.fc1
630
+ visual.blocks.9.mlp.fc2
631
+ visual.blocks.9.norm1
632
+ visual.blocks.9.norm2
633
+ visual.merger
634
+ visual.merger.ln_q
635
+ visual.merger.mlp
636
+ visual.merger.mlp.0
637
+ visual.merger.mlp.1
638
+ visual.merger.mlp.2
639
+ visual.patch_embed
640
+ visual.patch_embed.proj
641
+ visual.rotary_pos_emb
{logs β†’ demo/logs}/Qwen/Qwen2-VL-7B-Instruct.txt RENAMED
@@ -1,730 +1,641 @@
1
-
2
- visual
3
- visual.patch_embed
4
- visual.patch_embed.proj
5
- visual.rotary_pos_emb
6
- visual.blocks
7
- visual.blocks.0
8
- visual.blocks.0.norm1
9
- visual.blocks.0.norm2
10
- visual.blocks.0.attn
11
- visual.blocks.0.attn.qkv
12
- visual.blocks.0.attn.proj
13
- visual.blocks.0.mlp
14
- visual.blocks.0.mlp.fc1
15
- visual.blocks.0.mlp.act
16
- visual.blocks.0.mlp.fc2
17
- visual.blocks.1
18
- visual.blocks.1.norm1
19
- visual.blocks.1.norm2
20
- visual.blocks.1.attn
21
- visual.blocks.1.attn.qkv
22
- visual.blocks.1.attn.proj
23
- visual.blocks.1.mlp
24
- visual.blocks.1.mlp.fc1
25
- visual.blocks.1.mlp.act
26
- visual.blocks.1.mlp.fc2
27
- visual.blocks.2
28
- visual.blocks.2.norm1
29
- visual.blocks.2.norm2
30
- visual.blocks.2.attn
31
- visual.blocks.2.attn.qkv
32
- visual.blocks.2.attn.proj
33
- visual.blocks.2.mlp
34
- visual.blocks.2.mlp.fc1
35
- visual.blocks.2.mlp.act
36
- visual.blocks.2.mlp.fc2
37
- visual.blocks.3
38
- visual.blocks.3.norm1
39
- visual.blocks.3.norm2
40
- visual.blocks.3.attn
41
- visual.blocks.3.attn.qkv
42
- visual.blocks.3.attn.proj
43
- visual.blocks.3.mlp
44
- visual.blocks.3.mlp.fc1
45
- visual.blocks.3.mlp.act
46
- visual.blocks.3.mlp.fc2
47
- visual.blocks.4
48
- visual.blocks.4.norm1
49
- visual.blocks.4.norm2
50
- visual.blocks.4.attn
51
- visual.blocks.4.attn.qkv
52
- visual.blocks.4.attn.proj
53
- visual.blocks.4.mlp
54
- visual.blocks.4.mlp.fc1
55
- visual.blocks.4.mlp.act
56
- visual.blocks.4.mlp.fc2
57
- visual.blocks.5
58
- visual.blocks.5.norm1
59
- visual.blocks.5.norm2
60
- visual.blocks.5.attn
61
- visual.blocks.5.attn.qkv
62
- visual.blocks.5.attn.proj
63
- visual.blocks.5.mlp
64
- visual.blocks.5.mlp.fc1
65
- visual.blocks.5.mlp.act
66
- visual.blocks.5.mlp.fc2
67
- visual.blocks.6
68
- visual.blocks.6.norm1
69
- visual.blocks.6.norm2
70
- visual.blocks.6.attn
71
- visual.blocks.6.attn.qkv
72
- visual.blocks.6.attn.proj
73
- visual.blocks.6.mlp
74
- visual.blocks.6.mlp.fc1
75
- visual.blocks.6.mlp.act
76
- visual.blocks.6.mlp.fc2
77
- visual.blocks.7
78
- visual.blocks.7.norm1
79
- visual.blocks.7.norm2
80
- visual.blocks.7.attn
81
- visual.blocks.7.attn.qkv
82
- visual.blocks.7.attn.proj
83
- visual.blocks.7.mlp
84
- visual.blocks.7.mlp.fc1
85
- visual.blocks.7.mlp.act
86
- visual.blocks.7.mlp.fc2
87
- visual.blocks.8
88
- visual.blocks.8.norm1
89
- visual.blocks.8.norm2
90
- visual.blocks.8.attn
91
- visual.blocks.8.attn.qkv
92
- visual.blocks.8.attn.proj
93
- visual.blocks.8.mlp
94
- visual.blocks.8.mlp.fc1
95
- visual.blocks.8.mlp.act
96
- visual.blocks.8.mlp.fc2
97
- visual.blocks.9
98
- visual.blocks.9.norm1
99
- visual.blocks.9.norm2
100
- visual.blocks.9.attn
101
- visual.blocks.9.attn.qkv
102
- visual.blocks.9.attn.proj
103
- visual.blocks.9.mlp
104
- visual.blocks.9.mlp.fc1
105
- visual.blocks.9.mlp.act
106
- visual.blocks.9.mlp.fc2
107
- visual.blocks.10
108
- visual.blocks.10.norm1
109
- visual.blocks.10.norm2
110
- visual.blocks.10.attn
111
- visual.blocks.10.attn.qkv
112
- visual.blocks.10.attn.proj
113
- visual.blocks.10.mlp
114
- visual.blocks.10.mlp.fc1
115
- visual.blocks.10.mlp.act
116
- visual.blocks.10.mlp.fc2
117
- visual.blocks.11
118
- visual.blocks.11.norm1
119
- visual.blocks.11.norm2
120
- visual.blocks.11.attn
121
- visual.blocks.11.attn.qkv
122
- visual.blocks.11.attn.proj
123
- visual.blocks.11.mlp
124
- visual.blocks.11.mlp.fc1
125
- visual.blocks.11.mlp.act
126
- visual.blocks.11.mlp.fc2
127
- visual.blocks.12
128
- visual.blocks.12.norm1
129
- visual.blocks.12.norm2
130
- visual.blocks.12.attn
131
- visual.blocks.12.attn.qkv
132
- visual.blocks.12.attn.proj
133
- visual.blocks.12.mlp
134
- visual.blocks.12.mlp.fc1
135
- visual.blocks.12.mlp.act
136
- visual.blocks.12.mlp.fc2
137
- visual.blocks.13
138
- visual.blocks.13.norm1
139
- visual.blocks.13.norm2
140
- visual.blocks.13.attn
141
- visual.blocks.13.attn.qkv
142
- visual.blocks.13.attn.proj
143
- visual.blocks.13.mlp
144
- visual.blocks.13.mlp.fc1
145
- visual.blocks.13.mlp.act
146
- visual.blocks.13.mlp.fc2
147
- visual.blocks.14
148
- visual.blocks.14.norm1
149
- visual.blocks.14.norm2
150
- visual.blocks.14.attn
151
- visual.blocks.14.attn.qkv
152
- visual.blocks.14.attn.proj
153
- visual.blocks.14.mlp
154
- visual.blocks.14.mlp.fc1
155
- visual.blocks.14.mlp.act
156
- visual.blocks.14.mlp.fc2
157
- visual.blocks.15
158
- visual.blocks.15.norm1
159
- visual.blocks.15.norm2
160
- visual.blocks.15.attn
161
- visual.blocks.15.attn.qkv
162
- visual.blocks.15.attn.proj
163
- visual.blocks.15.mlp
164
- visual.blocks.15.mlp.fc1
165
- visual.blocks.15.mlp.act
166
- visual.blocks.15.mlp.fc2
167
- visual.blocks.16
168
- visual.blocks.16.norm1
169
- visual.blocks.16.norm2
170
- visual.blocks.16.attn
171
- visual.blocks.16.attn.qkv
172
- visual.blocks.16.attn.proj
173
- visual.blocks.16.mlp
174
- visual.blocks.16.mlp.fc1
175
- visual.blocks.16.mlp.act
176
- visual.blocks.16.mlp.fc2
177
- visual.blocks.17
178
- visual.blocks.17.norm1
179
- visual.blocks.17.norm2
180
- visual.blocks.17.attn
181
- visual.blocks.17.attn.qkv
182
- visual.blocks.17.attn.proj
183
- visual.blocks.17.mlp
184
- visual.blocks.17.mlp.fc1
185
- visual.blocks.17.mlp.act
186
- visual.blocks.17.mlp.fc2
187
- visual.blocks.18
188
- visual.blocks.18.norm1
189
- visual.blocks.18.norm2
190
- visual.blocks.18.attn
191
- visual.blocks.18.attn.qkv
192
- visual.blocks.18.attn.proj
193
- visual.blocks.18.mlp
194
- visual.blocks.18.mlp.fc1
195
- visual.blocks.18.mlp.act
196
- visual.blocks.18.mlp.fc2
197
- visual.blocks.19
198
- visual.blocks.19.norm1
199
- visual.blocks.19.norm2
200
- visual.blocks.19.attn
201
- visual.blocks.19.attn.qkv
202
- visual.blocks.19.attn.proj
203
- visual.blocks.19.mlp
204
- visual.blocks.19.mlp.fc1
205
- visual.blocks.19.mlp.act
206
- visual.blocks.19.mlp.fc2
207
- visual.blocks.20
208
- visual.blocks.20.norm1
209
- visual.blocks.20.norm2
210
- visual.blocks.20.attn
211
- visual.blocks.20.attn.qkv
212
- visual.blocks.20.attn.proj
213
- visual.blocks.20.mlp
214
- visual.blocks.20.mlp.fc1
215
- visual.blocks.20.mlp.act
216
- visual.blocks.20.mlp.fc2
217
- visual.blocks.21
218
- visual.blocks.21.norm1
219
- visual.blocks.21.norm2
220
- visual.blocks.21.attn
221
- visual.blocks.21.attn.qkv
222
- visual.blocks.21.attn.proj
223
- visual.blocks.21.mlp
224
- visual.blocks.21.mlp.fc1
225
- visual.blocks.21.mlp.act
226
- visual.blocks.21.mlp.fc2
227
- visual.blocks.22
228
- visual.blocks.22.norm1
229
- visual.blocks.22.norm2
230
- visual.blocks.22.attn
231
- visual.blocks.22.attn.qkv
232
- visual.blocks.22.attn.proj
233
- visual.blocks.22.mlp
234
- visual.blocks.22.mlp.fc1
235
- visual.blocks.22.mlp.act
236
- visual.blocks.22.mlp.fc2
237
- visual.blocks.23
238
- visual.blocks.23.norm1
239
- visual.blocks.23.norm2
240
- visual.blocks.23.attn
241
- visual.blocks.23.attn.qkv
242
- visual.blocks.23.attn.proj
243
- visual.blocks.23.mlp
244
- visual.blocks.23.mlp.fc1
245
- visual.blocks.23.mlp.act
246
- visual.blocks.23.mlp.fc2
247
- visual.blocks.24
248
- visual.blocks.24.norm1
249
- visual.blocks.24.norm2
250
- visual.blocks.24.attn
251
- visual.blocks.24.attn.qkv
252
- visual.blocks.24.attn.proj
253
- visual.blocks.24.mlp
254
- visual.blocks.24.mlp.fc1
255
- visual.blocks.24.mlp.act
256
- visual.blocks.24.mlp.fc2
257
- visual.blocks.25
258
- visual.blocks.25.norm1
259
- visual.blocks.25.norm2
260
- visual.blocks.25.attn
261
- visual.blocks.25.attn.qkv
262
- visual.blocks.25.attn.proj
263
- visual.blocks.25.mlp
264
- visual.blocks.25.mlp.fc1
265
- visual.blocks.25.mlp.act
266
- visual.blocks.25.mlp.fc2
267
- visual.blocks.26
268
- visual.blocks.26.norm1
269
- visual.blocks.26.norm2
270
- visual.blocks.26.attn
271
- visual.blocks.26.attn.qkv
272
- visual.blocks.26.attn.proj
273
- visual.blocks.26.mlp
274
- visual.blocks.26.mlp.fc1
275
- visual.blocks.26.mlp.act
276
- visual.blocks.26.mlp.fc2
277
- visual.blocks.27
278
- visual.blocks.27.norm1
279
- visual.blocks.27.norm2
280
- visual.blocks.27.attn
281
- visual.blocks.27.attn.qkv
282
- visual.blocks.27.attn.proj
283
- visual.blocks.27.mlp
284
- visual.blocks.27.mlp.fc1
285
- visual.blocks.27.mlp.act
286
- visual.blocks.27.mlp.fc2
287
- visual.blocks.28
288
- visual.blocks.28.norm1
289
- visual.blocks.28.norm2
290
- visual.blocks.28.attn
291
- visual.blocks.28.attn.qkv
292
- visual.blocks.28.attn.proj
293
- visual.blocks.28.mlp
294
- visual.blocks.28.mlp.fc1
295
- visual.blocks.28.mlp.act
296
- visual.blocks.28.mlp.fc2
297
- visual.blocks.29
298
- visual.blocks.29.norm1
299
- visual.blocks.29.norm2
300
- visual.blocks.29.attn
301
- visual.blocks.29.attn.qkv
302
- visual.blocks.29.attn.proj
303
- visual.blocks.29.mlp
304
- visual.blocks.29.mlp.fc1
305
- visual.blocks.29.mlp.act
306
- visual.blocks.29.mlp.fc2
307
- visual.blocks.30
308
- visual.blocks.30.norm1
309
- visual.blocks.30.norm2
310
- visual.blocks.30.attn
311
- visual.blocks.30.attn.qkv
312
- visual.blocks.30.attn.proj
313
- visual.blocks.30.mlp
314
- visual.blocks.30.mlp.fc1
315
- visual.blocks.30.mlp.act
316
- visual.blocks.30.mlp.fc2
317
- visual.blocks.31
318
- visual.blocks.31.norm1
319
- visual.blocks.31.norm2
320
- visual.blocks.31.attn
321
- visual.blocks.31.attn.qkv
322
- visual.blocks.31.attn.proj
323
- visual.blocks.31.mlp
324
- visual.blocks.31.mlp.fc1
325
- visual.blocks.31.mlp.act
326
- visual.blocks.31.mlp.fc2
327
- visual.merger
328
- visual.merger.ln_q
329
- visual.merger.mlp
330
- visual.merger.mlp.0
331
- visual.merger.mlp.1
332
- visual.merger.mlp.2
333
- model
334
  model.embed_tokens
335
- model.layers
336
- model.layers.0
337
- model.layers.0.self_attn
338
- model.layers.0.self_attn.q_proj
339
- model.layers.0.self_attn.k_proj
340
- model.layers.0.self_attn.v_proj
341
- model.layers.0.self_attn.o_proj
342
- model.layers.0.self_attn.rotary_emb
343
  model.layers.0.mlp
 
 
344
  model.layers.0.mlp.gate_proj
345
  model.layers.0.mlp.up_proj
346
- model.layers.0.mlp.down_proj
347
- model.layers.0.mlp.act_fn
348
- model.layers.0.input_layernorm
349
  model.layers.0.post_attention_layernorm
350
- model.layers.1
351
- model.layers.1.self_attn
352
- model.layers.1.self_attn.q_proj
353
- model.layers.1.self_attn.k_proj
354
- model.layers.1.self_attn.v_proj
355
- model.layers.1.self_attn.o_proj
356
- model.layers.1.self_attn.rotary_emb
357
  model.layers.1.mlp
 
 
358
  model.layers.1.mlp.gate_proj
359
  model.layers.1.mlp.up_proj
360
- model.layers.1.mlp.down_proj
361
- model.layers.1.mlp.act_fn
362
- model.layers.1.input_layernorm
363
  model.layers.1.post_attention_layernorm
364
- model.layers.2
365
- model.layers.2.self_attn
366
- model.layers.2.self_attn.q_proj
367
- model.layers.2.self_attn.k_proj
368
- model.layers.2.self_attn.v_proj
369
- model.layers.2.self_attn.o_proj
370
- model.layers.2.self_attn.rotary_emb
371
- model.layers.2.mlp
372
- model.layers.2.mlp.gate_proj
373
- model.layers.2.mlp.up_proj
374
- model.layers.2.mlp.down_proj
375
- model.layers.2.mlp.act_fn
376
- model.layers.2.input_layernorm
377
- model.layers.2.post_attention_layernorm
378
- model.layers.3
379
- model.layers.3.self_attn
380
- model.layers.3.self_attn.q_proj
381
- model.layers.3.self_attn.k_proj
382
- model.layers.3.self_attn.v_proj
383
- model.layers.3.self_attn.o_proj
384
- model.layers.3.self_attn.rotary_emb
385
- model.layers.3.mlp
386
- model.layers.3.mlp.gate_proj
387
- model.layers.3.mlp.up_proj
388
- model.layers.3.mlp.down_proj
389
- model.layers.3.mlp.act_fn
390
- model.layers.3.input_layernorm
391
- model.layers.3.post_attention_layernorm
392
- model.layers.4
393
- model.layers.4.self_attn
394
- model.layers.4.self_attn.q_proj
395
- model.layers.4.self_attn.k_proj
396
- model.layers.4.self_attn.v_proj
397
- model.layers.4.self_attn.o_proj
398
- model.layers.4.self_attn.rotary_emb
399
- model.layers.4.mlp
400
- model.layers.4.mlp.gate_proj
401
- model.layers.4.mlp.up_proj
402
- model.layers.4.mlp.down_proj
403
- model.layers.4.mlp.act_fn
404
- model.layers.4.input_layernorm
405
- model.layers.4.post_attention_layernorm
406
- model.layers.5
407
- model.layers.5.self_attn
408
- model.layers.5.self_attn.q_proj
409
- model.layers.5.self_attn.k_proj
410
- model.layers.5.self_attn.v_proj
411
- model.layers.5.self_attn.o_proj
412
- model.layers.5.self_attn.rotary_emb
413
- model.layers.5.mlp
414
- model.layers.5.mlp.gate_proj
415
- model.layers.5.mlp.up_proj
416
- model.layers.5.mlp.down_proj
417
- model.layers.5.mlp.act_fn
418
- model.layers.5.input_layernorm
419
- model.layers.5.post_attention_layernorm
420
- model.layers.6
421
- model.layers.6.self_attn
422
- model.layers.6.self_attn.q_proj
423
- model.layers.6.self_attn.k_proj
424
- model.layers.6.self_attn.v_proj
425
- model.layers.6.self_attn.o_proj
426
- model.layers.6.self_attn.rotary_emb
427
- model.layers.6.mlp
428
- model.layers.6.mlp.gate_proj
429
- model.layers.6.mlp.up_proj
430
- model.layers.6.mlp.down_proj
431
- model.layers.6.mlp.act_fn
432
- model.layers.6.input_layernorm
433
- model.layers.6.post_attention_layernorm
434
- model.layers.7
435
- model.layers.7.self_attn
436
- model.layers.7.self_attn.q_proj
437
- model.layers.7.self_attn.k_proj
438
- model.layers.7.self_attn.v_proj
439
- model.layers.7.self_attn.o_proj
440
- model.layers.7.self_attn.rotary_emb
441
- model.layers.7.mlp
442
- model.layers.7.mlp.gate_proj
443
- model.layers.7.mlp.up_proj
444
- model.layers.7.mlp.down_proj
445
- model.layers.7.mlp.act_fn
446
- model.layers.7.input_layernorm
447
- model.layers.7.post_attention_layernorm
448
- model.layers.8
449
- model.layers.8.self_attn
450
- model.layers.8.self_attn.q_proj
451
- model.layers.8.self_attn.k_proj
452
- model.layers.8.self_attn.v_proj
453
- model.layers.8.self_attn.o_proj
454
- model.layers.8.self_attn.rotary_emb
455
- model.layers.8.mlp
456
- model.layers.8.mlp.gate_proj
457
- model.layers.8.mlp.up_proj
458
- model.layers.8.mlp.down_proj
459
- model.layers.8.mlp.act_fn
460
- model.layers.8.input_layernorm
461
- model.layers.8.post_attention_layernorm
462
- model.layers.9
463
- model.layers.9.self_attn
464
- model.layers.9.self_attn.q_proj
465
- model.layers.9.self_attn.k_proj
466
- model.layers.9.self_attn.v_proj
467
- model.layers.9.self_attn.o_proj
468
- model.layers.9.self_attn.rotary_emb
469
- model.layers.9.mlp
470
- model.layers.9.mlp.gate_proj
471
- model.layers.9.mlp.up_proj
472
- model.layers.9.mlp.down_proj
473
- model.layers.9.mlp.act_fn
474
- model.layers.9.input_layernorm
475
- model.layers.9.post_attention_layernorm
476
- model.layers.10
477
- model.layers.10.self_attn
478
- model.layers.10.self_attn.q_proj
479
- model.layers.10.self_attn.k_proj
480
- model.layers.10.self_attn.v_proj
481
- model.layers.10.self_attn.o_proj
482
- model.layers.10.self_attn.rotary_emb
483
  model.layers.10.mlp
 
 
484
  model.layers.10.mlp.gate_proj
485
  model.layers.10.mlp.up_proj
486
- model.layers.10.mlp.down_proj
487
- model.layers.10.mlp.act_fn
488
- model.layers.10.input_layernorm
489
  model.layers.10.post_attention_layernorm
490
- model.layers.11
491
- model.layers.11.self_attn
492
- model.layers.11.self_attn.q_proj
493
- model.layers.11.self_attn.k_proj
494
- model.layers.11.self_attn.v_proj
495
- model.layers.11.self_attn.o_proj
496
- model.layers.11.self_attn.rotary_emb
497
  model.layers.11.mlp
 
 
498
  model.layers.11.mlp.gate_proj
499
  model.layers.11.mlp.up_proj
500
- model.layers.11.mlp.down_proj
501
- model.layers.11.mlp.act_fn
502
- model.layers.11.input_layernorm
503
  model.layers.11.post_attention_layernorm
504
- model.layers.12
505
- model.layers.12.self_attn
506
- model.layers.12.self_attn.q_proj
507
- model.layers.12.self_attn.k_proj
508
- model.layers.12.self_attn.v_proj
509
- model.layers.12.self_attn.o_proj
510
- model.layers.12.self_attn.rotary_emb
511
  model.layers.12.mlp
 
 
512
  model.layers.12.mlp.gate_proj
513
  model.layers.12.mlp.up_proj
514
- model.layers.12.mlp.down_proj
515
- model.layers.12.mlp.act_fn
516
- model.layers.12.input_layernorm
517
  model.layers.12.post_attention_layernorm
518
- model.layers.13
519
- model.layers.13.self_attn
520
- model.layers.13.self_attn.q_proj
521
- model.layers.13.self_attn.k_proj
522
- model.layers.13.self_attn.v_proj
523
- model.layers.13.self_attn.o_proj
524
- model.layers.13.self_attn.rotary_emb
525
  model.layers.13.mlp
 
 
526
  model.layers.13.mlp.gate_proj
527
  model.layers.13.mlp.up_proj
528
- model.layers.13.mlp.down_proj
529
- model.layers.13.mlp.act_fn
530
- model.layers.13.input_layernorm
531
  model.layers.13.post_attention_layernorm
532
- model.layers.14
533
- model.layers.14.self_attn
534
- model.layers.14.self_attn.q_proj
535
- model.layers.14.self_attn.k_proj
536
- model.layers.14.self_attn.v_proj
537
- model.layers.14.self_attn.o_proj
538
- model.layers.14.self_attn.rotary_emb
539
  model.layers.14.mlp
 
 
540
  model.layers.14.mlp.gate_proj
541
  model.layers.14.mlp.up_proj
542
- model.layers.14.mlp.down_proj
543
- model.layers.14.mlp.act_fn
544
- model.layers.14.input_layernorm
545
  model.layers.14.post_attention_layernorm
546
- model.layers.15
547
- model.layers.15.self_attn
548
- model.layers.15.self_attn.q_proj
549
- model.layers.15.self_attn.k_proj
550
- model.layers.15.self_attn.v_proj
551
- model.layers.15.self_attn.o_proj
552
- model.layers.15.self_attn.rotary_emb
553
  model.layers.15.mlp
 
 
554
  model.layers.15.mlp.gate_proj
555
  model.layers.15.mlp.up_proj
556
- model.layers.15.mlp.down_proj
557
- model.layers.15.mlp.act_fn
558
- model.layers.15.input_layernorm
559
  model.layers.15.post_attention_layernorm
560
- model.layers.16
561
- model.layers.16.self_attn
562
- model.layers.16.self_attn.q_proj
563
- model.layers.16.self_attn.k_proj
564
- model.layers.16.self_attn.v_proj
565
- model.layers.16.self_attn.o_proj
566
- model.layers.16.self_attn.rotary_emb
567
  model.layers.16.mlp
 
 
568
  model.layers.16.mlp.gate_proj
569
  model.layers.16.mlp.up_proj
570
- model.layers.16.mlp.down_proj
571
- model.layers.16.mlp.act_fn
572
- model.layers.16.input_layernorm
573
  model.layers.16.post_attention_layernorm
574
- model.layers.17
575
- model.layers.17.self_attn
576
- model.layers.17.self_attn.q_proj
577
- model.layers.17.self_attn.k_proj
578
- model.layers.17.self_attn.v_proj
579
- model.layers.17.self_attn.o_proj
580
- model.layers.17.self_attn.rotary_emb
581
  model.layers.17.mlp
 
 
582
  model.layers.17.mlp.gate_proj
583
  model.layers.17.mlp.up_proj
584
- model.layers.17.mlp.down_proj
585
- model.layers.17.mlp.act_fn
586
- model.layers.17.input_layernorm
587
  model.layers.17.post_attention_layernorm
588
- model.layers.18
589
- model.layers.18.self_attn
590
- model.layers.18.self_attn.q_proj
591
- model.layers.18.self_attn.k_proj
592
- model.layers.18.self_attn.v_proj
593
- model.layers.18.self_attn.o_proj
594
- model.layers.18.self_attn.rotary_emb
595
  model.layers.18.mlp
 
 
596
  model.layers.18.mlp.gate_proj
597
  model.layers.18.mlp.up_proj
598
- model.layers.18.mlp.down_proj
599
- model.layers.18.mlp.act_fn
600
- model.layers.18.input_layernorm
601
  model.layers.18.post_attention_layernorm
602
- model.layers.19
603
- model.layers.19.self_attn
604
- model.layers.19.self_attn.q_proj
605
- model.layers.19.self_attn.k_proj
606
- model.layers.19.self_attn.v_proj
607
- model.layers.19.self_attn.o_proj
608
- model.layers.19.self_attn.rotary_emb
609
  model.layers.19.mlp
 
 
610
  model.layers.19.mlp.gate_proj
611
  model.layers.19.mlp.up_proj
612
- model.layers.19.mlp.down_proj
613
- model.layers.19.mlp.act_fn
614
- model.layers.19.input_layernorm
615
  model.layers.19.post_attention_layernorm
616
- model.layers.20
617
- model.layers.20.self_attn
618
- model.layers.20.self_attn.q_proj
619
- model.layers.20.self_attn.k_proj
620
- model.layers.20.self_attn.v_proj
621
- model.layers.20.self_attn.o_proj
622
- model.layers.20.self_attn.rotary_emb
 
 
 
 
 
 
 
 
 
623
  model.layers.20.mlp
624
- model.layers.20.mlp.gate_proj
625
- model.layers.20.mlp.up_proj
626
- model.layers.20.mlp.down_proj
627
  model.layers.20.mlp.act_fn
628
- model.layers.20.input_layernorm
 
 
629
  model.layers.20.post_attention_layernorm
630
- model.layers.21
631
- model.layers.21.self_attn
632
- model.layers.21.self_attn.q_proj
633
- model.layers.21.self_attn.k_proj
634
- model.layers.21.self_attn.v_proj
635
- model.layers.21.self_attn.o_proj
636
- model.layers.21.self_attn.rotary_emb
637
  model.layers.21.mlp
 
 
638
  model.layers.21.mlp.gate_proj
639
  model.layers.21.mlp.up_proj
640
- model.layers.21.mlp.down_proj
641
- model.layers.21.mlp.act_fn
642
- model.layers.21.input_layernorm
643
  model.layers.21.post_attention_layernorm
644
- model.layers.22
645
- model.layers.22.self_attn
646
- model.layers.22.self_attn.q_proj
647
- model.layers.22.self_attn.k_proj
648
- model.layers.22.self_attn.v_proj
649
- model.layers.22.self_attn.o_proj
650
- model.layers.22.self_attn.rotary_emb
651
  model.layers.22.mlp
 
 
652
  model.layers.22.mlp.gate_proj
653
  model.layers.22.mlp.up_proj
654
- model.layers.22.mlp.down_proj
655
- model.layers.22.mlp.act_fn
656
- model.layers.22.input_layernorm
657
  model.layers.22.post_attention_layernorm
658
- model.layers.23
659
- model.layers.23.self_attn
660
- model.layers.23.self_attn.q_proj
661
- model.layers.23.self_attn.k_proj
662
- model.layers.23.self_attn.v_proj
663
- model.layers.23.self_attn.o_proj
664
- model.layers.23.self_attn.rotary_emb
665
  model.layers.23.mlp
 
 
666
  model.layers.23.mlp.gate_proj
667
  model.layers.23.mlp.up_proj
668
- model.layers.23.mlp.down_proj
669
- model.layers.23.mlp.act_fn
670
- model.layers.23.input_layernorm
671
  model.layers.23.post_attention_layernorm
672
- model.layers.24
673
- model.layers.24.self_attn
674
- model.layers.24.self_attn.q_proj
675
- model.layers.24.self_attn.k_proj
676
- model.layers.24.self_attn.v_proj
677
- model.layers.24.self_attn.o_proj
678
- model.layers.24.self_attn.rotary_emb
679
  model.layers.24.mlp
 
 
680
  model.layers.24.mlp.gate_proj
681
  model.layers.24.mlp.up_proj
682
- model.layers.24.mlp.down_proj
683
- model.layers.24.mlp.act_fn
684
- model.layers.24.input_layernorm
685
  model.layers.24.post_attention_layernorm
686
- model.layers.25
687
- model.layers.25.self_attn
688
- model.layers.25.self_attn.q_proj
689
- model.layers.25.self_attn.k_proj
690
- model.layers.25.self_attn.v_proj
691
- model.layers.25.self_attn.o_proj
692
- model.layers.25.self_attn.rotary_emb
693
  model.layers.25.mlp
 
 
694
  model.layers.25.mlp.gate_proj
695
  model.layers.25.mlp.up_proj
696
- model.layers.25.mlp.down_proj
697
- model.layers.25.mlp.act_fn
698
- model.layers.25.input_layernorm
699
  model.layers.25.post_attention_layernorm
700
- model.layers.26
701
- model.layers.26.self_attn
702
- model.layers.26.self_attn.q_proj
703
- model.layers.26.self_attn.k_proj
704
- model.layers.26.self_attn.v_proj
705
- model.layers.26.self_attn.o_proj
706
- model.layers.26.self_attn.rotary_emb
707
  model.layers.26.mlp
 
 
708
  model.layers.26.mlp.gate_proj
709
  model.layers.26.mlp.up_proj
710
- model.layers.26.mlp.down_proj
711
- model.layers.26.mlp.act_fn
712
- model.layers.26.input_layernorm
713
  model.layers.26.post_attention_layernorm
714
- model.layers.27
715
- model.layers.27.self_attn
716
- model.layers.27.self_attn.q_proj
717
- model.layers.27.self_attn.k_proj
718
- model.layers.27.self_attn.v_proj
719
- model.layers.27.self_attn.o_proj
720
- model.layers.27.self_attn.rotary_emb
721
  model.layers.27.mlp
 
 
722
  model.layers.27.mlp.gate_proj
723
  model.layers.27.mlp.up_proj
724
- model.layers.27.mlp.down_proj
725
- model.layers.27.mlp.act_fn
726
- model.layers.27.input_layernorm
727
  model.layers.27.post_attention_layernorm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
  model.norm
729
- model.rotary_emb
730
- lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  model.embed_tokens
3
+ model.layers.0.input_layernorm
 
 
 
 
 
 
 
4
  model.layers.0.mlp
5
+ model.layers.0.mlp.act_fn
6
+ model.layers.0.mlp.down_proj
7
  model.layers.0.mlp.gate_proj
8
  model.layers.0.mlp.up_proj
 
 
 
9
  model.layers.0.post_attention_layernorm
10
+ model.layers.0.self_attn.k_proj
11
+ model.layers.0.self_attn.o_proj
12
+ model.layers.0.self_attn.q_proj
13
+ model.layers.0.self_attn.v_proj
14
+ model.layers.1.input_layernorm
 
 
15
  model.layers.1.mlp
16
+ model.layers.1.mlp.act_fn
17
+ model.layers.1.mlp.down_proj
18
  model.layers.1.mlp.gate_proj
19
  model.layers.1.mlp.up_proj
 
 
 
20
  model.layers.1.post_attention_layernorm
21
+ model.layers.1.self_attn.k_proj
22
+ model.layers.1.self_attn.o_proj
23
+ model.layers.1.self_attn.q_proj
24
+ model.layers.1.self_attn.v_proj
25
+ model.layers.10.input_layernorm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  model.layers.10.mlp
27
+ model.layers.10.mlp.act_fn
28
+ model.layers.10.mlp.down_proj
29
  model.layers.10.mlp.gate_proj
30
  model.layers.10.mlp.up_proj
 
 
 
31
  model.layers.10.post_attention_layernorm
32
+ model.layers.10.self_attn.k_proj
33
+ model.layers.10.self_attn.o_proj
34
+ model.layers.10.self_attn.q_proj
35
+ model.layers.10.self_attn.v_proj
36
+ model.layers.11.input_layernorm
 
 
37
  model.layers.11.mlp
38
+ model.layers.11.mlp.act_fn
39
+ model.layers.11.mlp.down_proj
40
  model.layers.11.mlp.gate_proj
41
  model.layers.11.mlp.up_proj
 
 
 
42
  model.layers.11.post_attention_layernorm
43
+ model.layers.11.self_attn.k_proj
44
+ model.layers.11.self_attn.o_proj
45
+ model.layers.11.self_attn.q_proj
46
+ model.layers.11.self_attn.v_proj
47
+ model.layers.12.input_layernorm
 
 
48
  model.layers.12.mlp
49
+ model.layers.12.mlp.act_fn
50
+ model.layers.12.mlp.down_proj
51
  model.layers.12.mlp.gate_proj
52
  model.layers.12.mlp.up_proj
 
 
 
53
  model.layers.12.post_attention_layernorm
54
+ model.layers.12.self_attn.k_proj
55
+ model.layers.12.self_attn.o_proj
56
+ model.layers.12.self_attn.q_proj
57
+ model.layers.12.self_attn.v_proj
58
+ model.layers.13.input_layernorm
 
 
59
  model.layers.13.mlp
60
+ model.layers.13.mlp.act_fn
61
+ model.layers.13.mlp.down_proj
62
  model.layers.13.mlp.gate_proj
63
  model.layers.13.mlp.up_proj
 
 
 
64
  model.layers.13.post_attention_layernorm
65
+ model.layers.13.self_attn.k_proj
66
+ model.layers.13.self_attn.o_proj
67
+ model.layers.13.self_attn.q_proj
68
+ model.layers.13.self_attn.v_proj
69
+ model.layers.14.input_layernorm
 
 
70
  model.layers.14.mlp
71
+ model.layers.14.mlp.act_fn
72
+ model.layers.14.mlp.down_proj
73
  model.layers.14.mlp.gate_proj
74
  model.layers.14.mlp.up_proj
 
 
 
75
  model.layers.14.post_attention_layernorm
76
+ model.layers.14.self_attn.k_proj
77
+ model.layers.14.self_attn.o_proj
78
+ model.layers.14.self_attn.q_proj
79
+ model.layers.14.self_attn.v_proj
80
+ model.layers.15.input_layernorm
 
 
81
  model.layers.15.mlp
82
+ model.layers.15.mlp.act_fn
83
+ model.layers.15.mlp.down_proj
84
  model.layers.15.mlp.gate_proj
85
  model.layers.15.mlp.up_proj
 
 
 
86
  model.layers.15.post_attention_layernorm
87
+ model.layers.15.self_attn.k_proj
88
+ model.layers.15.self_attn.o_proj
89
+ model.layers.15.self_attn.q_proj
90
+ model.layers.15.self_attn.v_proj
91
+ model.layers.16.input_layernorm
 
 
92
  model.layers.16.mlp
93
+ model.layers.16.mlp.act_fn
94
+ model.layers.16.mlp.down_proj
95
  model.layers.16.mlp.gate_proj
96
  model.layers.16.mlp.up_proj
 
 
 
97
  model.layers.16.post_attention_layernorm
98
+ model.layers.16.self_attn.k_proj
99
+ model.layers.16.self_attn.o_proj
100
+ model.layers.16.self_attn.q_proj
101
+ model.layers.16.self_attn.v_proj
102
+ model.layers.17.input_layernorm
 
 
103
  model.layers.17.mlp
104
+ model.layers.17.mlp.act_fn
105
+ model.layers.17.mlp.down_proj
106
  model.layers.17.mlp.gate_proj
107
  model.layers.17.mlp.up_proj
 
 
 
108
  model.layers.17.post_attention_layernorm
109
+ model.layers.17.self_attn.k_proj
110
+ model.layers.17.self_attn.o_proj
111
+ model.layers.17.self_attn.q_proj
112
+ model.layers.17.self_attn.v_proj
113
+ model.layers.18.input_layernorm
 
 
114
  model.layers.18.mlp
115
+ model.layers.18.mlp.act_fn
116
+ model.layers.18.mlp.down_proj
117
  model.layers.18.mlp.gate_proj
118
  model.layers.18.mlp.up_proj
 
 
 
119
  model.layers.18.post_attention_layernorm
120
+ model.layers.18.self_attn.k_proj
121
+ model.layers.18.self_attn.o_proj
122
+ model.layers.18.self_attn.q_proj
123
+ model.layers.18.self_attn.v_proj
124
+ model.layers.19.input_layernorm
 
 
125
  model.layers.19.mlp
126
+ model.layers.19.mlp.act_fn
127
+ model.layers.19.mlp.down_proj
128
  model.layers.19.mlp.gate_proj
129
  model.layers.19.mlp.up_proj
 
 
 
130
  model.layers.19.post_attention_layernorm
131
+ model.layers.19.self_attn.k_proj
132
+ model.layers.19.self_attn.o_proj
133
+ model.layers.19.self_attn.q_proj
134
+ model.layers.19.self_attn.v_proj
135
+ model.layers.2.input_layernorm
136
+ model.layers.2.mlp
137
+ model.layers.2.mlp.act_fn
138
+ model.layers.2.mlp.down_proj
139
+ model.layers.2.mlp.gate_proj
140
+ model.layers.2.mlp.up_proj
141
+ model.layers.2.post_attention_layernorm
142
+ model.layers.2.self_attn.k_proj
143
+ model.layers.2.self_attn.o_proj
144
+ model.layers.2.self_attn.q_proj
145
+ model.layers.2.self_attn.v_proj
146
+ model.layers.20.input_layernorm
147
  model.layers.20.mlp
 
 
 
148
  model.layers.20.mlp.act_fn
149
+ model.layers.20.mlp.down_proj
150
+ model.layers.20.mlp.gate_proj
151
+ model.layers.20.mlp.up_proj
152
  model.layers.20.post_attention_layernorm
153
+ model.layers.20.self_attn.k_proj
154
+ model.layers.20.self_attn.o_proj
155
+ model.layers.20.self_attn.q_proj
156
+ model.layers.20.self_attn.v_proj
157
+ model.layers.21.input_layernorm
 
 
158
  model.layers.21.mlp
159
+ model.layers.21.mlp.act_fn
160
+ model.layers.21.mlp.down_proj
161
  model.layers.21.mlp.gate_proj
162
  model.layers.21.mlp.up_proj
 
 
 
163
  model.layers.21.post_attention_layernorm
164
+ model.layers.21.self_attn.k_proj
165
+ model.layers.21.self_attn.o_proj
166
+ model.layers.21.self_attn.q_proj
167
+ model.layers.21.self_attn.v_proj
168
+ model.layers.22.input_layernorm
 
 
169
  model.layers.22.mlp
170
+ model.layers.22.mlp.act_fn
171
+ model.layers.22.mlp.down_proj
172
  model.layers.22.mlp.gate_proj
173
  model.layers.22.mlp.up_proj
 
 
 
174
  model.layers.22.post_attention_layernorm
175
+ model.layers.22.self_attn.k_proj
176
+ model.layers.22.self_attn.o_proj
177
+ model.layers.22.self_attn.q_proj
178
+ model.layers.22.self_attn.v_proj
179
+ model.layers.23.input_layernorm
 
 
180
  model.layers.23.mlp
181
+ model.layers.23.mlp.act_fn
182
+ model.layers.23.mlp.down_proj
183
  model.layers.23.mlp.gate_proj
184
  model.layers.23.mlp.up_proj
 
 
 
185
  model.layers.23.post_attention_layernorm
186
+ model.layers.23.self_attn.k_proj
187
+ model.layers.23.self_attn.o_proj
188
+ model.layers.23.self_attn.q_proj
189
+ model.layers.23.self_attn.v_proj
190
+ model.layers.24.input_layernorm
 
 
191
  model.layers.24.mlp
192
+ model.layers.24.mlp.act_fn
193
+ model.layers.24.mlp.down_proj
194
  model.layers.24.mlp.gate_proj
195
  model.layers.24.mlp.up_proj
 
 
 
196
  model.layers.24.post_attention_layernorm
197
+ model.layers.24.self_attn.k_proj
198
+ model.layers.24.self_attn.o_proj
199
+ model.layers.24.self_attn.q_proj
200
+ model.layers.24.self_attn.v_proj
201
+ model.layers.25.input_layernorm
 
 
202
  model.layers.25.mlp
203
+ model.layers.25.mlp.act_fn
204
+ model.layers.25.mlp.down_proj
205
  model.layers.25.mlp.gate_proj
206
  model.layers.25.mlp.up_proj
 
 
 
207
  model.layers.25.post_attention_layernorm
208
+ model.layers.25.self_attn.k_proj
209
+ model.layers.25.self_attn.o_proj
210
+ model.layers.25.self_attn.q_proj
211
+ model.layers.25.self_attn.v_proj
212
+ model.layers.26.input_layernorm
 
 
213
  model.layers.26.mlp
214
+ model.layers.26.mlp.act_fn
215
+ model.layers.26.mlp.down_proj
216
  model.layers.26.mlp.gate_proj
217
  model.layers.26.mlp.up_proj
 
 
 
218
  model.layers.26.post_attention_layernorm
219
+ model.layers.26.self_attn.k_proj
220
+ model.layers.26.self_attn.o_proj
221
+ model.layers.26.self_attn.q_proj
222
+ model.layers.26.self_attn.v_proj
223
+ model.layers.27.input_layernorm
 
 
224
  model.layers.27.mlp
225
+ model.layers.27.mlp.act_fn
226
+ model.layers.27.mlp.down_proj
227
  model.layers.27.mlp.gate_proj
228
  model.layers.27.mlp.up_proj
 
 
 
229
  model.layers.27.post_attention_layernorm
230
+ model.layers.27.self_attn.k_proj
231
+ model.layers.27.self_attn.o_proj
232
+ model.layers.27.self_attn.q_proj
233
+ model.layers.27.self_attn.v_proj
234
+ model.layers.3.input_layernorm
235
+ model.layers.3.mlp
236
+ model.layers.3.mlp.act_fn
237
+ model.layers.3.mlp.down_proj
238
+ model.layers.3.mlp.gate_proj
239
+ model.layers.3.mlp.up_proj
240
+ model.layers.3.post_attention_layernorm
241
+ model.layers.3.self_attn.k_proj
242
+ model.layers.3.self_attn.o_proj
243
+ model.layers.3.self_attn.q_proj
244
+ model.layers.3.self_attn.v_proj
245
+ model.layers.4.input_layernorm
246
+ model.layers.4.mlp
247
+ model.layers.4.mlp.act_fn
248
+ model.layers.4.mlp.down_proj
249
+ model.layers.4.mlp.gate_proj
250
+ model.layers.4.mlp.up_proj
251
+ model.layers.4.post_attention_layernorm
252
+ model.layers.4.self_attn.k_proj
253
+ model.layers.4.self_attn.o_proj
254
+ model.layers.4.self_attn.q_proj
255
+ model.layers.4.self_attn.v_proj
256
+ model.layers.5.input_layernorm
257
+ model.layers.5.mlp
258
+ model.layers.5.mlp.act_fn
259
+ model.layers.5.mlp.down_proj
260
+ model.layers.5.mlp.gate_proj
261
+ model.layers.5.mlp.up_proj
262
+ model.layers.5.post_attention_layernorm
263
+ model.layers.5.self_attn.k_proj
264
+ model.layers.5.self_attn.o_proj
265
+ model.layers.5.self_attn.q_proj
266
+ model.layers.5.self_attn.v_proj
267
+ model.layers.6.input_layernorm
268
+ model.layers.6.mlp
269
+ model.layers.6.mlp.act_fn
270
+ model.layers.6.mlp.down_proj
271
+ model.layers.6.mlp.gate_proj
272
+ model.layers.6.mlp.up_proj
273
+ model.layers.6.post_attention_layernorm
274
+ model.layers.6.self_attn.k_proj
275
+ model.layers.6.self_attn.o_proj
276
+ model.layers.6.self_attn.q_proj
277
+ model.layers.6.self_attn.v_proj
278
+ model.layers.7.input_layernorm
279
+ model.layers.7.mlp
280
+ model.layers.7.mlp.act_fn
281
+ model.layers.7.mlp.down_proj
282
+ model.layers.7.mlp.gate_proj
283
+ model.layers.7.mlp.up_proj
284
+ model.layers.7.post_attention_layernorm
285
+ model.layers.7.self_attn.k_proj
286
+ model.layers.7.self_attn.o_proj
287
+ model.layers.7.self_attn.q_proj
288
+ model.layers.7.self_attn.v_proj
289
+ model.layers.8.input_layernorm
290
+ model.layers.8.mlp
291
+ model.layers.8.mlp.act_fn
292
+ model.layers.8.mlp.down_proj
293
+ model.layers.8.mlp.gate_proj
294
+ model.layers.8.mlp.up_proj
295
+ model.layers.8.post_attention_layernorm
296
+ model.layers.8.self_attn.k_proj
297
+ model.layers.8.self_attn.o_proj
298
+ model.layers.8.self_attn.q_proj
299
+ model.layers.8.self_attn.v_proj
300
+ model.layers.9.input_layernorm
301
+ model.layers.9.mlp
302
+ model.layers.9.mlp.act_fn
303
+ model.layers.9.mlp.down_proj
304
+ model.layers.9.mlp.gate_proj
305
+ model.layers.9.mlp.up_proj
306
+ model.layers.9.post_attention_layernorm
307
+ model.layers.9.self_attn.k_proj
308
+ model.layers.9.self_attn.o_proj
309
+ model.layers.9.self_attn.q_proj
310
+ model.layers.9.self_attn.v_proj
311
  model.norm
312
+ visual
313
+ visual.blocks.0
314
+ visual.blocks.0.attn
315
+ visual.blocks.0.attn.proj
316
+ visual.blocks.0.attn.qkv
317
+ visual.blocks.0.mlp
318
+ visual.blocks.0.mlp.act
319
+ visual.blocks.0.mlp.fc1
320
+ visual.blocks.0.mlp.fc2
321
+ visual.blocks.0.norm1
322
+ visual.blocks.0.norm2
323
+ visual.blocks.1
324
+ visual.blocks.1.attn
325
+ visual.blocks.1.attn.proj
326
+ visual.blocks.1.attn.qkv
327
+ visual.blocks.1.mlp
328
+ visual.blocks.1.mlp.act
329
+ visual.blocks.1.mlp.fc1
330
+ visual.blocks.1.mlp.fc2
331
+ visual.blocks.1.norm1
332
+ visual.blocks.1.norm2
333
+ visual.blocks.10
334
+ visual.blocks.10.attn
335
+ visual.blocks.10.attn.proj
336
+ visual.blocks.10.attn.qkv
337
+ visual.blocks.10.mlp
338
+ visual.blocks.10.mlp.act
339
+ visual.blocks.10.mlp.fc1
340
+ visual.blocks.10.mlp.fc2
341
+ visual.blocks.10.norm1
342
+ visual.blocks.10.norm2
343
+ visual.blocks.11
344
+ visual.blocks.11.attn
345
+ visual.blocks.11.attn.proj
346
+ visual.blocks.11.attn.qkv
347
+ visual.blocks.11.mlp
348
+ visual.blocks.11.mlp.act
349
+ visual.blocks.11.mlp.fc1
350
+ visual.blocks.11.mlp.fc2
351
+ visual.blocks.11.norm1
352
+ visual.blocks.11.norm2
353
+ visual.blocks.12
354
+ visual.blocks.12.attn
355
+ visual.blocks.12.attn.proj
356
+ visual.blocks.12.attn.qkv
357
+ visual.blocks.12.mlp
358
+ visual.blocks.12.mlp.act
359
+ visual.blocks.12.mlp.fc1
360
+ visual.blocks.12.mlp.fc2
361
+ visual.blocks.12.norm1
362
+ visual.blocks.12.norm2
363
+ visual.blocks.13
364
+ visual.blocks.13.attn
365
+ visual.blocks.13.attn.proj
366
+ visual.blocks.13.attn.qkv
367
+ visual.blocks.13.mlp
368
+ visual.blocks.13.mlp.act
369
+ visual.blocks.13.mlp.fc1
370
+ visual.blocks.13.mlp.fc2
371
+ visual.blocks.13.norm1
372
+ visual.blocks.13.norm2
373
+ visual.blocks.14
374
+ visual.blocks.14.attn
375
+ visual.blocks.14.attn.proj
376
+ visual.blocks.14.attn.qkv
377
+ visual.blocks.14.mlp
378
+ visual.blocks.14.mlp.act
379
+ visual.blocks.14.mlp.fc1
380
+ visual.blocks.14.mlp.fc2
381
+ visual.blocks.14.norm1
382
+ visual.blocks.14.norm2
383
+ visual.blocks.15
384
+ visual.blocks.15.attn
385
+ visual.blocks.15.attn.proj
386
+ visual.blocks.15.attn.qkv
387
+ visual.blocks.15.mlp
388
+ visual.blocks.15.mlp.act
389
+ visual.blocks.15.mlp.fc1
390
+ visual.blocks.15.mlp.fc2
391
+ visual.blocks.15.norm1
392
+ visual.blocks.15.norm2
393
+ visual.blocks.16
394
+ visual.blocks.16.attn
395
+ visual.blocks.16.attn.proj
396
+ visual.blocks.16.attn.qkv
397
+ visual.blocks.16.mlp
398
+ visual.blocks.16.mlp.act
399
+ visual.blocks.16.mlp.fc1
400
+ visual.blocks.16.mlp.fc2
401
+ visual.blocks.16.norm1
402
+ visual.blocks.16.norm2
403
+ visual.blocks.17
404
+ visual.blocks.17.attn
405
+ visual.blocks.17.attn.proj
406
+ visual.blocks.17.attn.qkv
407
+ visual.blocks.17.mlp
408
+ visual.blocks.17.mlp.act
409
+ visual.blocks.17.mlp.fc1
410
+ visual.blocks.17.mlp.fc2
411
+ visual.blocks.17.norm1
412
+ visual.blocks.17.norm2
413
+ visual.blocks.18
414
+ visual.blocks.18.attn
415
+ visual.blocks.18.attn.proj
416
+ visual.blocks.18.attn.qkv
417
+ visual.blocks.18.mlp
418
+ visual.blocks.18.mlp.act
419
+ visual.blocks.18.mlp.fc1
420
+ visual.blocks.18.mlp.fc2
421
+ visual.blocks.18.norm1
422
+ visual.blocks.18.norm2
423
+ visual.blocks.19
424
+ visual.blocks.19.attn
425
+ visual.blocks.19.attn.proj
426
+ visual.blocks.19.attn.qkv
427
+ visual.blocks.19.mlp
428
+ visual.blocks.19.mlp.act
429
+ visual.blocks.19.mlp.fc1
430
+ visual.blocks.19.mlp.fc2
431
+ visual.blocks.19.norm1
432
+ visual.blocks.19.norm2
433
+ visual.blocks.2
434
+ visual.blocks.2.attn
435
+ visual.blocks.2.attn.proj
436
+ visual.blocks.2.attn.qkv
437
+ visual.blocks.2.mlp
438
+ visual.blocks.2.mlp.act
439
+ visual.blocks.2.mlp.fc1
440
+ visual.blocks.2.mlp.fc2
441
+ visual.blocks.2.norm1
442
+ visual.blocks.2.norm2
443
+ visual.blocks.20
444
+ visual.blocks.20.attn
445
+ visual.blocks.20.attn.proj
446
+ visual.blocks.20.attn.qkv
447
+ visual.blocks.20.mlp
448
+ visual.blocks.20.mlp.act
449
+ visual.blocks.20.mlp.fc1
450
+ visual.blocks.20.mlp.fc2
451
+ visual.blocks.20.norm1
452
+ visual.blocks.20.norm2
453
+ visual.blocks.21
454
+ visual.blocks.21.attn
455
+ visual.blocks.21.attn.proj
456
+ visual.blocks.21.attn.qkv
457
+ visual.blocks.21.mlp
458
+ visual.blocks.21.mlp.act
459
+ visual.blocks.21.mlp.fc1
460
+ visual.blocks.21.mlp.fc2
461
+ visual.blocks.21.norm1
462
+ visual.blocks.21.norm2
463
+ visual.blocks.22
464
+ visual.blocks.22.attn
465
+ visual.blocks.22.attn.proj
466
+ visual.blocks.22.attn.qkv
467
+ visual.blocks.22.mlp
468
+ visual.blocks.22.mlp.act
469
+ visual.blocks.22.mlp.fc1
470
+ visual.blocks.22.mlp.fc2
471
+ visual.blocks.22.norm1
472
+ visual.blocks.22.norm2
473
+ visual.blocks.23
474
+ visual.blocks.23.attn
475
+ visual.blocks.23.attn.proj
476
+ visual.blocks.23.attn.qkv
477
+ visual.blocks.23.mlp
478
+ visual.blocks.23.mlp.act
479
+ visual.blocks.23.mlp.fc1
480
+ visual.blocks.23.mlp.fc2
481
+ visual.blocks.23.norm1
482
+ visual.blocks.23.norm2
483
+ visual.blocks.24
484
+ visual.blocks.24.attn
485
+ visual.blocks.24.attn.proj
486
+ visual.blocks.24.attn.qkv
487
+ visual.blocks.24.mlp
488
+ visual.blocks.24.mlp.act
489
+ visual.blocks.24.mlp.fc1
490
+ visual.blocks.24.mlp.fc2
491
+ visual.blocks.24.norm1
492
+ visual.blocks.24.norm2
493
+ visual.blocks.25
494
+ visual.blocks.25.attn
495
+ visual.blocks.25.attn.proj
496
+ visual.blocks.25.attn.qkv
497
+ visual.blocks.25.mlp
498
+ visual.blocks.25.mlp.act
499
+ visual.blocks.25.mlp.fc1
500
+ visual.blocks.25.mlp.fc2
501
+ visual.blocks.25.norm1
502
+ visual.blocks.25.norm2
503
+ visual.blocks.26
504
+ visual.blocks.26.attn
505
+ visual.blocks.26.attn.proj
506
+ visual.blocks.26.attn.qkv
507
+ visual.blocks.26.mlp
508
+ visual.blocks.26.mlp.act
509
+ visual.blocks.26.mlp.fc1
510
+ visual.blocks.26.mlp.fc2
511
+ visual.blocks.26.norm1
512
+ visual.blocks.26.norm2
513
+ visual.blocks.27
514
+ visual.blocks.27.attn
515
+ visual.blocks.27.attn.proj
516
+ visual.blocks.27.attn.qkv
517
+ visual.blocks.27.mlp
518
+ visual.blocks.27.mlp.act
519
+ visual.blocks.27.mlp.fc1
520
+ visual.blocks.27.mlp.fc2
521
+ visual.blocks.27.norm1
522
+ visual.blocks.27.norm2
523
+ visual.blocks.28
524
+ visual.blocks.28.attn
525
+ visual.blocks.28.attn.proj
526
+ visual.blocks.28.attn.qkv
527
+ visual.blocks.28.mlp
528
+ visual.blocks.28.mlp.act
529
+ visual.blocks.28.mlp.fc1
530
+ visual.blocks.28.mlp.fc2
531
+ visual.blocks.28.norm1
532
+ visual.blocks.28.norm2
533
+ visual.blocks.29
534
+ visual.blocks.29.attn
535
+ visual.blocks.29.attn.proj
536
+ visual.blocks.29.attn.qkv
537
+ visual.blocks.29.mlp
538
+ visual.blocks.29.mlp.act
539
+ visual.blocks.29.mlp.fc1
540
+ visual.blocks.29.mlp.fc2
541
+ visual.blocks.29.norm1
542
+ visual.blocks.29.norm2
543
+ visual.blocks.3
544
+ visual.blocks.3.attn
545
+ visual.blocks.3.attn.proj
546
+ visual.blocks.3.attn.qkv
547
+ visual.blocks.3.mlp
548
+ visual.blocks.3.mlp.act
549
+ visual.blocks.3.mlp.fc1
550
+ visual.blocks.3.mlp.fc2
551
+ visual.blocks.3.norm1
552
+ visual.blocks.3.norm2
553
+ visual.blocks.30
554
+ visual.blocks.30.attn
555
+ visual.blocks.30.attn.proj
556
+ visual.blocks.30.attn.qkv
557
+ visual.blocks.30.mlp
558
+ visual.blocks.30.mlp.act
559
+ visual.blocks.30.mlp.fc1
560
+ visual.blocks.30.mlp.fc2
561
+ visual.blocks.30.norm1
562
+ visual.blocks.30.norm2
563
+ visual.blocks.31
564
+ visual.blocks.31.attn
565
+ visual.blocks.31.attn.proj
566
+ visual.blocks.31.attn.qkv
567
+ visual.blocks.31.mlp
568
+ visual.blocks.31.mlp.act
569
+ visual.blocks.31.mlp.fc1
570
+ visual.blocks.31.mlp.fc2
571
+ visual.blocks.31.norm1
572
+ visual.blocks.31.norm2
573
+ visual.blocks.4
574
+ visual.blocks.4.attn
575
+ visual.blocks.4.attn.proj
576
+ visual.blocks.4.attn.qkv
577
+ visual.blocks.4.mlp
578
+ visual.blocks.4.mlp.act
579
+ visual.blocks.4.mlp.fc1
580
+ visual.blocks.4.mlp.fc2
581
+ visual.blocks.4.norm1
582
+ visual.blocks.4.norm2
583
+ visual.blocks.5
584
+ visual.blocks.5.attn
585
+ visual.blocks.5.attn.proj
586
+ visual.blocks.5.attn.qkv
587
+ visual.blocks.5.mlp
588
+ visual.blocks.5.mlp.act
589
+ visual.blocks.5.mlp.fc1
590
+ visual.blocks.5.mlp.fc2
591
+ visual.blocks.5.norm1
592
+ visual.blocks.5.norm2
593
+ visual.blocks.6
594
+ visual.blocks.6.attn
595
+ visual.blocks.6.attn.proj
596
+ visual.blocks.6.attn.qkv
597
+ visual.blocks.6.mlp
598
+ visual.blocks.6.mlp.act
599
+ visual.blocks.6.mlp.fc1
600
+ visual.blocks.6.mlp.fc2
601
+ visual.blocks.6.norm1
602
+ visual.blocks.6.norm2
603
+ visual.blocks.7
604
+ visual.blocks.7.attn
605
+ visual.blocks.7.attn.proj
606
+ visual.blocks.7.attn.qkv
607
+ visual.blocks.7.mlp
608
+ visual.blocks.7.mlp.act
609
+ visual.blocks.7.mlp.fc1
610
+ visual.blocks.7.mlp.fc2
611
+ visual.blocks.7.norm1
612
+ visual.blocks.7.norm2
613
+ visual.blocks.8
614
+ visual.blocks.8.attn
615
+ visual.blocks.8.attn.proj
616
+ visual.blocks.8.attn.qkv
617
+ visual.blocks.8.mlp
618
+ visual.blocks.8.mlp.act
619
+ visual.blocks.8.mlp.fc1
620
+ visual.blocks.8.mlp.fc2
621
+ visual.blocks.8.norm1
622
+ visual.blocks.8.norm2
623
+ visual.blocks.9
624
+ visual.blocks.9.attn
625
+ visual.blocks.9.attn.proj
626
+ visual.blocks.9.attn.qkv
627
+ visual.blocks.9.mlp
628
+ visual.blocks.9.mlp.act
629
+ visual.blocks.9.mlp.fc1
630
+ visual.blocks.9.mlp.fc2
631
+ visual.blocks.9.norm1
632
+ visual.blocks.9.norm2
633
+ visual.merger
634
+ visual.merger.ln_q
635
+ visual.merger.mlp
636
+ visual.merger.mlp.0
637
+ visual.merger.mlp.1
638
+ visual.merger.mlp.2
639
+ visual.patch_embed
640
+ visual.patch_embed.proj
641
+ visual.rotary_pos_emb
{logs β†’ demo/logs}/Salesforce/blip2-opt-2.7b.txt RENAMED
@@ -1,1078 +1,876 @@
1
-
2
- vision_model
3
- vision_model.embeddings
4
- vision_model.embeddings.patch_embedding
5
- vision_model.encoder
6
- vision_model.encoder.layers
7
- vision_model.encoder.layers.0
8
- vision_model.encoder.layers.0.self_attn
9
- vision_model.encoder.layers.0.self_attn.dropout
10
- vision_model.encoder.layers.0.self_attn.qkv
11
- vision_model.encoder.layers.0.self_attn.projection
12
- vision_model.encoder.layers.0.layer_norm1
13
- vision_model.encoder.layers.0.mlp
14
- vision_model.encoder.layers.0.mlp.activation_fn
15
- vision_model.encoder.layers.0.mlp.fc1
16
- vision_model.encoder.layers.0.mlp.fc2
17
- vision_model.encoder.layers.0.layer_norm2
18
- vision_model.encoder.layers.1
19
- vision_model.encoder.layers.1.self_attn
20
- vision_model.encoder.layers.1.self_attn.dropout
21
- vision_model.encoder.layers.1.self_attn.qkv
22
- vision_model.encoder.layers.1.self_attn.projection
23
- vision_model.encoder.layers.1.layer_norm1
24
- vision_model.encoder.layers.1.mlp
25
- vision_model.encoder.layers.1.mlp.activation_fn
26
- vision_model.encoder.layers.1.mlp.fc1
27
- vision_model.encoder.layers.1.mlp.fc2
28
- vision_model.encoder.layers.1.layer_norm2
29
- vision_model.encoder.layers.2
30
- vision_model.encoder.layers.2.self_attn
31
- vision_model.encoder.layers.2.self_attn.dropout
32
- vision_model.encoder.layers.2.self_attn.qkv
33
- vision_model.encoder.layers.2.self_attn.projection
34
- vision_model.encoder.layers.2.layer_norm1
35
- vision_model.encoder.layers.2.mlp
36
- vision_model.encoder.layers.2.mlp.activation_fn
37
- vision_model.encoder.layers.2.mlp.fc1
38
- vision_model.encoder.layers.2.mlp.fc2
39
- vision_model.encoder.layers.2.layer_norm2
40
- vision_model.encoder.layers.3
41
- vision_model.encoder.layers.3.self_attn
42
- vision_model.encoder.layers.3.self_attn.dropout
43
- vision_model.encoder.layers.3.self_attn.qkv
44
- vision_model.encoder.layers.3.self_attn.projection
45
- vision_model.encoder.layers.3.layer_norm1
46
- vision_model.encoder.layers.3.mlp
47
- vision_model.encoder.layers.3.mlp.activation_fn
48
- vision_model.encoder.layers.3.mlp.fc1
49
- vision_model.encoder.layers.3.mlp.fc2
50
- vision_model.encoder.layers.3.layer_norm2
51
- vision_model.encoder.layers.4
52
- vision_model.encoder.layers.4.self_attn
53
- vision_model.encoder.layers.4.self_attn.dropout
54
- vision_model.encoder.layers.4.self_attn.qkv
55
- vision_model.encoder.layers.4.self_attn.projection
56
- vision_model.encoder.layers.4.layer_norm1
57
- vision_model.encoder.layers.4.mlp
58
- vision_model.encoder.layers.4.mlp.activation_fn
59
- vision_model.encoder.layers.4.mlp.fc1
60
- vision_model.encoder.layers.4.mlp.fc2
61
- vision_model.encoder.layers.4.layer_norm2
62
- vision_model.encoder.layers.5
63
- vision_model.encoder.layers.5.self_attn
64
- vision_model.encoder.layers.5.self_attn.dropout
65
- vision_model.encoder.layers.5.self_attn.qkv
66
- vision_model.encoder.layers.5.self_attn.projection
67
- vision_model.encoder.layers.5.layer_norm1
68
- vision_model.encoder.layers.5.mlp
69
- vision_model.encoder.layers.5.mlp.activation_fn
70
- vision_model.encoder.layers.5.mlp.fc1
71
- vision_model.encoder.layers.5.mlp.fc2
72
- vision_model.encoder.layers.5.layer_norm2
73
- vision_model.encoder.layers.6
74
- vision_model.encoder.layers.6.self_attn
75
- vision_model.encoder.layers.6.self_attn.dropout
76
- vision_model.encoder.layers.6.self_attn.qkv
77
- vision_model.encoder.layers.6.self_attn.projection
78
- vision_model.encoder.layers.6.layer_norm1
79
- vision_model.encoder.layers.6.mlp
80
- vision_model.encoder.layers.6.mlp.activation_fn
81
- vision_model.encoder.layers.6.mlp.fc1
82
- vision_model.encoder.layers.6.mlp.fc2
83
- vision_model.encoder.layers.6.layer_norm2
84
- vision_model.encoder.layers.7
85
- vision_model.encoder.layers.7.self_attn
86
- vision_model.encoder.layers.7.self_attn.dropout
87
- vision_model.encoder.layers.7.self_attn.qkv
88
- vision_model.encoder.layers.7.self_attn.projection
89
- vision_model.encoder.layers.7.layer_norm1
90
- vision_model.encoder.layers.7.mlp
91
- vision_model.encoder.layers.7.mlp.activation_fn
92
- vision_model.encoder.layers.7.mlp.fc1
93
- vision_model.encoder.layers.7.mlp.fc2
94
- vision_model.encoder.layers.7.layer_norm2
95
- vision_model.encoder.layers.8
96
- vision_model.encoder.layers.8.self_attn
97
- vision_model.encoder.layers.8.self_attn.dropout
98
- vision_model.encoder.layers.8.self_attn.qkv
99
- vision_model.encoder.layers.8.self_attn.projection
100
- vision_model.encoder.layers.8.layer_norm1
101
- vision_model.encoder.layers.8.mlp
102
- vision_model.encoder.layers.8.mlp.activation_fn
103
- vision_model.encoder.layers.8.mlp.fc1
104
- vision_model.encoder.layers.8.mlp.fc2
105
- vision_model.encoder.layers.8.layer_norm2
106
- vision_model.encoder.layers.9
107
- vision_model.encoder.layers.9.self_attn
108
- vision_model.encoder.layers.9.self_attn.dropout
109
- vision_model.encoder.layers.9.self_attn.qkv
110
- vision_model.encoder.layers.9.self_attn.projection
111
- vision_model.encoder.layers.9.layer_norm1
112
- vision_model.encoder.layers.9.mlp
113
- vision_model.encoder.layers.9.mlp.activation_fn
114
- vision_model.encoder.layers.9.mlp.fc1
115
- vision_model.encoder.layers.9.mlp.fc2
116
- vision_model.encoder.layers.9.layer_norm2
117
- vision_model.encoder.layers.10
118
- vision_model.encoder.layers.10.self_attn
119
- vision_model.encoder.layers.10.self_attn.dropout
120
- vision_model.encoder.layers.10.self_attn.qkv
121
- vision_model.encoder.layers.10.self_attn.projection
122
- vision_model.encoder.layers.10.layer_norm1
123
- vision_model.encoder.layers.10.mlp
124
- vision_model.encoder.layers.10.mlp.activation_fn
125
- vision_model.encoder.layers.10.mlp.fc1
126
- vision_model.encoder.layers.10.mlp.fc2
127
- vision_model.encoder.layers.10.layer_norm2
128
- vision_model.encoder.layers.11
129
- vision_model.encoder.layers.11.self_attn
130
- vision_model.encoder.layers.11.self_attn.dropout
131
- vision_model.encoder.layers.11.self_attn.qkv
132
- vision_model.encoder.layers.11.self_attn.projection
133
- vision_model.encoder.layers.11.layer_norm1
134
- vision_model.encoder.layers.11.mlp
135
- vision_model.encoder.layers.11.mlp.activation_fn
136
- vision_model.encoder.layers.11.mlp.fc1
137
- vision_model.encoder.layers.11.mlp.fc2
138
- vision_model.encoder.layers.11.layer_norm2
139
- vision_model.encoder.layers.12
140
- vision_model.encoder.layers.12.self_attn
141
- vision_model.encoder.layers.12.self_attn.dropout
142
- vision_model.encoder.layers.12.self_attn.qkv
143
- vision_model.encoder.layers.12.self_attn.projection
144
- vision_model.encoder.layers.12.layer_norm1
145
- vision_model.encoder.layers.12.mlp
146
- vision_model.encoder.layers.12.mlp.activation_fn
147
- vision_model.encoder.layers.12.mlp.fc1
148
- vision_model.encoder.layers.12.mlp.fc2
149
- vision_model.encoder.layers.12.layer_norm2
150
- vision_model.encoder.layers.13
151
- vision_model.encoder.layers.13.self_attn
152
- vision_model.encoder.layers.13.self_attn.dropout
153
- vision_model.encoder.layers.13.self_attn.qkv
154
- vision_model.encoder.layers.13.self_attn.projection
155
- vision_model.encoder.layers.13.layer_norm1
156
- vision_model.encoder.layers.13.mlp
157
- vision_model.encoder.layers.13.mlp.activation_fn
158
- vision_model.encoder.layers.13.mlp.fc1
159
- vision_model.encoder.layers.13.mlp.fc2
160
- vision_model.encoder.layers.13.layer_norm2
161
- vision_model.encoder.layers.14
162
- vision_model.encoder.layers.14.self_attn
163
- vision_model.encoder.layers.14.self_attn.dropout
164
- vision_model.encoder.layers.14.self_attn.qkv
165
- vision_model.encoder.layers.14.self_attn.projection
166
- vision_model.encoder.layers.14.layer_norm1
167
- vision_model.encoder.layers.14.mlp
168
- vision_model.encoder.layers.14.mlp.activation_fn
169
- vision_model.encoder.layers.14.mlp.fc1
170
- vision_model.encoder.layers.14.mlp.fc2
171
- vision_model.encoder.layers.14.layer_norm2
172
- vision_model.encoder.layers.15
173
- vision_model.encoder.layers.15.self_attn
174
- vision_model.encoder.layers.15.self_attn.dropout
175
- vision_model.encoder.layers.15.self_attn.qkv
176
- vision_model.encoder.layers.15.self_attn.projection
177
- vision_model.encoder.layers.15.layer_norm1
178
- vision_model.encoder.layers.15.mlp
179
- vision_model.encoder.layers.15.mlp.activation_fn
180
- vision_model.encoder.layers.15.mlp.fc1
181
- vision_model.encoder.layers.15.mlp.fc2
182
- vision_model.encoder.layers.15.layer_norm2
183
- vision_model.encoder.layers.16
184
- vision_model.encoder.layers.16.self_attn
185
- vision_model.encoder.layers.16.self_attn.dropout
186
- vision_model.encoder.layers.16.self_attn.qkv
187
- vision_model.encoder.layers.16.self_attn.projection
188
- vision_model.encoder.layers.16.layer_norm1
189
- vision_model.encoder.layers.16.mlp
190
- vision_model.encoder.layers.16.mlp.activation_fn
191
- vision_model.encoder.layers.16.mlp.fc1
192
- vision_model.encoder.layers.16.mlp.fc2
193
- vision_model.encoder.layers.16.layer_norm2
194
- vision_model.encoder.layers.17
195
- vision_model.encoder.layers.17.self_attn
196
- vision_model.encoder.layers.17.self_attn.dropout
197
- vision_model.encoder.layers.17.self_attn.qkv
198
- vision_model.encoder.layers.17.self_attn.projection
199
- vision_model.encoder.layers.17.layer_norm1
200
- vision_model.encoder.layers.17.mlp
201
- vision_model.encoder.layers.17.mlp.activation_fn
202
- vision_model.encoder.layers.17.mlp.fc1
203
- vision_model.encoder.layers.17.mlp.fc2
204
- vision_model.encoder.layers.17.layer_norm2
205
- vision_model.encoder.layers.18
206
- vision_model.encoder.layers.18.self_attn
207
- vision_model.encoder.layers.18.self_attn.dropout
208
- vision_model.encoder.layers.18.self_attn.qkv
209
- vision_model.encoder.layers.18.self_attn.projection
210
- vision_model.encoder.layers.18.layer_norm1
211
- vision_model.encoder.layers.18.mlp
212
- vision_model.encoder.layers.18.mlp.activation_fn
213
- vision_model.encoder.layers.18.mlp.fc1
214
- vision_model.encoder.layers.18.mlp.fc2
215
- vision_model.encoder.layers.18.layer_norm2
216
- vision_model.encoder.layers.19
217
- vision_model.encoder.layers.19.self_attn
218
- vision_model.encoder.layers.19.self_attn.dropout
219
- vision_model.encoder.layers.19.self_attn.qkv
220
- vision_model.encoder.layers.19.self_attn.projection
221
- vision_model.encoder.layers.19.layer_norm1
222
- vision_model.encoder.layers.19.mlp
223
- vision_model.encoder.layers.19.mlp.activation_fn
224
- vision_model.encoder.layers.19.mlp.fc1
225
- vision_model.encoder.layers.19.mlp.fc2
226
- vision_model.encoder.layers.19.layer_norm2
227
- vision_model.encoder.layers.20
228
- vision_model.encoder.layers.20.self_attn
229
- vision_model.encoder.layers.20.self_attn.dropout
230
- vision_model.encoder.layers.20.self_attn.qkv
231
- vision_model.encoder.layers.20.self_attn.projection
232
- vision_model.encoder.layers.20.layer_norm1
233
- vision_model.encoder.layers.20.mlp
234
- vision_model.encoder.layers.20.mlp.activation_fn
235
- vision_model.encoder.layers.20.mlp.fc1
236
- vision_model.encoder.layers.20.mlp.fc2
237
- vision_model.encoder.layers.20.layer_norm2
238
- vision_model.encoder.layers.21
239
- vision_model.encoder.layers.21.self_attn
240
- vision_model.encoder.layers.21.self_attn.dropout
241
- vision_model.encoder.layers.21.self_attn.qkv
242
- vision_model.encoder.layers.21.self_attn.projection
243
- vision_model.encoder.layers.21.layer_norm1
244
- vision_model.encoder.layers.21.mlp
245
- vision_model.encoder.layers.21.mlp.activation_fn
246
- vision_model.encoder.layers.21.mlp.fc1
247
- vision_model.encoder.layers.21.mlp.fc2
248
- vision_model.encoder.layers.21.layer_norm2
249
- vision_model.encoder.layers.22
250
- vision_model.encoder.layers.22.self_attn
251
- vision_model.encoder.layers.22.self_attn.dropout
252
- vision_model.encoder.layers.22.self_attn.qkv
253
- vision_model.encoder.layers.22.self_attn.projection
254
- vision_model.encoder.layers.22.layer_norm1
255
- vision_model.encoder.layers.22.mlp
256
- vision_model.encoder.layers.22.mlp.activation_fn
257
- vision_model.encoder.layers.22.mlp.fc1
258
- vision_model.encoder.layers.22.mlp.fc2
259
- vision_model.encoder.layers.22.layer_norm2
260
- vision_model.encoder.layers.23
261
- vision_model.encoder.layers.23.self_attn
262
- vision_model.encoder.layers.23.self_attn.dropout
263
- vision_model.encoder.layers.23.self_attn.qkv
264
- vision_model.encoder.layers.23.self_attn.projection
265
- vision_model.encoder.layers.23.layer_norm1
266
- vision_model.encoder.layers.23.mlp
267
- vision_model.encoder.layers.23.mlp.activation_fn
268
- vision_model.encoder.layers.23.mlp.fc1
269
- vision_model.encoder.layers.23.mlp.fc2
270
- vision_model.encoder.layers.23.layer_norm2
271
- vision_model.encoder.layers.24
272
- vision_model.encoder.layers.24.self_attn
273
- vision_model.encoder.layers.24.self_attn.dropout
274
- vision_model.encoder.layers.24.self_attn.qkv
275
- vision_model.encoder.layers.24.self_attn.projection
276
- vision_model.encoder.layers.24.layer_norm1
277
- vision_model.encoder.layers.24.mlp
278
- vision_model.encoder.layers.24.mlp.activation_fn
279
- vision_model.encoder.layers.24.mlp.fc1
280
- vision_model.encoder.layers.24.mlp.fc2
281
- vision_model.encoder.layers.24.layer_norm2
282
- vision_model.encoder.layers.25
283
- vision_model.encoder.layers.25.self_attn
284
- vision_model.encoder.layers.25.self_attn.dropout
285
- vision_model.encoder.layers.25.self_attn.qkv
286
- vision_model.encoder.layers.25.self_attn.projection
287
- vision_model.encoder.layers.25.layer_norm1
288
- vision_model.encoder.layers.25.mlp
289
- vision_model.encoder.layers.25.mlp.activation_fn
290
- vision_model.encoder.layers.25.mlp.fc1
291
- vision_model.encoder.layers.25.mlp.fc2
292
- vision_model.encoder.layers.25.layer_norm2
293
- vision_model.encoder.layers.26
294
- vision_model.encoder.layers.26.self_attn
295
- vision_model.encoder.layers.26.self_attn.dropout
296
- vision_model.encoder.layers.26.self_attn.qkv
297
- vision_model.encoder.layers.26.self_attn.projection
298
- vision_model.encoder.layers.26.layer_norm1
299
- vision_model.encoder.layers.26.mlp
300
- vision_model.encoder.layers.26.mlp.activation_fn
301
- vision_model.encoder.layers.26.mlp.fc1
302
- vision_model.encoder.layers.26.mlp.fc2
303
- vision_model.encoder.layers.26.layer_norm2
304
- vision_model.encoder.layers.27
305
- vision_model.encoder.layers.27.self_attn
306
- vision_model.encoder.layers.27.self_attn.dropout
307
- vision_model.encoder.layers.27.self_attn.qkv
308
- vision_model.encoder.layers.27.self_attn.projection
309
- vision_model.encoder.layers.27.layer_norm1
310
- vision_model.encoder.layers.27.mlp
311
- vision_model.encoder.layers.27.mlp.activation_fn
312
- vision_model.encoder.layers.27.mlp.fc1
313
- vision_model.encoder.layers.27.mlp.fc2
314
- vision_model.encoder.layers.27.layer_norm2
315
- vision_model.encoder.layers.28
316
- vision_model.encoder.layers.28.self_attn
317
- vision_model.encoder.layers.28.self_attn.dropout
318
- vision_model.encoder.layers.28.self_attn.qkv
319
- vision_model.encoder.layers.28.self_attn.projection
320
- vision_model.encoder.layers.28.layer_norm1
321
- vision_model.encoder.layers.28.mlp
322
- vision_model.encoder.layers.28.mlp.activation_fn
323
- vision_model.encoder.layers.28.mlp.fc1
324
- vision_model.encoder.layers.28.mlp.fc2
325
- vision_model.encoder.layers.28.layer_norm2
326
- vision_model.encoder.layers.29
327
- vision_model.encoder.layers.29.self_attn
328
- vision_model.encoder.layers.29.self_attn.dropout
329
- vision_model.encoder.layers.29.self_attn.qkv
330
- vision_model.encoder.layers.29.self_attn.projection
331
- vision_model.encoder.layers.29.layer_norm1
332
- vision_model.encoder.layers.29.mlp
333
- vision_model.encoder.layers.29.mlp.activation_fn
334
- vision_model.encoder.layers.29.mlp.fc1
335
- vision_model.encoder.layers.29.mlp.fc2
336
- vision_model.encoder.layers.29.layer_norm2
337
- vision_model.encoder.layers.30
338
- vision_model.encoder.layers.30.self_attn
339
- vision_model.encoder.layers.30.self_attn.dropout
340
- vision_model.encoder.layers.30.self_attn.qkv
341
- vision_model.encoder.layers.30.self_attn.projection
342
- vision_model.encoder.layers.30.layer_norm1
343
- vision_model.encoder.layers.30.mlp
344
- vision_model.encoder.layers.30.mlp.activation_fn
345
- vision_model.encoder.layers.30.mlp.fc1
346
- vision_model.encoder.layers.30.mlp.fc2
347
- vision_model.encoder.layers.30.layer_norm2
348
- vision_model.encoder.layers.31
349
- vision_model.encoder.layers.31.self_attn
350
- vision_model.encoder.layers.31.self_attn.dropout
351
- vision_model.encoder.layers.31.self_attn.qkv
352
- vision_model.encoder.layers.31.self_attn.projection
353
- vision_model.encoder.layers.31.layer_norm1
354
- vision_model.encoder.layers.31.mlp
355
- vision_model.encoder.layers.31.mlp.activation_fn
356
- vision_model.encoder.layers.31.mlp.fc1
357
- vision_model.encoder.layers.31.mlp.fc2
358
- vision_model.encoder.layers.31.layer_norm2
359
- vision_model.encoder.layers.32
360
- vision_model.encoder.layers.32.self_attn
361
- vision_model.encoder.layers.32.self_attn.dropout
362
- vision_model.encoder.layers.32.self_attn.qkv
363
- vision_model.encoder.layers.32.self_attn.projection
364
- vision_model.encoder.layers.32.layer_norm1
365
- vision_model.encoder.layers.32.mlp
366
- vision_model.encoder.layers.32.mlp.activation_fn
367
- vision_model.encoder.layers.32.mlp.fc1
368
- vision_model.encoder.layers.32.mlp.fc2
369
- vision_model.encoder.layers.32.layer_norm2
370
- vision_model.encoder.layers.33
371
- vision_model.encoder.layers.33.self_attn
372
- vision_model.encoder.layers.33.self_attn.dropout
373
- vision_model.encoder.layers.33.self_attn.qkv
374
- vision_model.encoder.layers.33.self_attn.projection
375
- vision_model.encoder.layers.33.layer_norm1
376
- vision_model.encoder.layers.33.mlp
377
- vision_model.encoder.layers.33.mlp.activation_fn
378
- vision_model.encoder.layers.33.mlp.fc1
379
- vision_model.encoder.layers.33.mlp.fc2
380
- vision_model.encoder.layers.33.layer_norm2
381
- vision_model.encoder.layers.34
382
- vision_model.encoder.layers.34.self_attn
383
- vision_model.encoder.layers.34.self_attn.dropout
384
- vision_model.encoder.layers.34.self_attn.qkv
385
- vision_model.encoder.layers.34.self_attn.projection
386
- vision_model.encoder.layers.34.layer_norm1
387
- vision_model.encoder.layers.34.mlp
388
- vision_model.encoder.layers.34.mlp.activation_fn
389
- vision_model.encoder.layers.34.mlp.fc1
390
- vision_model.encoder.layers.34.mlp.fc2
391
- vision_model.encoder.layers.34.layer_norm2
392
- vision_model.encoder.layers.35
393
- vision_model.encoder.layers.35.self_attn
394
- vision_model.encoder.layers.35.self_attn.dropout
395
- vision_model.encoder.layers.35.self_attn.qkv
396
- vision_model.encoder.layers.35.self_attn.projection
397
- vision_model.encoder.layers.35.layer_norm1
398
- vision_model.encoder.layers.35.mlp
399
- vision_model.encoder.layers.35.mlp.activation_fn
400
- vision_model.encoder.layers.35.mlp.fc1
401
- vision_model.encoder.layers.35.mlp.fc2
402
- vision_model.encoder.layers.35.layer_norm2
403
- vision_model.encoder.layers.36
404
- vision_model.encoder.layers.36.self_attn
405
- vision_model.encoder.layers.36.self_attn.dropout
406
- vision_model.encoder.layers.36.self_attn.qkv
407
- vision_model.encoder.layers.36.self_attn.projection
408
- vision_model.encoder.layers.36.layer_norm1
409
- vision_model.encoder.layers.36.mlp
410
- vision_model.encoder.layers.36.mlp.activation_fn
411
- vision_model.encoder.layers.36.mlp.fc1
412
- vision_model.encoder.layers.36.mlp.fc2
413
- vision_model.encoder.layers.36.layer_norm2
414
- vision_model.encoder.layers.37
415
- vision_model.encoder.layers.37.self_attn
416
- vision_model.encoder.layers.37.self_attn.dropout
417
- vision_model.encoder.layers.37.self_attn.qkv
418
- vision_model.encoder.layers.37.self_attn.projection
419
- vision_model.encoder.layers.37.layer_norm1
420
- vision_model.encoder.layers.37.mlp
421
- vision_model.encoder.layers.37.mlp.activation_fn
422
- vision_model.encoder.layers.37.mlp.fc1
423
- vision_model.encoder.layers.37.mlp.fc2
424
- vision_model.encoder.layers.37.layer_norm2
425
- vision_model.encoder.layers.38
426
- vision_model.encoder.layers.38.self_attn
427
- vision_model.encoder.layers.38.self_attn.dropout
428
- vision_model.encoder.layers.38.self_attn.qkv
429
- vision_model.encoder.layers.38.self_attn.projection
430
- vision_model.encoder.layers.38.layer_norm1
431
- vision_model.encoder.layers.38.mlp
432
- vision_model.encoder.layers.38.mlp.activation_fn
433
- vision_model.encoder.layers.38.mlp.fc1
434
- vision_model.encoder.layers.38.mlp.fc2
435
- vision_model.encoder.layers.38.layer_norm2
436
- vision_model.post_layernorm
437
- qformer
438
- qformer.layernorm
439
- qformer.dropout
440
- qformer.encoder
441
- qformer.encoder.layer
442
- qformer.encoder.layer.0
443
- qformer.encoder.layer.0.attention
444
- qformer.encoder.layer.0.attention.attention
445
- qformer.encoder.layer.0.attention.attention.query
446
- qformer.encoder.layer.0.attention.attention.key
447
- qformer.encoder.layer.0.attention.attention.value
448
- qformer.encoder.layer.0.attention.attention.dropout
449
- qformer.encoder.layer.0.attention.output
450
- qformer.encoder.layer.0.attention.output.dense
451
- qformer.encoder.layer.0.attention.output.LayerNorm
452
- qformer.encoder.layer.0.attention.output.dropout
453
- qformer.encoder.layer.0.crossattention
454
- qformer.encoder.layer.0.crossattention.attention
455
- qformer.encoder.layer.0.crossattention.attention.query
456
- qformer.encoder.layer.0.crossattention.attention.key
457
- qformer.encoder.layer.0.crossattention.attention.value
458
- qformer.encoder.layer.0.crossattention.attention.dropout
459
- qformer.encoder.layer.0.crossattention.output
460
- qformer.encoder.layer.0.crossattention.output.dense
461
- qformer.encoder.layer.0.crossattention.output.LayerNorm
462
- qformer.encoder.layer.0.crossattention.output.dropout
463
- qformer.encoder.layer.0.intermediate_query
464
- qformer.encoder.layer.0.intermediate_query.dense
465
- qformer.encoder.layer.0.intermediate_query.intermediate_act_fn
466
- qformer.encoder.layer.0.output_query
467
- qformer.encoder.layer.0.output_query.dense
468
- qformer.encoder.layer.0.output_query.LayerNorm
469
- qformer.encoder.layer.0.output_query.dropout
470
- qformer.encoder.layer.1
471
- qformer.encoder.layer.1.attention
472
- qformer.encoder.layer.1.attention.attention
473
- qformer.encoder.layer.1.attention.attention.query
474
- qformer.encoder.layer.1.attention.attention.key
475
- qformer.encoder.layer.1.attention.attention.value
476
- qformer.encoder.layer.1.attention.attention.dropout
477
- qformer.encoder.layer.1.attention.output
478
- qformer.encoder.layer.1.attention.output.dense
479
- qformer.encoder.layer.1.attention.output.LayerNorm
480
- qformer.encoder.layer.1.attention.output.dropout
481
- qformer.encoder.layer.1.intermediate_query
482
- qformer.encoder.layer.1.intermediate_query.dense
483
- qformer.encoder.layer.1.intermediate_query.intermediate_act_fn
484
- qformer.encoder.layer.1.output_query
485
- qformer.encoder.layer.1.output_query.dense
486
- qformer.encoder.layer.1.output_query.LayerNorm
487
- qformer.encoder.layer.1.output_query.dropout
488
- qformer.encoder.layer.2
489
- qformer.encoder.layer.2.attention
490
- qformer.encoder.layer.2.attention.attention
491
- qformer.encoder.layer.2.attention.attention.query
492
- qformer.encoder.layer.2.attention.attention.key
493
- qformer.encoder.layer.2.attention.attention.value
494
- qformer.encoder.layer.2.attention.attention.dropout
495
- qformer.encoder.layer.2.attention.output
496
- qformer.encoder.layer.2.attention.output.dense
497
- qformer.encoder.layer.2.attention.output.LayerNorm
498
- qformer.encoder.layer.2.attention.output.dropout
499
- qformer.encoder.layer.2.crossattention
500
- qformer.encoder.layer.2.crossattention.attention
501
- qformer.encoder.layer.2.crossattention.attention.query
502
- qformer.encoder.layer.2.crossattention.attention.key
503
- qformer.encoder.layer.2.crossattention.attention.value
504
- qformer.encoder.layer.2.crossattention.attention.dropout
505
- qformer.encoder.layer.2.crossattention.output
506
- qformer.encoder.layer.2.crossattention.output.dense
507
- qformer.encoder.layer.2.crossattention.output.LayerNorm
508
- qformer.encoder.layer.2.crossattention.output.dropout
509
- qformer.encoder.layer.2.intermediate_query
510
- qformer.encoder.layer.2.intermediate_query.dense
511
- qformer.encoder.layer.2.intermediate_query.intermediate_act_fn
512
- qformer.encoder.layer.2.output_query
513
- qformer.encoder.layer.2.output_query.dense
514
- qformer.encoder.layer.2.output_query.LayerNorm
515
- qformer.encoder.layer.2.output_query.dropout
516
- qformer.encoder.layer.3
517
- qformer.encoder.layer.3.attention
518
- qformer.encoder.layer.3.attention.attention
519
- qformer.encoder.layer.3.attention.attention.query
520
- qformer.encoder.layer.3.attention.attention.key
521
- qformer.encoder.layer.3.attention.attention.value
522
- qformer.encoder.layer.3.attention.attention.dropout
523
- qformer.encoder.layer.3.attention.output
524
- qformer.encoder.layer.3.attention.output.dense
525
- qformer.encoder.layer.3.attention.output.LayerNorm
526
- qformer.encoder.layer.3.attention.output.dropout
527
- qformer.encoder.layer.3.intermediate_query
528
- qformer.encoder.layer.3.intermediate_query.dense
529
- qformer.encoder.layer.3.intermediate_query.intermediate_act_fn
530
- qformer.encoder.layer.3.output_query
531
- qformer.encoder.layer.3.output_query.dense
532
- qformer.encoder.layer.3.output_query.LayerNorm
533
- qformer.encoder.layer.3.output_query.dropout
534
- qformer.encoder.layer.4
535
- qformer.encoder.layer.4.attention
536
- qformer.encoder.layer.4.attention.attention
537
- qformer.encoder.layer.4.attention.attention.query
538
- qformer.encoder.layer.4.attention.attention.key
539
- qformer.encoder.layer.4.attention.attention.value
540
- qformer.encoder.layer.4.attention.attention.dropout
541
- qformer.encoder.layer.4.attention.output
542
- qformer.encoder.layer.4.attention.output.dense
543
- qformer.encoder.layer.4.attention.output.LayerNorm
544
- qformer.encoder.layer.4.attention.output.dropout
545
- qformer.encoder.layer.4.crossattention
546
- qformer.encoder.layer.4.crossattention.attention
547
- qformer.encoder.layer.4.crossattention.attention.query
548
- qformer.encoder.layer.4.crossattention.attention.key
549
- qformer.encoder.layer.4.crossattention.attention.value
550
- qformer.encoder.layer.4.crossattention.attention.dropout
551
- qformer.encoder.layer.4.crossattention.output
552
- qformer.encoder.layer.4.crossattention.output.dense
553
- qformer.encoder.layer.4.crossattention.output.LayerNorm
554
- qformer.encoder.layer.4.crossattention.output.dropout
555
- qformer.encoder.layer.4.intermediate_query
556
- qformer.encoder.layer.4.intermediate_query.dense
557
- qformer.encoder.layer.4.intermediate_query.intermediate_act_fn
558
- qformer.encoder.layer.4.output_query
559
- qformer.encoder.layer.4.output_query.dense
560
- qformer.encoder.layer.4.output_query.LayerNorm
561
- qformer.encoder.layer.4.output_query.dropout
562
- qformer.encoder.layer.5
563
- qformer.encoder.layer.5.attention
564
- qformer.encoder.layer.5.attention.attention
565
- qformer.encoder.layer.5.attention.attention.query
566
- qformer.encoder.layer.5.attention.attention.key
567
- qformer.encoder.layer.5.attention.attention.value
568
- qformer.encoder.layer.5.attention.attention.dropout
569
- qformer.encoder.layer.5.attention.output
570
- qformer.encoder.layer.5.attention.output.dense
571
- qformer.encoder.layer.5.attention.output.LayerNorm
572
- qformer.encoder.layer.5.attention.output.dropout
573
- qformer.encoder.layer.5.intermediate_query
574
- qformer.encoder.layer.5.intermediate_query.dense
575
- qformer.encoder.layer.5.intermediate_query.intermediate_act_fn
576
- qformer.encoder.layer.5.output_query
577
- qformer.encoder.layer.5.output_query.dense
578
- qformer.encoder.layer.5.output_query.LayerNorm
579
- qformer.encoder.layer.5.output_query.dropout
580
- qformer.encoder.layer.6
581
- qformer.encoder.layer.6.attention
582
- qformer.encoder.layer.6.attention.attention
583
- qformer.encoder.layer.6.attention.attention.query
584
- qformer.encoder.layer.6.attention.attention.key
585
- qformer.encoder.layer.6.attention.attention.value
586
- qformer.encoder.layer.6.attention.attention.dropout
587
- qformer.encoder.layer.6.attention.output
588
- qformer.encoder.layer.6.attention.output.dense
589
- qformer.encoder.layer.6.attention.output.LayerNorm
590
- qformer.encoder.layer.6.attention.output.dropout
591
- qformer.encoder.layer.6.crossattention
592
- qformer.encoder.layer.6.crossattention.attention
593
- qformer.encoder.layer.6.crossattention.attention.query
594
- qformer.encoder.layer.6.crossattention.attention.key
595
- qformer.encoder.layer.6.crossattention.attention.value
596
- qformer.encoder.layer.6.crossattention.attention.dropout
597
- qformer.encoder.layer.6.crossattention.output
598
- qformer.encoder.layer.6.crossattention.output.dense
599
- qformer.encoder.layer.6.crossattention.output.LayerNorm
600
- qformer.encoder.layer.6.crossattention.output.dropout
601
- qformer.encoder.layer.6.intermediate_query
602
- qformer.encoder.layer.6.intermediate_query.dense
603
- qformer.encoder.layer.6.intermediate_query.intermediate_act_fn
604
- qformer.encoder.layer.6.output_query
605
- qformer.encoder.layer.6.output_query.dense
606
- qformer.encoder.layer.6.output_query.LayerNorm
607
- qformer.encoder.layer.6.output_query.dropout
608
- qformer.encoder.layer.7
609
- qformer.encoder.layer.7.attention
610
- qformer.encoder.layer.7.attention.attention
611
- qformer.encoder.layer.7.attention.attention.query
612
- qformer.encoder.layer.7.attention.attention.key
613
- qformer.encoder.layer.7.attention.attention.value
614
- qformer.encoder.layer.7.attention.attention.dropout
615
- qformer.encoder.layer.7.attention.output
616
- qformer.encoder.layer.7.attention.output.dense
617
- qformer.encoder.layer.7.attention.output.LayerNorm
618
- qformer.encoder.layer.7.attention.output.dropout
619
- qformer.encoder.layer.7.intermediate_query
620
- qformer.encoder.layer.7.intermediate_query.dense
621
- qformer.encoder.layer.7.intermediate_query.intermediate_act_fn
622
- qformer.encoder.layer.7.output_query
623
- qformer.encoder.layer.7.output_query.dense
624
- qformer.encoder.layer.7.output_query.LayerNorm
625
- qformer.encoder.layer.7.output_query.dropout
626
- qformer.encoder.layer.8
627
- qformer.encoder.layer.8.attention
628
- qformer.encoder.layer.8.attention.attention
629
- qformer.encoder.layer.8.attention.attention.query
630
- qformer.encoder.layer.8.attention.attention.key
631
- qformer.encoder.layer.8.attention.attention.value
632
- qformer.encoder.layer.8.attention.attention.dropout
633
- qformer.encoder.layer.8.attention.output
634
- qformer.encoder.layer.8.attention.output.dense
635
- qformer.encoder.layer.8.attention.output.LayerNorm
636
- qformer.encoder.layer.8.attention.output.dropout
637
- qformer.encoder.layer.8.crossattention
638
- qformer.encoder.layer.8.crossattention.attention
639
- qformer.encoder.layer.8.crossattention.attention.query
640
- qformer.encoder.layer.8.crossattention.attention.key
641
- qformer.encoder.layer.8.crossattention.attention.value
642
- qformer.encoder.layer.8.crossattention.attention.dropout
643
- qformer.encoder.layer.8.crossattention.output
644
- qformer.encoder.layer.8.crossattention.output.dense
645
- qformer.encoder.layer.8.crossattention.output.LayerNorm
646
- qformer.encoder.layer.8.crossattention.output.dropout
647
- qformer.encoder.layer.8.intermediate_query
648
- qformer.encoder.layer.8.intermediate_query.dense
649
- qformer.encoder.layer.8.intermediate_query.intermediate_act_fn
650
- qformer.encoder.layer.8.output_query
651
- qformer.encoder.layer.8.output_query.dense
652
- qformer.encoder.layer.8.output_query.LayerNorm
653
- qformer.encoder.layer.8.output_query.dropout
654
- qformer.encoder.layer.9
655
- qformer.encoder.layer.9.attention
656
- qformer.encoder.layer.9.attention.attention
657
- qformer.encoder.layer.9.attention.attention.query
658
- qformer.encoder.layer.9.attention.attention.key
659
- qformer.encoder.layer.9.attention.attention.value
660
- qformer.encoder.layer.9.attention.attention.dropout
661
- qformer.encoder.layer.9.attention.output
662
- qformer.encoder.layer.9.attention.output.dense
663
- qformer.encoder.layer.9.attention.output.LayerNorm
664
- qformer.encoder.layer.9.attention.output.dropout
665
- qformer.encoder.layer.9.intermediate_query
666
- qformer.encoder.layer.9.intermediate_query.dense
667
- qformer.encoder.layer.9.intermediate_query.intermediate_act_fn
668
- qformer.encoder.layer.9.output_query
669
- qformer.encoder.layer.9.output_query.dense
670
- qformer.encoder.layer.9.output_query.LayerNorm
671
- qformer.encoder.layer.9.output_query.dropout
672
- qformer.encoder.layer.10
673
- qformer.encoder.layer.10.attention
674
- qformer.encoder.layer.10.attention.attention
675
- qformer.encoder.layer.10.attention.attention.query
676
- qformer.encoder.layer.10.attention.attention.key
677
- qformer.encoder.layer.10.attention.attention.value
678
- qformer.encoder.layer.10.attention.attention.dropout
679
- qformer.encoder.layer.10.attention.output
680
- qformer.encoder.layer.10.attention.output.dense
681
- qformer.encoder.layer.10.attention.output.LayerNorm
682
- qformer.encoder.layer.10.attention.output.dropout
683
- qformer.encoder.layer.10.crossattention
684
- qformer.encoder.layer.10.crossattention.attention
685
- qformer.encoder.layer.10.crossattention.attention.query
686
- qformer.encoder.layer.10.crossattention.attention.key
687
- qformer.encoder.layer.10.crossattention.attention.value
688
- qformer.encoder.layer.10.crossattention.attention.dropout
689
- qformer.encoder.layer.10.crossattention.output
690
- qformer.encoder.layer.10.crossattention.output.dense
691
- qformer.encoder.layer.10.crossattention.output.LayerNorm
692
- qformer.encoder.layer.10.crossattention.output.dropout
693
- qformer.encoder.layer.10.intermediate_query
694
- qformer.encoder.layer.10.intermediate_query.dense
695
- qformer.encoder.layer.10.intermediate_query.intermediate_act_fn
696
- qformer.encoder.layer.10.output_query
697
- qformer.encoder.layer.10.output_query.dense
698
- qformer.encoder.layer.10.output_query.LayerNorm
699
- qformer.encoder.layer.10.output_query.dropout
700
- qformer.encoder.layer.11
701
- qformer.encoder.layer.11.attention
702
- qformer.encoder.layer.11.attention.attention
703
- qformer.encoder.layer.11.attention.attention.query
704
- qformer.encoder.layer.11.attention.attention.key
705
- qformer.encoder.layer.11.attention.attention.value
706
- qformer.encoder.layer.11.attention.attention.dropout
707
- qformer.encoder.layer.11.attention.output
708
- qformer.encoder.layer.11.attention.output.dense
709
- qformer.encoder.layer.11.attention.output.LayerNorm
710
- qformer.encoder.layer.11.attention.output.dropout
711
- qformer.encoder.layer.11.intermediate_query
712
- qformer.encoder.layer.11.intermediate_query.dense
713
- qformer.encoder.layer.11.intermediate_query.intermediate_act_fn
714
- qformer.encoder.layer.11.output_query
715
- qformer.encoder.layer.11.output_query.dense
716
- qformer.encoder.layer.11.output_query.LayerNorm
717
- qformer.encoder.layer.11.output_query.dropout
718
- language_projection
719
- language_model
720
- language_model.model
721
- language_model.model.decoder
722
- language_model.model.decoder.embed_tokens
723
  language_model.model.decoder.embed_positions
 
724
  language_model.model.decoder.final_layer_norm
725
- language_model.model.decoder.layers
726
- language_model.model.decoder.layers.0
727
- language_model.model.decoder.layers.0.self_attn
728
- language_model.model.decoder.layers.0.self_attn.k_proj
729
- language_model.model.decoder.layers.0.self_attn.v_proj
730
- language_model.model.decoder.layers.0.self_attn.q_proj
731
- language_model.model.decoder.layers.0.self_attn.out_proj
732
  language_model.model.decoder.layers.0.activation_fn
733
- language_model.model.decoder.layers.0.self_attn_layer_norm
734
  language_model.model.decoder.layers.0.fc1
735
  language_model.model.decoder.layers.0.fc2
736
  language_model.model.decoder.layers.0.final_layer_norm
737
- language_model.model.decoder.layers.1
738
- language_model.model.decoder.layers.1.self_attn
739
- language_model.model.decoder.layers.1.self_attn.k_proj
740
- language_model.model.decoder.layers.1.self_attn.v_proj
741
- language_model.model.decoder.layers.1.self_attn.q_proj
742
- language_model.model.decoder.layers.1.self_attn.out_proj
743
  language_model.model.decoder.layers.1.activation_fn
744
- language_model.model.decoder.layers.1.self_attn_layer_norm
745
  language_model.model.decoder.layers.1.fc1
746
  language_model.model.decoder.layers.1.fc2
747
  language_model.model.decoder.layers.1.final_layer_norm
748
- language_model.model.decoder.layers.2
749
- language_model.model.decoder.layers.2.self_attn
750
- language_model.model.decoder.layers.2.self_attn.k_proj
751
- language_model.model.decoder.layers.2.self_attn.v_proj
752
- language_model.model.decoder.layers.2.self_attn.q_proj
753
- language_model.model.decoder.layers.2.self_attn.out_proj
754
- language_model.model.decoder.layers.2.activation_fn
755
- language_model.model.decoder.layers.2.self_attn_layer_norm
756
- language_model.model.decoder.layers.2.fc1
757
- language_model.model.decoder.layers.2.fc2
758
- language_model.model.decoder.layers.2.final_layer_norm
759
- language_model.model.decoder.layers.3
760
- language_model.model.decoder.layers.3.self_attn
761
- language_model.model.decoder.layers.3.self_attn.k_proj
762
- language_model.model.decoder.layers.3.self_attn.v_proj
763
- language_model.model.decoder.layers.3.self_attn.q_proj
764
- language_model.model.decoder.layers.3.self_attn.out_proj
765
- language_model.model.decoder.layers.3.activation_fn
766
- language_model.model.decoder.layers.3.self_attn_layer_norm
767
- language_model.model.decoder.layers.3.fc1
768
- language_model.model.decoder.layers.3.fc2
769
- language_model.model.decoder.layers.3.final_layer_norm
770
- language_model.model.decoder.layers.4
771
- language_model.model.decoder.layers.4.self_attn
772
- language_model.model.decoder.layers.4.self_attn.k_proj
773
- language_model.model.decoder.layers.4.self_attn.v_proj
774
- language_model.model.decoder.layers.4.self_attn.q_proj
775
- language_model.model.decoder.layers.4.self_attn.out_proj
776
- language_model.model.decoder.layers.4.activation_fn
777
- language_model.model.decoder.layers.4.self_attn_layer_norm
778
- language_model.model.decoder.layers.4.fc1
779
- language_model.model.decoder.layers.4.fc2
780
- language_model.model.decoder.layers.4.final_layer_norm
781
- language_model.model.decoder.layers.5
782
- language_model.model.decoder.layers.5.self_attn
783
- language_model.model.decoder.layers.5.self_attn.k_proj
784
- language_model.model.decoder.layers.5.self_attn.v_proj
785
- language_model.model.decoder.layers.5.self_attn.q_proj
786
- language_model.model.decoder.layers.5.self_attn.out_proj
787
- language_model.model.decoder.layers.5.activation_fn
788
- language_model.model.decoder.layers.5.self_attn_layer_norm
789
- language_model.model.decoder.layers.5.fc1
790
- language_model.model.decoder.layers.5.fc2
791
- language_model.model.decoder.layers.5.final_layer_norm
792
- language_model.model.decoder.layers.6
793
- language_model.model.decoder.layers.6.self_attn
794
- language_model.model.decoder.layers.6.self_attn.k_proj
795
- language_model.model.decoder.layers.6.self_attn.v_proj
796
- language_model.model.decoder.layers.6.self_attn.q_proj
797
- language_model.model.decoder.layers.6.self_attn.out_proj
798
- language_model.model.decoder.layers.6.activation_fn
799
- language_model.model.decoder.layers.6.self_attn_layer_norm
800
- language_model.model.decoder.layers.6.fc1
801
- language_model.model.decoder.layers.6.fc2
802
- language_model.model.decoder.layers.6.final_layer_norm
803
- language_model.model.decoder.layers.7
804
- language_model.model.decoder.layers.7.self_attn
805
- language_model.model.decoder.layers.7.self_attn.k_proj
806
- language_model.model.decoder.layers.7.self_attn.v_proj
807
- language_model.model.decoder.layers.7.self_attn.q_proj
808
- language_model.model.decoder.layers.7.self_attn.out_proj
809
- language_model.model.decoder.layers.7.activation_fn
810
- language_model.model.decoder.layers.7.self_attn_layer_norm
811
- language_model.model.decoder.layers.7.fc1
812
- language_model.model.decoder.layers.7.fc2
813
- language_model.model.decoder.layers.7.final_layer_norm
814
- language_model.model.decoder.layers.8
815
- language_model.model.decoder.layers.8.self_attn
816
- language_model.model.decoder.layers.8.self_attn.k_proj
817
- language_model.model.decoder.layers.8.self_attn.v_proj
818
- language_model.model.decoder.layers.8.self_attn.q_proj
819
- language_model.model.decoder.layers.8.self_attn.out_proj
820
- language_model.model.decoder.layers.8.activation_fn
821
- language_model.model.decoder.layers.8.self_attn_layer_norm
822
- language_model.model.decoder.layers.8.fc1
823
- language_model.model.decoder.layers.8.fc2
824
- language_model.model.decoder.layers.8.final_layer_norm
825
- language_model.model.decoder.layers.9
826
- language_model.model.decoder.layers.9.self_attn
827
- language_model.model.decoder.layers.9.self_attn.k_proj
828
- language_model.model.decoder.layers.9.self_attn.v_proj
829
- language_model.model.decoder.layers.9.self_attn.q_proj
830
- language_model.model.decoder.layers.9.self_attn.out_proj
831
- language_model.model.decoder.layers.9.activation_fn
832
- language_model.model.decoder.layers.9.self_attn_layer_norm
833
- language_model.model.decoder.layers.9.fc1
834
- language_model.model.decoder.layers.9.fc2
835
- language_model.model.decoder.layers.9.final_layer_norm
836
- language_model.model.decoder.layers.10
837
- language_model.model.decoder.layers.10.self_attn
838
- language_model.model.decoder.layers.10.self_attn.k_proj
839
- language_model.model.decoder.layers.10.self_attn.v_proj
840
- language_model.model.decoder.layers.10.self_attn.q_proj
841
- language_model.model.decoder.layers.10.self_attn.out_proj
842
  language_model.model.decoder.layers.10.activation_fn
843
- language_model.model.decoder.layers.10.self_attn_layer_norm
844
  language_model.model.decoder.layers.10.fc1
845
  language_model.model.decoder.layers.10.fc2
846
  language_model.model.decoder.layers.10.final_layer_norm
847
- language_model.model.decoder.layers.11
848
- language_model.model.decoder.layers.11.self_attn
849
- language_model.model.decoder.layers.11.self_attn.k_proj
850
- language_model.model.decoder.layers.11.self_attn.v_proj
851
- language_model.model.decoder.layers.11.self_attn.q_proj
852
- language_model.model.decoder.layers.11.self_attn.out_proj
853
  language_model.model.decoder.layers.11.activation_fn
854
- language_model.model.decoder.layers.11.self_attn_layer_norm
855
  language_model.model.decoder.layers.11.fc1
856
  language_model.model.decoder.layers.11.fc2
857
  language_model.model.decoder.layers.11.final_layer_norm
858
- language_model.model.decoder.layers.12
859
- language_model.model.decoder.layers.12.self_attn
860
- language_model.model.decoder.layers.12.self_attn.k_proj
861
- language_model.model.decoder.layers.12.self_attn.v_proj
862
- language_model.model.decoder.layers.12.self_attn.q_proj
863
- language_model.model.decoder.layers.12.self_attn.out_proj
864
  language_model.model.decoder.layers.12.activation_fn
865
- language_model.model.decoder.layers.12.self_attn_layer_norm
866
  language_model.model.decoder.layers.12.fc1
867
  language_model.model.decoder.layers.12.fc2
868
  language_model.model.decoder.layers.12.final_layer_norm
869
- language_model.model.decoder.layers.13
870
- language_model.model.decoder.layers.13.self_attn
871
- language_model.model.decoder.layers.13.self_attn.k_proj
872
- language_model.model.decoder.layers.13.self_attn.v_proj
873
- language_model.model.decoder.layers.13.self_attn.q_proj
874
- language_model.model.decoder.layers.13.self_attn.out_proj
875
  language_model.model.decoder.layers.13.activation_fn
876
- language_model.model.decoder.layers.13.self_attn_layer_norm
877
  language_model.model.decoder.layers.13.fc1
878
  language_model.model.decoder.layers.13.fc2
879
  language_model.model.decoder.layers.13.final_layer_norm
880
- language_model.model.decoder.layers.14
881
- language_model.model.decoder.layers.14.self_attn
882
- language_model.model.decoder.layers.14.self_attn.k_proj
883
- language_model.model.decoder.layers.14.self_attn.v_proj
884
- language_model.model.decoder.layers.14.self_attn.q_proj
885
- language_model.model.decoder.layers.14.self_attn.out_proj
886
  language_model.model.decoder.layers.14.activation_fn
887
- language_model.model.decoder.layers.14.self_attn_layer_norm
888
  language_model.model.decoder.layers.14.fc1
889
  language_model.model.decoder.layers.14.fc2
890
  language_model.model.decoder.layers.14.final_layer_norm
891
- language_model.model.decoder.layers.15
892
- language_model.model.decoder.layers.15.self_attn
893
- language_model.model.decoder.layers.15.self_attn.k_proj
894
- language_model.model.decoder.layers.15.self_attn.v_proj
895
- language_model.model.decoder.layers.15.self_attn.q_proj
896
- language_model.model.decoder.layers.15.self_attn.out_proj
897
  language_model.model.decoder.layers.15.activation_fn
898
- language_model.model.decoder.layers.15.self_attn_layer_norm
899
  language_model.model.decoder.layers.15.fc1
900
  language_model.model.decoder.layers.15.fc2
901
  language_model.model.decoder.layers.15.final_layer_norm
902
- language_model.model.decoder.layers.16
903
- language_model.model.decoder.layers.16.self_attn
904
- language_model.model.decoder.layers.16.self_attn.k_proj
905
- language_model.model.decoder.layers.16.self_attn.v_proj
906
- language_model.model.decoder.layers.16.self_attn.q_proj
907
- language_model.model.decoder.layers.16.self_attn.out_proj
908
  language_model.model.decoder.layers.16.activation_fn
909
- language_model.model.decoder.layers.16.self_attn_layer_norm
910
  language_model.model.decoder.layers.16.fc1
911
  language_model.model.decoder.layers.16.fc2
912
  language_model.model.decoder.layers.16.final_layer_norm
913
- language_model.model.decoder.layers.17
914
- language_model.model.decoder.layers.17.self_attn
915
- language_model.model.decoder.layers.17.self_attn.k_proj
916
- language_model.model.decoder.layers.17.self_attn.v_proj
917
- language_model.model.decoder.layers.17.self_attn.q_proj
918
- language_model.model.decoder.layers.17.self_attn.out_proj
919
  language_model.model.decoder.layers.17.activation_fn
920
- language_model.model.decoder.layers.17.self_attn_layer_norm
921
  language_model.model.decoder.layers.17.fc1
922
  language_model.model.decoder.layers.17.fc2
923
  language_model.model.decoder.layers.17.final_layer_norm
924
- language_model.model.decoder.layers.18
925
- language_model.model.decoder.layers.18.self_attn
926
- language_model.model.decoder.layers.18.self_attn.k_proj
927
- language_model.model.decoder.layers.18.self_attn.v_proj
928
- language_model.model.decoder.layers.18.self_attn.q_proj
929
- language_model.model.decoder.layers.18.self_attn.out_proj
930
  language_model.model.decoder.layers.18.activation_fn
931
- language_model.model.decoder.layers.18.self_attn_layer_norm
932
  language_model.model.decoder.layers.18.fc1
933
  language_model.model.decoder.layers.18.fc2
934
  language_model.model.decoder.layers.18.final_layer_norm
935
- language_model.model.decoder.layers.19
936
- language_model.model.decoder.layers.19.self_attn
937
- language_model.model.decoder.layers.19.self_attn.k_proj
938
- language_model.model.decoder.layers.19.self_attn.v_proj
939
- language_model.model.decoder.layers.19.self_attn.q_proj
940
- language_model.model.decoder.layers.19.self_attn.out_proj
941
  language_model.model.decoder.layers.19.activation_fn
942
- language_model.model.decoder.layers.19.self_attn_layer_norm
943
  language_model.model.decoder.layers.19.fc1
944
  language_model.model.decoder.layers.19.fc2
945
  language_model.model.decoder.layers.19.final_layer_norm
946
- language_model.model.decoder.layers.20
947
- language_model.model.decoder.layers.20.self_attn
948
- language_model.model.decoder.layers.20.self_attn.k_proj
949
- language_model.model.decoder.layers.20.self_attn.v_proj
950
- language_model.model.decoder.layers.20.self_attn.q_proj
951
- language_model.model.decoder.layers.20.self_attn.out_proj
 
 
 
 
 
 
 
 
952
  language_model.model.decoder.layers.20.activation_fn
953
- language_model.model.decoder.layers.20.self_attn_layer_norm
954
  language_model.model.decoder.layers.20.fc1
955
  language_model.model.decoder.layers.20.fc2
956
  language_model.model.decoder.layers.20.final_layer_norm
957
- language_model.model.decoder.layers.21
958
- language_model.model.decoder.layers.21.self_attn
959
- language_model.model.decoder.layers.21.self_attn.k_proj
960
- language_model.model.decoder.layers.21.self_attn.v_proj
961
- language_model.model.decoder.layers.21.self_attn.q_proj
962
- language_model.model.decoder.layers.21.self_attn.out_proj
963
  language_model.model.decoder.layers.21.activation_fn
964
- language_model.model.decoder.layers.21.self_attn_layer_norm
965
  language_model.model.decoder.layers.21.fc1
966
  language_model.model.decoder.layers.21.fc2
967
  language_model.model.decoder.layers.21.final_layer_norm
968
- language_model.model.decoder.layers.22
969
- language_model.model.decoder.layers.22.self_attn
970
- language_model.model.decoder.layers.22.self_attn.k_proj
971
- language_model.model.decoder.layers.22.self_attn.v_proj
972
- language_model.model.decoder.layers.22.self_attn.q_proj
973
- language_model.model.decoder.layers.22.self_attn.out_proj
974
  language_model.model.decoder.layers.22.activation_fn
975
- language_model.model.decoder.layers.22.self_attn_layer_norm
976
  language_model.model.decoder.layers.22.fc1
977
  language_model.model.decoder.layers.22.fc2
978
  language_model.model.decoder.layers.22.final_layer_norm
979
- language_model.model.decoder.layers.23
980
- language_model.model.decoder.layers.23.self_attn
981
- language_model.model.decoder.layers.23.self_attn.k_proj
982
- language_model.model.decoder.layers.23.self_attn.v_proj
983
- language_model.model.decoder.layers.23.self_attn.q_proj
984
- language_model.model.decoder.layers.23.self_attn.out_proj
985
  language_model.model.decoder.layers.23.activation_fn
986
- language_model.model.decoder.layers.23.self_attn_layer_norm
987
  language_model.model.decoder.layers.23.fc1
988
  language_model.model.decoder.layers.23.fc2
989
  language_model.model.decoder.layers.23.final_layer_norm
990
- language_model.model.decoder.layers.24
991
- language_model.model.decoder.layers.24.self_attn
992
- language_model.model.decoder.layers.24.self_attn.k_proj
993
- language_model.model.decoder.layers.24.self_attn.v_proj
994
- language_model.model.decoder.layers.24.self_attn.q_proj
995
- language_model.model.decoder.layers.24.self_attn.out_proj
996
  language_model.model.decoder.layers.24.activation_fn
997
- language_model.model.decoder.layers.24.self_attn_layer_norm
998
  language_model.model.decoder.layers.24.fc1
999
  language_model.model.decoder.layers.24.fc2
1000
  language_model.model.decoder.layers.24.final_layer_norm
1001
- language_model.model.decoder.layers.25
1002
- language_model.model.decoder.layers.25.self_attn
1003
- language_model.model.decoder.layers.25.self_attn.k_proj
1004
- language_model.model.decoder.layers.25.self_attn.v_proj
1005
- language_model.model.decoder.layers.25.self_attn.q_proj
1006
- language_model.model.decoder.layers.25.self_attn.out_proj
1007
  language_model.model.decoder.layers.25.activation_fn
1008
- language_model.model.decoder.layers.25.self_attn_layer_norm
1009
  language_model.model.decoder.layers.25.fc1
1010
  language_model.model.decoder.layers.25.fc2
1011
  language_model.model.decoder.layers.25.final_layer_norm
1012
- language_model.model.decoder.layers.26
1013
- language_model.model.decoder.layers.26.self_attn
1014
- language_model.model.decoder.layers.26.self_attn.k_proj
1015
- language_model.model.decoder.layers.26.self_attn.v_proj
1016
- language_model.model.decoder.layers.26.self_attn.q_proj
1017
- language_model.model.decoder.layers.26.self_attn.out_proj
1018
  language_model.model.decoder.layers.26.activation_fn
1019
- language_model.model.decoder.layers.26.self_attn_layer_norm
1020
  language_model.model.decoder.layers.26.fc1
1021
  language_model.model.decoder.layers.26.fc2
1022
  language_model.model.decoder.layers.26.final_layer_norm
1023
- language_model.model.decoder.layers.27
1024
- language_model.model.decoder.layers.27.self_attn
1025
- language_model.model.decoder.layers.27.self_attn.k_proj
1026
- language_model.model.decoder.layers.27.self_attn.v_proj
1027
- language_model.model.decoder.layers.27.self_attn.q_proj
1028
- language_model.model.decoder.layers.27.self_attn.out_proj
1029
  language_model.model.decoder.layers.27.activation_fn
1030
- language_model.model.decoder.layers.27.self_attn_layer_norm
1031
  language_model.model.decoder.layers.27.fc1
1032
  language_model.model.decoder.layers.27.fc2
1033
  language_model.model.decoder.layers.27.final_layer_norm
1034
- language_model.model.decoder.layers.28
1035
- language_model.model.decoder.layers.28.self_attn
1036
- language_model.model.decoder.layers.28.self_attn.k_proj
1037
- language_model.model.decoder.layers.28.self_attn.v_proj
1038
- language_model.model.decoder.layers.28.self_attn.q_proj
1039
- language_model.model.decoder.layers.28.self_attn.out_proj
1040
  language_model.model.decoder.layers.28.activation_fn
1041
- language_model.model.decoder.layers.28.self_attn_layer_norm
1042
  language_model.model.decoder.layers.28.fc1
1043
  language_model.model.decoder.layers.28.fc2
1044
  language_model.model.decoder.layers.28.final_layer_norm
1045
- language_model.model.decoder.layers.29
1046
- language_model.model.decoder.layers.29.self_attn
1047
- language_model.model.decoder.layers.29.self_attn.k_proj
1048
- language_model.model.decoder.layers.29.self_attn.v_proj
1049
- language_model.model.decoder.layers.29.self_attn.q_proj
1050
- language_model.model.decoder.layers.29.self_attn.out_proj
1051
  language_model.model.decoder.layers.29.activation_fn
1052
- language_model.model.decoder.layers.29.self_attn_layer_norm
1053
  language_model.model.decoder.layers.29.fc1
1054
  language_model.model.decoder.layers.29.fc2
1055
  language_model.model.decoder.layers.29.final_layer_norm
1056
- language_model.model.decoder.layers.30
1057
- language_model.model.decoder.layers.30.self_attn
1058
- language_model.model.decoder.layers.30.self_attn.k_proj
1059
- language_model.model.decoder.layers.30.self_attn.v_proj
1060
- language_model.model.decoder.layers.30.self_attn.q_proj
1061
- language_model.model.decoder.layers.30.self_attn.out_proj
 
 
 
 
 
 
 
 
1062
  language_model.model.decoder.layers.30.activation_fn
1063
- language_model.model.decoder.layers.30.self_attn_layer_norm
1064
  language_model.model.decoder.layers.30.fc1
1065
  language_model.model.decoder.layers.30.fc2
1066
  language_model.model.decoder.layers.30.final_layer_norm
1067
- language_model.model.decoder.layers.31
1068
- language_model.model.decoder.layers.31.self_attn
1069
- language_model.model.decoder.layers.31.self_attn.k_proj
1070
- language_model.model.decoder.layers.31.self_attn.v_proj
1071
- language_model.model.decoder.layers.31.self_attn.q_proj
1072
- language_model.model.decoder.layers.31.self_attn.out_proj
1073
  language_model.model.decoder.layers.31.activation_fn
1074
- language_model.model.decoder.layers.31.self_attn_layer_norm
1075
  language_model.model.decoder.layers.31.fc1
1076
  language_model.model.decoder.layers.31.fc2
1077
  language_model.model.decoder.layers.31.final_layer_norm
1078
- language_model.lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ language_model.lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  language_model.model.decoder.embed_positions
3
+ language_model.model.decoder.embed_tokens
4
  language_model.model.decoder.final_layer_norm
 
 
 
 
 
 
 
5
  language_model.model.decoder.layers.0.activation_fn
 
6
  language_model.model.decoder.layers.0.fc1
7
  language_model.model.decoder.layers.0.fc2
8
  language_model.model.decoder.layers.0.final_layer_norm
9
+ language_model.model.decoder.layers.0.self_attn.k_proj
10
+ language_model.model.decoder.layers.0.self_attn.out_proj
11
+ language_model.model.decoder.layers.0.self_attn.q_proj
12
+ language_model.model.decoder.layers.0.self_attn.v_proj
13
+ language_model.model.decoder.layers.0.self_attn_layer_norm
 
14
  language_model.model.decoder.layers.1.activation_fn
 
15
  language_model.model.decoder.layers.1.fc1
16
  language_model.model.decoder.layers.1.fc2
17
  language_model.model.decoder.layers.1.final_layer_norm
18
+ language_model.model.decoder.layers.1.self_attn.k_proj
19
+ language_model.model.decoder.layers.1.self_attn.out_proj
20
+ language_model.model.decoder.layers.1.self_attn.q_proj
21
+ language_model.model.decoder.layers.1.self_attn.v_proj
22
+ language_model.model.decoder.layers.1.self_attn_layer_norm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  language_model.model.decoder.layers.10.activation_fn
 
24
  language_model.model.decoder.layers.10.fc1
25
  language_model.model.decoder.layers.10.fc2
26
  language_model.model.decoder.layers.10.final_layer_norm
27
+ language_model.model.decoder.layers.10.self_attn.k_proj
28
+ language_model.model.decoder.layers.10.self_attn.out_proj
29
+ language_model.model.decoder.layers.10.self_attn.q_proj
30
+ language_model.model.decoder.layers.10.self_attn.v_proj
31
+ language_model.model.decoder.layers.10.self_attn_layer_norm
 
32
  language_model.model.decoder.layers.11.activation_fn
 
33
  language_model.model.decoder.layers.11.fc1
34
  language_model.model.decoder.layers.11.fc2
35
  language_model.model.decoder.layers.11.final_layer_norm
36
+ language_model.model.decoder.layers.11.self_attn.k_proj
37
+ language_model.model.decoder.layers.11.self_attn.out_proj
38
+ language_model.model.decoder.layers.11.self_attn.q_proj
39
+ language_model.model.decoder.layers.11.self_attn.v_proj
40
+ language_model.model.decoder.layers.11.self_attn_layer_norm
 
41
  language_model.model.decoder.layers.12.activation_fn
 
42
  language_model.model.decoder.layers.12.fc1
43
  language_model.model.decoder.layers.12.fc2
44
  language_model.model.decoder.layers.12.final_layer_norm
45
+ language_model.model.decoder.layers.12.self_attn.k_proj
46
+ language_model.model.decoder.layers.12.self_attn.out_proj
47
+ language_model.model.decoder.layers.12.self_attn.q_proj
48
+ language_model.model.decoder.layers.12.self_attn.v_proj
49
+ language_model.model.decoder.layers.12.self_attn_layer_norm
 
50
  language_model.model.decoder.layers.13.activation_fn
 
51
  language_model.model.decoder.layers.13.fc1
52
  language_model.model.decoder.layers.13.fc2
53
  language_model.model.decoder.layers.13.final_layer_norm
54
+ language_model.model.decoder.layers.13.self_attn.k_proj
55
+ language_model.model.decoder.layers.13.self_attn.out_proj
56
+ language_model.model.decoder.layers.13.self_attn.q_proj
57
+ language_model.model.decoder.layers.13.self_attn.v_proj
58
+ language_model.model.decoder.layers.13.self_attn_layer_norm
 
59
  language_model.model.decoder.layers.14.activation_fn
 
60
  language_model.model.decoder.layers.14.fc1
61
  language_model.model.decoder.layers.14.fc2
62
  language_model.model.decoder.layers.14.final_layer_norm
63
+ language_model.model.decoder.layers.14.self_attn.k_proj
64
+ language_model.model.decoder.layers.14.self_attn.out_proj
65
+ language_model.model.decoder.layers.14.self_attn.q_proj
66
+ language_model.model.decoder.layers.14.self_attn.v_proj
67
+ language_model.model.decoder.layers.14.self_attn_layer_norm
 
68
  language_model.model.decoder.layers.15.activation_fn
 
69
  language_model.model.decoder.layers.15.fc1
70
  language_model.model.decoder.layers.15.fc2
71
  language_model.model.decoder.layers.15.final_layer_norm
72
+ language_model.model.decoder.layers.15.self_attn.k_proj
73
+ language_model.model.decoder.layers.15.self_attn.out_proj
74
+ language_model.model.decoder.layers.15.self_attn.q_proj
75
+ language_model.model.decoder.layers.15.self_attn.v_proj
76
+ language_model.model.decoder.layers.15.self_attn_layer_norm
 
77
  language_model.model.decoder.layers.16.activation_fn
 
78
  language_model.model.decoder.layers.16.fc1
79
  language_model.model.decoder.layers.16.fc2
80
  language_model.model.decoder.layers.16.final_layer_norm
81
+ language_model.model.decoder.layers.16.self_attn.k_proj
82
+ language_model.model.decoder.layers.16.self_attn.out_proj
83
+ language_model.model.decoder.layers.16.self_attn.q_proj
84
+ language_model.model.decoder.layers.16.self_attn.v_proj
85
+ language_model.model.decoder.layers.16.self_attn_layer_norm
 
86
  language_model.model.decoder.layers.17.activation_fn
 
87
  language_model.model.decoder.layers.17.fc1
88
  language_model.model.decoder.layers.17.fc2
89
  language_model.model.decoder.layers.17.final_layer_norm
90
+ language_model.model.decoder.layers.17.self_attn.k_proj
91
+ language_model.model.decoder.layers.17.self_attn.out_proj
92
+ language_model.model.decoder.layers.17.self_attn.q_proj
93
+ language_model.model.decoder.layers.17.self_attn.v_proj
94
+ language_model.model.decoder.layers.17.self_attn_layer_norm
 
95
  language_model.model.decoder.layers.18.activation_fn
 
96
  language_model.model.decoder.layers.18.fc1
97
  language_model.model.decoder.layers.18.fc2
98
  language_model.model.decoder.layers.18.final_layer_norm
99
+ language_model.model.decoder.layers.18.self_attn.k_proj
100
+ language_model.model.decoder.layers.18.self_attn.out_proj
101
+ language_model.model.decoder.layers.18.self_attn.q_proj
102
+ language_model.model.decoder.layers.18.self_attn.v_proj
103
+ language_model.model.decoder.layers.18.self_attn_layer_norm
 
104
  language_model.model.decoder.layers.19.activation_fn
 
105
  language_model.model.decoder.layers.19.fc1
106
  language_model.model.decoder.layers.19.fc2
107
  language_model.model.decoder.layers.19.final_layer_norm
108
+ language_model.model.decoder.layers.19.self_attn.k_proj
109
+ language_model.model.decoder.layers.19.self_attn.out_proj
110
+ language_model.model.decoder.layers.19.self_attn.q_proj
111
+ language_model.model.decoder.layers.19.self_attn.v_proj
112
+ language_model.model.decoder.layers.19.self_attn_layer_norm
113
+ language_model.model.decoder.layers.2.activation_fn
114
+ language_model.model.decoder.layers.2.fc1
115
+ language_model.model.decoder.layers.2.fc2
116
+ language_model.model.decoder.layers.2.final_layer_norm
117
+ language_model.model.decoder.layers.2.self_attn.k_proj
118
+ language_model.model.decoder.layers.2.self_attn.out_proj
119
+ language_model.model.decoder.layers.2.self_attn.q_proj
120
+ language_model.model.decoder.layers.2.self_attn.v_proj
121
+ language_model.model.decoder.layers.2.self_attn_layer_norm
122
  language_model.model.decoder.layers.20.activation_fn
 
123
  language_model.model.decoder.layers.20.fc1
124
  language_model.model.decoder.layers.20.fc2
125
  language_model.model.decoder.layers.20.final_layer_norm
126
+ language_model.model.decoder.layers.20.self_attn.k_proj
127
+ language_model.model.decoder.layers.20.self_attn.out_proj
128
+ language_model.model.decoder.layers.20.self_attn.q_proj
129
+ language_model.model.decoder.layers.20.self_attn.v_proj
130
+ language_model.model.decoder.layers.20.self_attn_layer_norm
 
131
  language_model.model.decoder.layers.21.activation_fn
 
132
  language_model.model.decoder.layers.21.fc1
133
  language_model.model.decoder.layers.21.fc2
134
  language_model.model.decoder.layers.21.final_layer_norm
135
+ language_model.model.decoder.layers.21.self_attn.k_proj
136
+ language_model.model.decoder.layers.21.self_attn.out_proj
137
+ language_model.model.decoder.layers.21.self_attn.q_proj
138
+ language_model.model.decoder.layers.21.self_attn.v_proj
139
+ language_model.model.decoder.layers.21.self_attn_layer_norm
 
140
  language_model.model.decoder.layers.22.activation_fn
 
141
  language_model.model.decoder.layers.22.fc1
142
  language_model.model.decoder.layers.22.fc2
143
  language_model.model.decoder.layers.22.final_layer_norm
144
+ language_model.model.decoder.layers.22.self_attn.k_proj
145
+ language_model.model.decoder.layers.22.self_attn.out_proj
146
+ language_model.model.decoder.layers.22.self_attn.q_proj
147
+ language_model.model.decoder.layers.22.self_attn.v_proj
148
+ language_model.model.decoder.layers.22.self_attn_layer_norm
 
149
  language_model.model.decoder.layers.23.activation_fn
 
150
  language_model.model.decoder.layers.23.fc1
151
  language_model.model.decoder.layers.23.fc2
152
  language_model.model.decoder.layers.23.final_layer_norm
153
+ language_model.model.decoder.layers.23.self_attn.k_proj
154
+ language_model.model.decoder.layers.23.self_attn.out_proj
155
+ language_model.model.decoder.layers.23.self_attn.q_proj
156
+ language_model.model.decoder.layers.23.self_attn.v_proj
157
+ language_model.model.decoder.layers.23.self_attn_layer_norm
 
158
  language_model.model.decoder.layers.24.activation_fn
 
159
  language_model.model.decoder.layers.24.fc1
160
  language_model.model.decoder.layers.24.fc2
161
  language_model.model.decoder.layers.24.final_layer_norm
162
+ language_model.model.decoder.layers.24.self_attn.k_proj
163
+ language_model.model.decoder.layers.24.self_attn.out_proj
164
+ language_model.model.decoder.layers.24.self_attn.q_proj
165
+ language_model.model.decoder.layers.24.self_attn.v_proj
166
+ language_model.model.decoder.layers.24.self_attn_layer_norm
 
167
  language_model.model.decoder.layers.25.activation_fn
 
168
  language_model.model.decoder.layers.25.fc1
169
  language_model.model.decoder.layers.25.fc2
170
  language_model.model.decoder.layers.25.final_layer_norm
171
+ language_model.model.decoder.layers.25.self_attn.k_proj
172
+ language_model.model.decoder.layers.25.self_attn.out_proj
173
+ language_model.model.decoder.layers.25.self_attn.q_proj
174
+ language_model.model.decoder.layers.25.self_attn.v_proj
175
+ language_model.model.decoder.layers.25.self_attn_layer_norm
 
176
  language_model.model.decoder.layers.26.activation_fn
 
177
  language_model.model.decoder.layers.26.fc1
178
  language_model.model.decoder.layers.26.fc2
179
  language_model.model.decoder.layers.26.final_layer_norm
180
+ language_model.model.decoder.layers.26.self_attn.k_proj
181
+ language_model.model.decoder.layers.26.self_attn.out_proj
182
+ language_model.model.decoder.layers.26.self_attn.q_proj
183
+ language_model.model.decoder.layers.26.self_attn.v_proj
184
+ language_model.model.decoder.layers.26.self_attn_layer_norm
 
185
  language_model.model.decoder.layers.27.activation_fn
 
186
  language_model.model.decoder.layers.27.fc1
187
  language_model.model.decoder.layers.27.fc2
188
  language_model.model.decoder.layers.27.final_layer_norm
189
+ language_model.model.decoder.layers.27.self_attn.k_proj
190
+ language_model.model.decoder.layers.27.self_attn.out_proj
191
+ language_model.model.decoder.layers.27.self_attn.q_proj
192
+ language_model.model.decoder.layers.27.self_attn.v_proj
193
+ language_model.model.decoder.layers.27.self_attn_layer_norm
 
194
  language_model.model.decoder.layers.28.activation_fn
 
195
  language_model.model.decoder.layers.28.fc1
196
  language_model.model.decoder.layers.28.fc2
197
  language_model.model.decoder.layers.28.final_layer_norm
198
+ language_model.model.decoder.layers.28.self_attn.k_proj
199
+ language_model.model.decoder.layers.28.self_attn.out_proj
200
+ language_model.model.decoder.layers.28.self_attn.q_proj
201
+ language_model.model.decoder.layers.28.self_attn.v_proj
202
+ language_model.model.decoder.layers.28.self_attn_layer_norm
 
203
  language_model.model.decoder.layers.29.activation_fn
 
204
  language_model.model.decoder.layers.29.fc1
205
  language_model.model.decoder.layers.29.fc2
206
  language_model.model.decoder.layers.29.final_layer_norm
207
+ language_model.model.decoder.layers.29.self_attn.k_proj
208
+ language_model.model.decoder.layers.29.self_attn.out_proj
209
+ language_model.model.decoder.layers.29.self_attn.q_proj
210
+ language_model.model.decoder.layers.29.self_attn.v_proj
211
+ language_model.model.decoder.layers.29.self_attn_layer_norm
212
+ language_model.model.decoder.layers.3.activation_fn
213
+ language_model.model.decoder.layers.3.fc1
214
+ language_model.model.decoder.layers.3.fc2
215
+ language_model.model.decoder.layers.3.final_layer_norm
216
+ language_model.model.decoder.layers.3.self_attn.k_proj
217
+ language_model.model.decoder.layers.3.self_attn.out_proj
218
+ language_model.model.decoder.layers.3.self_attn.q_proj
219
+ language_model.model.decoder.layers.3.self_attn.v_proj
220
+ language_model.model.decoder.layers.3.self_attn_layer_norm
221
  language_model.model.decoder.layers.30.activation_fn
 
222
  language_model.model.decoder.layers.30.fc1
223
  language_model.model.decoder.layers.30.fc2
224
  language_model.model.decoder.layers.30.final_layer_norm
225
+ language_model.model.decoder.layers.30.self_attn.k_proj
226
+ language_model.model.decoder.layers.30.self_attn.out_proj
227
+ language_model.model.decoder.layers.30.self_attn.q_proj
228
+ language_model.model.decoder.layers.30.self_attn.v_proj
229
+ language_model.model.decoder.layers.30.self_attn_layer_norm
 
230
  language_model.model.decoder.layers.31.activation_fn
 
231
  language_model.model.decoder.layers.31.fc1
232
  language_model.model.decoder.layers.31.fc2
233
  language_model.model.decoder.layers.31.final_layer_norm
234
+ language_model.model.decoder.layers.31.self_attn.k_proj
235
+ language_model.model.decoder.layers.31.self_attn.out_proj
236
+ language_model.model.decoder.layers.31.self_attn.q_proj
237
+ language_model.model.decoder.layers.31.self_attn.v_proj
238
+ language_model.model.decoder.layers.31.self_attn_layer_norm
239
+ language_model.model.decoder.layers.4.activation_fn
240
+ language_model.model.decoder.layers.4.fc1
241
+ language_model.model.decoder.layers.4.fc2
242
+ language_model.model.decoder.layers.4.final_layer_norm
243
+ language_model.model.decoder.layers.4.self_attn.k_proj
244
+ language_model.model.decoder.layers.4.self_attn.out_proj
245
+ language_model.model.decoder.layers.4.self_attn.q_proj
246
+ language_model.model.decoder.layers.4.self_attn.v_proj
247
+ language_model.model.decoder.layers.4.self_attn_layer_norm
248
+ language_model.model.decoder.layers.5.activation_fn
249
+ language_model.model.decoder.layers.5.fc1
250
+ language_model.model.decoder.layers.5.fc2
251
+ language_model.model.decoder.layers.5.final_layer_norm
252
+ language_model.model.decoder.layers.5.self_attn.k_proj
253
+ language_model.model.decoder.layers.5.self_attn.out_proj
254
+ language_model.model.decoder.layers.5.self_attn.q_proj
255
+ language_model.model.decoder.layers.5.self_attn.v_proj
256
+ language_model.model.decoder.layers.5.self_attn_layer_norm
257
+ language_model.model.decoder.layers.6.activation_fn
258
+ language_model.model.decoder.layers.6.fc1
259
+ language_model.model.decoder.layers.6.fc2
260
+ language_model.model.decoder.layers.6.final_layer_norm
261
+ language_model.model.decoder.layers.6.self_attn.k_proj
262
+ language_model.model.decoder.layers.6.self_attn.out_proj
263
+ language_model.model.decoder.layers.6.self_attn.q_proj
264
+ language_model.model.decoder.layers.6.self_attn.v_proj
265
+ language_model.model.decoder.layers.6.self_attn_layer_norm
266
+ language_model.model.decoder.layers.7.activation_fn
267
+ language_model.model.decoder.layers.7.fc1
268
+ language_model.model.decoder.layers.7.fc2
269
+ language_model.model.decoder.layers.7.final_layer_norm
270
+ language_model.model.decoder.layers.7.self_attn.k_proj
271
+ language_model.model.decoder.layers.7.self_attn.out_proj
272
+ language_model.model.decoder.layers.7.self_attn.q_proj
273
+ language_model.model.decoder.layers.7.self_attn.v_proj
274
+ language_model.model.decoder.layers.7.self_attn_layer_norm
275
+ language_model.model.decoder.layers.8.activation_fn
276
+ language_model.model.decoder.layers.8.fc1
277
+ language_model.model.decoder.layers.8.fc2
278
+ language_model.model.decoder.layers.8.final_layer_norm
279
+ language_model.model.decoder.layers.8.self_attn.k_proj
280
+ language_model.model.decoder.layers.8.self_attn.out_proj
281
+ language_model.model.decoder.layers.8.self_attn.q_proj
282
+ language_model.model.decoder.layers.8.self_attn.v_proj
283
+ language_model.model.decoder.layers.8.self_attn_layer_norm
284
+ language_model.model.decoder.layers.9.activation_fn
285
+ language_model.model.decoder.layers.9.fc1
286
+ language_model.model.decoder.layers.9.fc2
287
+ language_model.model.decoder.layers.9.final_layer_norm
288
+ language_model.model.decoder.layers.9.self_attn.k_proj
289
+ language_model.model.decoder.layers.9.self_attn.out_proj
290
+ language_model.model.decoder.layers.9.self_attn.q_proj
291
+ language_model.model.decoder.layers.9.self_attn.v_proj
292
+ language_model.model.decoder.layers.9.self_attn_layer_norm
293
+ qformer.dropout
294
+ qformer.encoder.layer.0.attention.attention.dropout
295
+ qformer.encoder.layer.0.attention.attention.key
296
+ qformer.encoder.layer.0.attention.attention.query
297
+ qformer.encoder.layer.0.attention.attention.value
298
+ qformer.encoder.layer.0.attention.output
299
+ qformer.encoder.layer.0.attention.output.LayerNorm
300
+ qformer.encoder.layer.0.attention.output.dense
301
+ qformer.encoder.layer.0.attention.output.dropout
302
+ qformer.encoder.layer.0.crossattention.attention.dropout
303
+ qformer.encoder.layer.0.crossattention.attention.key
304
+ qformer.encoder.layer.0.crossattention.attention.query
305
+ qformer.encoder.layer.0.crossattention.attention.value
306
+ qformer.encoder.layer.0.crossattention.output
307
+ qformer.encoder.layer.0.crossattention.output.LayerNorm
308
+ qformer.encoder.layer.0.crossattention.output.dense
309
+ qformer.encoder.layer.0.crossattention.output.dropout
310
+ qformer.encoder.layer.0.intermediate_query
311
+ qformer.encoder.layer.0.intermediate_query.dense
312
+ qformer.encoder.layer.0.intermediate_query.intermediate_act_fn
313
+ qformer.encoder.layer.0.output_query
314
+ qformer.encoder.layer.0.output_query.LayerNorm
315
+ qformer.encoder.layer.0.output_query.dense
316
+ qformer.encoder.layer.0.output_query.dropout
317
+ qformer.encoder.layer.1.attention.attention.dropout
318
+ qformer.encoder.layer.1.attention.attention.key
319
+ qformer.encoder.layer.1.attention.attention.query
320
+ qformer.encoder.layer.1.attention.attention.value
321
+ qformer.encoder.layer.1.attention.output
322
+ qformer.encoder.layer.1.attention.output.LayerNorm
323
+ qformer.encoder.layer.1.attention.output.dense
324
+ qformer.encoder.layer.1.attention.output.dropout
325
+ qformer.encoder.layer.1.intermediate_query
326
+ qformer.encoder.layer.1.intermediate_query.dense
327
+ qformer.encoder.layer.1.intermediate_query.intermediate_act_fn
328
+ qformer.encoder.layer.1.output_query
329
+ qformer.encoder.layer.1.output_query.LayerNorm
330
+ qformer.encoder.layer.1.output_query.dense
331
+ qformer.encoder.layer.1.output_query.dropout
332
+ qformer.encoder.layer.10.attention.attention.dropout
333
+ qformer.encoder.layer.10.attention.attention.key
334
+ qformer.encoder.layer.10.attention.attention.query
335
+ qformer.encoder.layer.10.attention.attention.value
336
+ qformer.encoder.layer.10.attention.output
337
+ qformer.encoder.layer.10.attention.output.LayerNorm
338
+ qformer.encoder.layer.10.attention.output.dense
339
+ qformer.encoder.layer.10.attention.output.dropout
340
+ qformer.encoder.layer.10.crossattention.attention.dropout
341
+ qformer.encoder.layer.10.crossattention.attention.key
342
+ qformer.encoder.layer.10.crossattention.attention.query
343
+ qformer.encoder.layer.10.crossattention.attention.value
344
+ qformer.encoder.layer.10.crossattention.output
345
+ qformer.encoder.layer.10.crossattention.output.LayerNorm
346
+ qformer.encoder.layer.10.crossattention.output.dense
347
+ qformer.encoder.layer.10.crossattention.output.dropout
348
+ qformer.encoder.layer.10.intermediate_query
349
+ qformer.encoder.layer.10.intermediate_query.dense
350
+ qformer.encoder.layer.10.intermediate_query.intermediate_act_fn
351
+ qformer.encoder.layer.10.output_query
352
+ qformer.encoder.layer.10.output_query.LayerNorm
353
+ qformer.encoder.layer.10.output_query.dense
354
+ qformer.encoder.layer.10.output_query.dropout
355
+ qformer.encoder.layer.11.attention.attention.dropout
356
+ qformer.encoder.layer.11.attention.attention.key
357
+ qformer.encoder.layer.11.attention.attention.query
358
+ qformer.encoder.layer.11.attention.attention.value
359
+ qformer.encoder.layer.11.attention.output
360
+ qformer.encoder.layer.11.attention.output.LayerNorm
361
+ qformer.encoder.layer.11.attention.output.dense
362
+ qformer.encoder.layer.11.attention.output.dropout
363
+ qformer.encoder.layer.11.intermediate_query
364
+ qformer.encoder.layer.11.intermediate_query.dense
365
+ qformer.encoder.layer.11.intermediate_query.intermediate_act_fn
366
+ qformer.encoder.layer.11.output_query
367
+ qformer.encoder.layer.11.output_query.LayerNorm
368
+ qformer.encoder.layer.11.output_query.dense
369
+ qformer.encoder.layer.11.output_query.dropout
370
+ qformer.encoder.layer.2.attention.attention.dropout
371
+ qformer.encoder.layer.2.attention.attention.key
372
+ qformer.encoder.layer.2.attention.attention.query
373
+ qformer.encoder.layer.2.attention.attention.value
374
+ qformer.encoder.layer.2.attention.output
375
+ qformer.encoder.layer.2.attention.output.LayerNorm
376
+ qformer.encoder.layer.2.attention.output.dense
377
+ qformer.encoder.layer.2.attention.output.dropout
378
+ qformer.encoder.layer.2.crossattention.attention.dropout
379
+ qformer.encoder.layer.2.crossattention.attention.key
380
+ qformer.encoder.layer.2.crossattention.attention.query
381
+ qformer.encoder.layer.2.crossattention.attention.value
382
+ qformer.encoder.layer.2.crossattention.output
383
+ qformer.encoder.layer.2.crossattention.output.LayerNorm
384
+ qformer.encoder.layer.2.crossattention.output.dense
385
+ qformer.encoder.layer.2.crossattention.output.dropout
386
+ qformer.encoder.layer.2.intermediate_query
387
+ qformer.encoder.layer.2.intermediate_query.dense
388
+ qformer.encoder.layer.2.intermediate_query.intermediate_act_fn
389
+ qformer.encoder.layer.2.output_query
390
+ qformer.encoder.layer.2.output_query.LayerNorm
391
+ qformer.encoder.layer.2.output_query.dense
392
+ qformer.encoder.layer.2.output_query.dropout
393
+ qformer.encoder.layer.3.attention.attention.dropout
394
+ qformer.encoder.layer.3.attention.attention.key
395
+ qformer.encoder.layer.3.attention.attention.query
396
+ qformer.encoder.layer.3.attention.attention.value
397
+ qformer.encoder.layer.3.attention.output
398
+ qformer.encoder.layer.3.attention.output.LayerNorm
399
+ qformer.encoder.layer.3.attention.output.dense
400
+ qformer.encoder.layer.3.attention.output.dropout
401
+ qformer.encoder.layer.3.intermediate_query
402
+ qformer.encoder.layer.3.intermediate_query.dense
403
+ qformer.encoder.layer.3.intermediate_query.intermediate_act_fn
404
+ qformer.encoder.layer.3.output_query
405
+ qformer.encoder.layer.3.output_query.LayerNorm
406
+ qformer.encoder.layer.3.output_query.dense
407
+ qformer.encoder.layer.3.output_query.dropout
408
+ qformer.encoder.layer.4.attention.attention.dropout
409
+ qformer.encoder.layer.4.attention.attention.key
410
+ qformer.encoder.layer.4.attention.attention.query
411
+ qformer.encoder.layer.4.attention.attention.value
412
+ qformer.encoder.layer.4.attention.output
413
+ qformer.encoder.layer.4.attention.output.LayerNorm
414
+ qformer.encoder.layer.4.attention.output.dense
415
+ qformer.encoder.layer.4.attention.output.dropout
416
+ qformer.encoder.layer.4.crossattention.attention.dropout
417
+ qformer.encoder.layer.4.crossattention.attention.key
418
+ qformer.encoder.layer.4.crossattention.attention.query
419
+ qformer.encoder.layer.4.crossattention.attention.value
420
+ qformer.encoder.layer.4.crossattention.output
421
+ qformer.encoder.layer.4.crossattention.output.LayerNorm
422
+ qformer.encoder.layer.4.crossattention.output.dense
423
+ qformer.encoder.layer.4.crossattention.output.dropout
424
+ qformer.encoder.layer.4.intermediate_query
425
+ qformer.encoder.layer.4.intermediate_query.dense
426
+ qformer.encoder.layer.4.intermediate_query.intermediate_act_fn
427
+ qformer.encoder.layer.4.output_query
428
+ qformer.encoder.layer.4.output_query.LayerNorm
429
+ qformer.encoder.layer.4.output_query.dense
430
+ qformer.encoder.layer.4.output_query.dropout
431
+ qformer.encoder.layer.5.attention.attention.dropout
432
+ qformer.encoder.layer.5.attention.attention.key
433
+ qformer.encoder.layer.5.attention.attention.query
434
+ qformer.encoder.layer.5.attention.attention.value
435
+ qformer.encoder.layer.5.attention.output
436
+ qformer.encoder.layer.5.attention.output.LayerNorm
437
+ qformer.encoder.layer.5.attention.output.dense
438
+ qformer.encoder.layer.5.attention.output.dropout
439
+ qformer.encoder.layer.5.intermediate_query
440
+ qformer.encoder.layer.5.intermediate_query.dense
441
+ qformer.encoder.layer.5.intermediate_query.intermediate_act_fn
442
+ qformer.encoder.layer.5.output_query
443
+ qformer.encoder.layer.5.output_query.LayerNorm
444
+ qformer.encoder.layer.5.output_query.dense
445
+ qformer.encoder.layer.5.output_query.dropout
446
+ qformer.encoder.layer.6.attention.attention.dropout
447
+ qformer.encoder.layer.6.attention.attention.key
448
+ qformer.encoder.layer.6.attention.attention.query
449
+ qformer.encoder.layer.6.attention.attention.value
450
+ qformer.encoder.layer.6.attention.output
451
+ qformer.encoder.layer.6.attention.output.LayerNorm
452
+ qformer.encoder.layer.6.attention.output.dense
453
+ qformer.encoder.layer.6.attention.output.dropout
454
+ qformer.encoder.layer.6.crossattention.attention.dropout
455
+ qformer.encoder.layer.6.crossattention.attention.key
456
+ qformer.encoder.layer.6.crossattention.attention.query
457
+ qformer.encoder.layer.6.crossattention.attention.value
458
+ qformer.encoder.layer.6.crossattention.output
459
+ qformer.encoder.layer.6.crossattention.output.LayerNorm
460
+ qformer.encoder.layer.6.crossattention.output.dense
461
+ qformer.encoder.layer.6.crossattention.output.dropout
462
+ qformer.encoder.layer.6.intermediate_query
463
+ qformer.encoder.layer.6.intermediate_query.dense
464
+ qformer.encoder.layer.6.intermediate_query.intermediate_act_fn
465
+ qformer.encoder.layer.6.output_query
466
+ qformer.encoder.layer.6.output_query.LayerNorm
467
+ qformer.encoder.layer.6.output_query.dense
468
+ qformer.encoder.layer.6.output_query.dropout
469
+ qformer.encoder.layer.7.attention.attention.dropout
470
+ qformer.encoder.layer.7.attention.attention.key
471
+ qformer.encoder.layer.7.attention.attention.query
472
+ qformer.encoder.layer.7.attention.attention.value
473
+ qformer.encoder.layer.7.attention.output
474
+ qformer.encoder.layer.7.attention.output.LayerNorm
475
+ qformer.encoder.layer.7.attention.output.dense
476
+ qformer.encoder.layer.7.attention.output.dropout
477
+ qformer.encoder.layer.7.intermediate_query
478
+ qformer.encoder.layer.7.intermediate_query.dense
479
+ qformer.encoder.layer.7.intermediate_query.intermediate_act_fn
480
+ qformer.encoder.layer.7.output_query
481
+ qformer.encoder.layer.7.output_query.LayerNorm
482
+ qformer.encoder.layer.7.output_query.dense
483
+ qformer.encoder.layer.7.output_query.dropout
484
+ qformer.encoder.layer.8.attention.attention.dropout
485
+ qformer.encoder.layer.8.attention.attention.key
486
+ qformer.encoder.layer.8.attention.attention.query
487
+ qformer.encoder.layer.8.attention.attention.value
488
+ qformer.encoder.layer.8.attention.output
489
+ qformer.encoder.layer.8.attention.output.LayerNorm
490
+ qformer.encoder.layer.8.attention.output.dense
491
+ qformer.encoder.layer.8.attention.output.dropout
492
+ qformer.encoder.layer.8.crossattention.attention.dropout
493
+ qformer.encoder.layer.8.crossattention.attention.key
494
+ qformer.encoder.layer.8.crossattention.attention.query
495
+ qformer.encoder.layer.8.crossattention.attention.value
496
+ qformer.encoder.layer.8.crossattention.output
497
+ qformer.encoder.layer.8.crossattention.output.LayerNorm
498
+ qformer.encoder.layer.8.crossattention.output.dense
499
+ qformer.encoder.layer.8.crossattention.output.dropout
500
+ qformer.encoder.layer.8.intermediate_query
501
+ qformer.encoder.layer.8.intermediate_query.dense
502
+ qformer.encoder.layer.8.intermediate_query.intermediate_act_fn
503
+ qformer.encoder.layer.8.output_query
504
+ qformer.encoder.layer.8.output_query.LayerNorm
505
+ qformer.encoder.layer.8.output_query.dense
506
+ qformer.encoder.layer.8.output_query.dropout
507
+ qformer.encoder.layer.9.attention.attention.dropout
508
+ qformer.encoder.layer.9.attention.attention.key
509
+ qformer.encoder.layer.9.attention.attention.query
510
+ qformer.encoder.layer.9.attention.attention.value
511
+ qformer.encoder.layer.9.attention.output
512
+ qformer.encoder.layer.9.attention.output.LayerNorm
513
+ qformer.encoder.layer.9.attention.output.dense
514
+ qformer.encoder.layer.9.attention.output.dropout
515
+ qformer.encoder.layer.9.intermediate_query
516
+ qformer.encoder.layer.9.intermediate_query.dense
517
+ qformer.encoder.layer.9.intermediate_query.intermediate_act_fn
518
+ qformer.encoder.layer.9.output_query
519
+ qformer.encoder.layer.9.output_query.LayerNorm
520
+ qformer.encoder.layer.9.output_query.dense
521
+ qformer.encoder.layer.9.output_query.dropout
522
+ qformer.layernorm
523
+ vision_model.embeddings
524
+ vision_model.embeddings.patch_embedding
525
+ vision_model.encoder.layers.0.layer_norm1
526
+ vision_model.encoder.layers.0.layer_norm2
527
+ vision_model.encoder.layers.0.mlp
528
+ vision_model.encoder.layers.0.mlp.activation_fn
529
+ vision_model.encoder.layers.0.mlp.fc1
530
+ vision_model.encoder.layers.0.mlp.fc2
531
+ vision_model.encoder.layers.0.self_attn.dropout
532
+ vision_model.encoder.layers.0.self_attn.projection
533
+ vision_model.encoder.layers.0.self_attn.qkv
534
+ vision_model.encoder.layers.1.layer_norm1
535
+ vision_model.encoder.layers.1.layer_norm2
536
+ vision_model.encoder.layers.1.mlp
537
+ vision_model.encoder.layers.1.mlp.activation_fn
538
+ vision_model.encoder.layers.1.mlp.fc1
539
+ vision_model.encoder.layers.1.mlp.fc2
540
+ vision_model.encoder.layers.1.self_attn.dropout
541
+ vision_model.encoder.layers.1.self_attn.projection
542
+ vision_model.encoder.layers.1.self_attn.qkv
543
+ vision_model.encoder.layers.10.layer_norm1
544
+ vision_model.encoder.layers.10.layer_norm2
545
+ vision_model.encoder.layers.10.mlp
546
+ vision_model.encoder.layers.10.mlp.activation_fn
547
+ vision_model.encoder.layers.10.mlp.fc1
548
+ vision_model.encoder.layers.10.mlp.fc2
549
+ vision_model.encoder.layers.10.self_attn.dropout
550
+ vision_model.encoder.layers.10.self_attn.projection
551
+ vision_model.encoder.layers.10.self_attn.qkv
552
+ vision_model.encoder.layers.11.layer_norm1
553
+ vision_model.encoder.layers.11.layer_norm2
554
+ vision_model.encoder.layers.11.mlp
555
+ vision_model.encoder.layers.11.mlp.activation_fn
556
+ vision_model.encoder.layers.11.mlp.fc1
557
+ vision_model.encoder.layers.11.mlp.fc2
558
+ vision_model.encoder.layers.11.self_attn.dropout
559
+ vision_model.encoder.layers.11.self_attn.projection
560
+ vision_model.encoder.layers.11.self_attn.qkv
561
+ vision_model.encoder.layers.12.layer_norm1
562
+ vision_model.encoder.layers.12.layer_norm2
563
+ vision_model.encoder.layers.12.mlp
564
+ vision_model.encoder.layers.12.mlp.activation_fn
565
+ vision_model.encoder.layers.12.mlp.fc1
566
+ vision_model.encoder.layers.12.mlp.fc2
567
+ vision_model.encoder.layers.12.self_attn.dropout
568
+ vision_model.encoder.layers.12.self_attn.projection
569
+ vision_model.encoder.layers.12.self_attn.qkv
570
+ vision_model.encoder.layers.13.layer_norm1
571
+ vision_model.encoder.layers.13.layer_norm2
572
+ vision_model.encoder.layers.13.mlp
573
+ vision_model.encoder.layers.13.mlp.activation_fn
574
+ vision_model.encoder.layers.13.mlp.fc1
575
+ vision_model.encoder.layers.13.mlp.fc2
576
+ vision_model.encoder.layers.13.self_attn.dropout
577
+ vision_model.encoder.layers.13.self_attn.projection
578
+ vision_model.encoder.layers.13.self_attn.qkv
579
+ vision_model.encoder.layers.14.layer_norm1
580
+ vision_model.encoder.layers.14.layer_norm2
581
+ vision_model.encoder.layers.14.mlp
582
+ vision_model.encoder.layers.14.mlp.activation_fn
583
+ vision_model.encoder.layers.14.mlp.fc1
584
+ vision_model.encoder.layers.14.mlp.fc2
585
+ vision_model.encoder.layers.14.self_attn.dropout
586
+ vision_model.encoder.layers.14.self_attn.projection
587
+ vision_model.encoder.layers.14.self_attn.qkv
588
+ vision_model.encoder.layers.15.layer_norm1
589
+ vision_model.encoder.layers.15.layer_norm2
590
+ vision_model.encoder.layers.15.mlp
591
+ vision_model.encoder.layers.15.mlp.activation_fn
592
+ vision_model.encoder.layers.15.mlp.fc1
593
+ vision_model.encoder.layers.15.mlp.fc2
594
+ vision_model.encoder.layers.15.self_attn.dropout
595
+ vision_model.encoder.layers.15.self_attn.projection
596
+ vision_model.encoder.layers.15.self_attn.qkv
597
+ vision_model.encoder.layers.16.layer_norm1
598
+ vision_model.encoder.layers.16.layer_norm2
599
+ vision_model.encoder.layers.16.mlp
600
+ vision_model.encoder.layers.16.mlp.activation_fn
601
+ vision_model.encoder.layers.16.mlp.fc1
602
+ vision_model.encoder.layers.16.mlp.fc2
603
+ vision_model.encoder.layers.16.self_attn.dropout
604
+ vision_model.encoder.layers.16.self_attn.projection
605
+ vision_model.encoder.layers.16.self_attn.qkv
606
+ vision_model.encoder.layers.17.layer_norm1
607
+ vision_model.encoder.layers.17.layer_norm2
608
+ vision_model.encoder.layers.17.mlp
609
+ vision_model.encoder.layers.17.mlp.activation_fn
610
+ vision_model.encoder.layers.17.mlp.fc1
611
+ vision_model.encoder.layers.17.mlp.fc2
612
+ vision_model.encoder.layers.17.self_attn.dropout
613
+ vision_model.encoder.layers.17.self_attn.projection
614
+ vision_model.encoder.layers.17.self_attn.qkv
615
+ vision_model.encoder.layers.18.layer_norm1
616
+ vision_model.encoder.layers.18.layer_norm2
617
+ vision_model.encoder.layers.18.mlp
618
+ vision_model.encoder.layers.18.mlp.activation_fn
619
+ vision_model.encoder.layers.18.mlp.fc1
620
+ vision_model.encoder.layers.18.mlp.fc2
621
+ vision_model.encoder.layers.18.self_attn.dropout
622
+ vision_model.encoder.layers.18.self_attn.projection
623
+ vision_model.encoder.layers.18.self_attn.qkv
624
+ vision_model.encoder.layers.19.layer_norm1
625
+ vision_model.encoder.layers.19.layer_norm2
626
+ vision_model.encoder.layers.19.mlp
627
+ vision_model.encoder.layers.19.mlp.activation_fn
628
+ vision_model.encoder.layers.19.mlp.fc1
629
+ vision_model.encoder.layers.19.mlp.fc2
630
+ vision_model.encoder.layers.19.self_attn.dropout
631
+ vision_model.encoder.layers.19.self_attn.projection
632
+ vision_model.encoder.layers.19.self_attn.qkv
633
+ vision_model.encoder.layers.2.layer_norm1
634
+ vision_model.encoder.layers.2.layer_norm2
635
+ vision_model.encoder.layers.2.mlp
636
+ vision_model.encoder.layers.2.mlp.activation_fn
637
+ vision_model.encoder.layers.2.mlp.fc1
638
+ vision_model.encoder.layers.2.mlp.fc2
639
+ vision_model.encoder.layers.2.self_attn.dropout
640
+ vision_model.encoder.layers.2.self_attn.projection
641
+ vision_model.encoder.layers.2.self_attn.qkv
642
+ vision_model.encoder.layers.20.layer_norm1
643
+ vision_model.encoder.layers.20.layer_norm2
644
+ vision_model.encoder.layers.20.mlp
645
+ vision_model.encoder.layers.20.mlp.activation_fn
646
+ vision_model.encoder.layers.20.mlp.fc1
647
+ vision_model.encoder.layers.20.mlp.fc2
648
+ vision_model.encoder.layers.20.self_attn.dropout
649
+ vision_model.encoder.layers.20.self_attn.projection
650
+ vision_model.encoder.layers.20.self_attn.qkv
651
+ vision_model.encoder.layers.21.layer_norm1
652
+ vision_model.encoder.layers.21.layer_norm2
653
+ vision_model.encoder.layers.21.mlp
654
+ vision_model.encoder.layers.21.mlp.activation_fn
655
+ vision_model.encoder.layers.21.mlp.fc1
656
+ vision_model.encoder.layers.21.mlp.fc2
657
+ vision_model.encoder.layers.21.self_attn.dropout
658
+ vision_model.encoder.layers.21.self_attn.projection
659
+ vision_model.encoder.layers.21.self_attn.qkv
660
+ vision_model.encoder.layers.22.layer_norm1
661
+ vision_model.encoder.layers.22.layer_norm2
662
+ vision_model.encoder.layers.22.mlp
663
+ vision_model.encoder.layers.22.mlp.activation_fn
664
+ vision_model.encoder.layers.22.mlp.fc1
665
+ vision_model.encoder.layers.22.mlp.fc2
666
+ vision_model.encoder.layers.22.self_attn.dropout
667
+ vision_model.encoder.layers.22.self_attn.projection
668
+ vision_model.encoder.layers.22.self_attn.qkv
669
+ vision_model.encoder.layers.23.layer_norm1
670
+ vision_model.encoder.layers.23.layer_norm2
671
+ vision_model.encoder.layers.23.mlp
672
+ vision_model.encoder.layers.23.mlp.activation_fn
673
+ vision_model.encoder.layers.23.mlp.fc1
674
+ vision_model.encoder.layers.23.mlp.fc2
675
+ vision_model.encoder.layers.23.self_attn.dropout
676
+ vision_model.encoder.layers.23.self_attn.projection
677
+ vision_model.encoder.layers.23.self_attn.qkv
678
+ vision_model.encoder.layers.24.layer_norm1
679
+ vision_model.encoder.layers.24.layer_norm2
680
+ vision_model.encoder.layers.24.mlp
681
+ vision_model.encoder.layers.24.mlp.activation_fn
682
+ vision_model.encoder.layers.24.mlp.fc1
683
+ vision_model.encoder.layers.24.mlp.fc2
684
+ vision_model.encoder.layers.24.self_attn.dropout
685
+ vision_model.encoder.layers.24.self_attn.projection
686
+ vision_model.encoder.layers.24.self_attn.qkv
687
+ vision_model.encoder.layers.25.layer_norm1
688
+ vision_model.encoder.layers.25.layer_norm2
689
+ vision_model.encoder.layers.25.mlp
690
+ vision_model.encoder.layers.25.mlp.activation_fn
691
+ vision_model.encoder.layers.25.mlp.fc1
692
+ vision_model.encoder.layers.25.mlp.fc2
693
+ vision_model.encoder.layers.25.self_attn.dropout
694
+ vision_model.encoder.layers.25.self_attn.projection
695
+ vision_model.encoder.layers.25.self_attn.qkv
696
+ vision_model.encoder.layers.26.layer_norm1
697
+ vision_model.encoder.layers.26.layer_norm2
698
+ vision_model.encoder.layers.26.mlp
699
+ vision_model.encoder.layers.26.mlp.activation_fn
700
+ vision_model.encoder.layers.26.mlp.fc1
701
+ vision_model.encoder.layers.26.mlp.fc2
702
+ vision_model.encoder.layers.26.self_attn.dropout
703
+ vision_model.encoder.layers.26.self_attn.projection
704
+ vision_model.encoder.layers.26.self_attn.qkv
705
+ vision_model.encoder.layers.27.layer_norm1
706
+ vision_model.encoder.layers.27.layer_norm2
707
+ vision_model.encoder.layers.27.mlp
708
+ vision_model.encoder.layers.27.mlp.activation_fn
709
+ vision_model.encoder.layers.27.mlp.fc1
710
+ vision_model.encoder.layers.27.mlp.fc2
711
+ vision_model.encoder.layers.27.self_attn.dropout
712
+ vision_model.encoder.layers.27.self_attn.projection
713
+ vision_model.encoder.layers.27.self_attn.qkv
714
+ vision_model.encoder.layers.28.layer_norm1
715
+ vision_model.encoder.layers.28.layer_norm2
716
+ vision_model.encoder.layers.28.mlp
717
+ vision_model.encoder.layers.28.mlp.activation_fn
718
+ vision_model.encoder.layers.28.mlp.fc1
719
+ vision_model.encoder.layers.28.mlp.fc2
720
+ vision_model.encoder.layers.28.self_attn.dropout
721
+ vision_model.encoder.layers.28.self_attn.projection
722
+ vision_model.encoder.layers.28.self_attn.qkv
723
+ vision_model.encoder.layers.29.layer_norm1
724
+ vision_model.encoder.layers.29.layer_norm2
725
+ vision_model.encoder.layers.29.mlp
726
+ vision_model.encoder.layers.29.mlp.activation_fn
727
+ vision_model.encoder.layers.29.mlp.fc1
728
+ vision_model.encoder.layers.29.mlp.fc2
729
+ vision_model.encoder.layers.29.self_attn.dropout
730
+ vision_model.encoder.layers.29.self_attn.projection
731
+ vision_model.encoder.layers.29.self_attn.qkv
732
+ vision_model.encoder.layers.3.layer_norm1
733
+ vision_model.encoder.layers.3.layer_norm2
734
+ vision_model.encoder.layers.3.mlp
735
+ vision_model.encoder.layers.3.mlp.activation_fn
736
+ vision_model.encoder.layers.3.mlp.fc1
737
+ vision_model.encoder.layers.3.mlp.fc2
738
+ vision_model.encoder.layers.3.self_attn.dropout
739
+ vision_model.encoder.layers.3.self_attn.projection
740
+ vision_model.encoder.layers.3.self_attn.qkv
741
+ vision_model.encoder.layers.30.layer_norm1
742
+ vision_model.encoder.layers.30.layer_norm2
743
+ vision_model.encoder.layers.30.mlp
744
+ vision_model.encoder.layers.30.mlp.activation_fn
745
+ vision_model.encoder.layers.30.mlp.fc1
746
+ vision_model.encoder.layers.30.mlp.fc2
747
+ vision_model.encoder.layers.30.self_attn.dropout
748
+ vision_model.encoder.layers.30.self_attn.projection
749
+ vision_model.encoder.layers.30.self_attn.qkv
750
+ vision_model.encoder.layers.31.layer_norm1
751
+ vision_model.encoder.layers.31.layer_norm2
752
+ vision_model.encoder.layers.31.mlp
753
+ vision_model.encoder.layers.31.mlp.activation_fn
754
+ vision_model.encoder.layers.31.mlp.fc1
755
+ vision_model.encoder.layers.31.mlp.fc2
756
+ vision_model.encoder.layers.31.self_attn.dropout
757
+ vision_model.encoder.layers.31.self_attn.projection
758
+ vision_model.encoder.layers.31.self_attn.qkv
759
+ vision_model.encoder.layers.32.layer_norm1
760
+ vision_model.encoder.layers.32.layer_norm2
761
+ vision_model.encoder.layers.32.mlp
762
+ vision_model.encoder.layers.32.mlp.activation_fn
763
+ vision_model.encoder.layers.32.mlp.fc1
764
+ vision_model.encoder.layers.32.mlp.fc2
765
+ vision_model.encoder.layers.32.self_attn.dropout
766
+ vision_model.encoder.layers.32.self_attn.projection
767
+ vision_model.encoder.layers.32.self_attn.qkv
768
+ vision_model.encoder.layers.33.layer_norm1
769
+ vision_model.encoder.layers.33.layer_norm2
770
+ vision_model.encoder.layers.33.mlp
771
+ vision_model.encoder.layers.33.mlp.activation_fn
772
+ vision_model.encoder.layers.33.mlp.fc1
773
+ vision_model.encoder.layers.33.mlp.fc2
774
+ vision_model.encoder.layers.33.self_attn.dropout
775
+ vision_model.encoder.layers.33.self_attn.projection
776
+ vision_model.encoder.layers.33.self_attn.qkv
777
+ vision_model.encoder.layers.34.layer_norm1
778
+ vision_model.encoder.layers.34.layer_norm2
779
+ vision_model.encoder.layers.34.mlp
780
+ vision_model.encoder.layers.34.mlp.activation_fn
781
+ vision_model.encoder.layers.34.mlp.fc1
782
+ vision_model.encoder.layers.34.mlp.fc2
783
+ vision_model.encoder.layers.34.self_attn.dropout
784
+ vision_model.encoder.layers.34.self_attn.projection
785
+ vision_model.encoder.layers.34.self_attn.qkv
786
+ vision_model.encoder.layers.35.layer_norm1
787
+ vision_model.encoder.layers.35.layer_norm2
788
+ vision_model.encoder.layers.35.mlp
789
+ vision_model.encoder.layers.35.mlp.activation_fn
790
+ vision_model.encoder.layers.35.mlp.fc1
791
+ vision_model.encoder.layers.35.mlp.fc2
792
+ vision_model.encoder.layers.35.self_attn.dropout
793
+ vision_model.encoder.layers.35.self_attn.projection
794
+ vision_model.encoder.layers.35.self_attn.qkv
795
+ vision_model.encoder.layers.36.layer_norm1
796
+ vision_model.encoder.layers.36.layer_norm2
797
+ vision_model.encoder.layers.36.mlp
798
+ vision_model.encoder.layers.36.mlp.activation_fn
799
+ vision_model.encoder.layers.36.mlp.fc1
800
+ vision_model.encoder.layers.36.mlp.fc2
801
+ vision_model.encoder.layers.36.self_attn.dropout
802
+ vision_model.encoder.layers.36.self_attn.projection
803
+ vision_model.encoder.layers.36.self_attn.qkv
804
+ vision_model.encoder.layers.37.layer_norm1
805
+ vision_model.encoder.layers.37.layer_norm2
806
+ vision_model.encoder.layers.37.mlp
807
+ vision_model.encoder.layers.37.mlp.activation_fn
808
+ vision_model.encoder.layers.37.mlp.fc1
809
+ vision_model.encoder.layers.37.mlp.fc2
810
+ vision_model.encoder.layers.37.self_attn.dropout
811
+ vision_model.encoder.layers.37.self_attn.projection
812
+ vision_model.encoder.layers.37.self_attn.qkv
813
+ vision_model.encoder.layers.38.layer_norm1
814
+ vision_model.encoder.layers.38.layer_norm2
815
+ vision_model.encoder.layers.38.mlp
816
+ vision_model.encoder.layers.38.mlp.activation_fn
817
+ vision_model.encoder.layers.38.mlp.fc1
818
+ vision_model.encoder.layers.38.mlp.fc2
819
+ vision_model.encoder.layers.38.self_attn.dropout
820
+ vision_model.encoder.layers.38.self_attn.projection
821
+ vision_model.encoder.layers.38.self_attn.qkv
822
+ vision_model.encoder.layers.4.layer_norm1
823
+ vision_model.encoder.layers.4.layer_norm2
824
+ vision_model.encoder.layers.4.mlp
825
+ vision_model.encoder.layers.4.mlp.activation_fn
826
+ vision_model.encoder.layers.4.mlp.fc1
827
+ vision_model.encoder.layers.4.mlp.fc2
828
+ vision_model.encoder.layers.4.self_attn.dropout
829
+ vision_model.encoder.layers.4.self_attn.projection
830
+ vision_model.encoder.layers.4.self_attn.qkv
831
+ vision_model.encoder.layers.5.layer_norm1
832
+ vision_model.encoder.layers.5.layer_norm2
833
+ vision_model.encoder.layers.5.mlp
834
+ vision_model.encoder.layers.5.mlp.activation_fn
835
+ vision_model.encoder.layers.5.mlp.fc1
836
+ vision_model.encoder.layers.5.mlp.fc2
837
+ vision_model.encoder.layers.5.self_attn.dropout
838
+ vision_model.encoder.layers.5.self_attn.projection
839
+ vision_model.encoder.layers.5.self_attn.qkv
840
+ vision_model.encoder.layers.6.layer_norm1
841
+ vision_model.encoder.layers.6.layer_norm2
842
+ vision_model.encoder.layers.6.mlp
843
+ vision_model.encoder.layers.6.mlp.activation_fn
844
+ vision_model.encoder.layers.6.mlp.fc1
845
+ vision_model.encoder.layers.6.mlp.fc2
846
+ vision_model.encoder.layers.6.self_attn.dropout
847
+ vision_model.encoder.layers.6.self_attn.projection
848
+ vision_model.encoder.layers.6.self_attn.qkv
849
+ vision_model.encoder.layers.7.layer_norm1
850
+ vision_model.encoder.layers.7.layer_norm2
851
+ vision_model.encoder.layers.7.mlp
852
+ vision_model.encoder.layers.7.mlp.activation_fn
853
+ vision_model.encoder.layers.7.mlp.fc1
854
+ vision_model.encoder.layers.7.mlp.fc2
855
+ vision_model.encoder.layers.7.self_attn.dropout
856
+ vision_model.encoder.layers.7.self_attn.projection
857
+ vision_model.encoder.layers.7.self_attn.qkv
858
+ vision_model.encoder.layers.8.layer_norm1
859
+ vision_model.encoder.layers.8.layer_norm2
860
+ vision_model.encoder.layers.8.mlp
861
+ vision_model.encoder.layers.8.mlp.activation_fn
862
+ vision_model.encoder.layers.8.mlp.fc1
863
+ vision_model.encoder.layers.8.mlp.fc2
864
+ vision_model.encoder.layers.8.self_attn.dropout
865
+ vision_model.encoder.layers.8.self_attn.projection
866
+ vision_model.encoder.layers.8.self_attn.qkv
867
+ vision_model.encoder.layers.9.layer_norm1
868
+ vision_model.encoder.layers.9.layer_norm2
869
+ vision_model.encoder.layers.9.mlp
870
+ vision_model.encoder.layers.9.mlp.activation_fn
871
+ vision_model.encoder.layers.9.mlp.fc1
872
+ vision_model.encoder.layers.9.mlp.fc2
873
+ vision_model.encoder.layers.9.self_attn.dropout
874
+ vision_model.encoder.layers.9.self_attn.projection
875
+ vision_model.encoder.layers.9.self_attn.qkv
876
+ vision_model.post_layernorm
demo/logs/THUDM/cogvlm-chat-hf.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/allenai/Molmo-7B-D-0924.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/deepseek-community/Janus-Pro-1B.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/facebook/Perception-LM-1B.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/internlm/internlm-xcomposer2d5-7b.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/internvl/InternVL2_5-8B.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
{logs β†’ demo/logs}/llava-hf/llava-1.5-7b-hf.txt RENAMED
@@ -1,725 +1,604 @@
1
-
2
- vision_tower
3
- vision_tower.vision_model
4
- vision_tower.vision_model.embeddings
5
- vision_tower.vision_model.embeddings.patch_embedding
6
- vision_tower.vision_model.embeddings.position_embedding
7
- vision_tower.vision_model.pre_layrnorm
8
- vision_tower.vision_model.encoder
9
- vision_tower.vision_model.encoder.layers
10
- vision_tower.vision_model.encoder.layers.0
11
- vision_tower.vision_model.encoder.layers.0.self_attn
12
- vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
13
- vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
14
- vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
15
- vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
16
- vision_tower.vision_model.encoder.layers.0.layer_norm1
17
- vision_tower.vision_model.encoder.layers.0.mlp
18
- vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
19
- vision_tower.vision_model.encoder.layers.0.mlp.fc1
20
- vision_tower.vision_model.encoder.layers.0.mlp.fc2
21
- vision_tower.vision_model.encoder.layers.0.layer_norm2
22
- vision_tower.vision_model.encoder.layers.1
23
- vision_tower.vision_model.encoder.layers.1.self_attn
24
- vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
25
- vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
26
- vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
27
- vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
28
- vision_tower.vision_model.encoder.layers.1.layer_norm1
29
- vision_tower.vision_model.encoder.layers.1.mlp
30
- vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
31
- vision_tower.vision_model.encoder.layers.1.mlp.fc1
32
- vision_tower.vision_model.encoder.layers.1.mlp.fc2
33
- vision_tower.vision_model.encoder.layers.1.layer_norm2
34
- vision_tower.vision_model.encoder.layers.2
35
- vision_tower.vision_model.encoder.layers.2.self_attn
36
- vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
37
- vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
38
- vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
39
- vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
40
- vision_tower.vision_model.encoder.layers.2.layer_norm1
41
- vision_tower.vision_model.encoder.layers.2.mlp
42
- vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
43
- vision_tower.vision_model.encoder.layers.2.mlp.fc1
44
- vision_tower.vision_model.encoder.layers.2.mlp.fc2
45
- vision_tower.vision_model.encoder.layers.2.layer_norm2
46
- vision_tower.vision_model.encoder.layers.3
47
- vision_tower.vision_model.encoder.layers.3.self_attn
48
- vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
49
- vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
50
- vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
51
- vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
52
- vision_tower.vision_model.encoder.layers.3.layer_norm1
53
- vision_tower.vision_model.encoder.layers.3.mlp
54
- vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
55
- vision_tower.vision_model.encoder.layers.3.mlp.fc1
56
- vision_tower.vision_model.encoder.layers.3.mlp.fc2
57
- vision_tower.vision_model.encoder.layers.3.layer_norm2
58
- vision_tower.vision_model.encoder.layers.4
59
- vision_tower.vision_model.encoder.layers.4.self_attn
60
- vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
61
- vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
62
- vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
63
- vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
64
- vision_tower.vision_model.encoder.layers.4.layer_norm1
65
- vision_tower.vision_model.encoder.layers.4.mlp
66
- vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
67
- vision_tower.vision_model.encoder.layers.4.mlp.fc1
68
- vision_tower.vision_model.encoder.layers.4.mlp.fc2
69
- vision_tower.vision_model.encoder.layers.4.layer_norm2
70
- vision_tower.vision_model.encoder.layers.5
71
- vision_tower.vision_model.encoder.layers.5.self_attn
72
- vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
73
- vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
74
- vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
75
- vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
76
- vision_tower.vision_model.encoder.layers.5.layer_norm1
77
- vision_tower.vision_model.encoder.layers.5.mlp
78
- vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
79
- vision_tower.vision_model.encoder.layers.5.mlp.fc1
80
- vision_tower.vision_model.encoder.layers.5.mlp.fc2
81
- vision_tower.vision_model.encoder.layers.5.layer_norm2
82
- vision_tower.vision_model.encoder.layers.6
83
- vision_tower.vision_model.encoder.layers.6.self_attn
84
- vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
85
- vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
86
- vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
87
- vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
88
- vision_tower.vision_model.encoder.layers.6.layer_norm1
89
- vision_tower.vision_model.encoder.layers.6.mlp
90
- vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
91
- vision_tower.vision_model.encoder.layers.6.mlp.fc1
92
- vision_tower.vision_model.encoder.layers.6.mlp.fc2
93
- vision_tower.vision_model.encoder.layers.6.layer_norm2
94
- vision_tower.vision_model.encoder.layers.7
95
- vision_tower.vision_model.encoder.layers.7.self_attn
96
- vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
97
- vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
98
- vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
99
- vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
100
- vision_tower.vision_model.encoder.layers.7.layer_norm1
101
- vision_tower.vision_model.encoder.layers.7.mlp
102
- vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
103
- vision_tower.vision_model.encoder.layers.7.mlp.fc1
104
- vision_tower.vision_model.encoder.layers.7.mlp.fc2
105
- vision_tower.vision_model.encoder.layers.7.layer_norm2
106
- vision_tower.vision_model.encoder.layers.8
107
- vision_tower.vision_model.encoder.layers.8.self_attn
108
- vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
109
- vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
110
- vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
111
- vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
112
- vision_tower.vision_model.encoder.layers.8.layer_norm1
113
- vision_tower.vision_model.encoder.layers.8.mlp
114
- vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
115
- vision_tower.vision_model.encoder.layers.8.mlp.fc1
116
- vision_tower.vision_model.encoder.layers.8.mlp.fc2
117
- vision_tower.vision_model.encoder.layers.8.layer_norm2
118
- vision_tower.vision_model.encoder.layers.9
119
- vision_tower.vision_model.encoder.layers.9.self_attn
120
- vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
121
- vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
122
- vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
123
- vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
124
- vision_tower.vision_model.encoder.layers.9.layer_norm1
125
- vision_tower.vision_model.encoder.layers.9.mlp
126
- vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
127
- vision_tower.vision_model.encoder.layers.9.mlp.fc1
128
- vision_tower.vision_model.encoder.layers.9.mlp.fc2
129
- vision_tower.vision_model.encoder.layers.9.layer_norm2
130
- vision_tower.vision_model.encoder.layers.10
131
- vision_tower.vision_model.encoder.layers.10.self_attn
132
- vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
133
- vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
134
- vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
135
- vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
136
- vision_tower.vision_model.encoder.layers.10.layer_norm1
137
- vision_tower.vision_model.encoder.layers.10.mlp
138
- vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
139
- vision_tower.vision_model.encoder.layers.10.mlp.fc1
140
- vision_tower.vision_model.encoder.layers.10.mlp.fc2
141
- vision_tower.vision_model.encoder.layers.10.layer_norm2
142
- vision_tower.vision_model.encoder.layers.11
143
- vision_tower.vision_model.encoder.layers.11.self_attn
144
- vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
145
- vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
146
- vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
147
- vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
148
- vision_tower.vision_model.encoder.layers.11.layer_norm1
149
- vision_tower.vision_model.encoder.layers.11.mlp
150
- vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
151
- vision_tower.vision_model.encoder.layers.11.mlp.fc1
152
- vision_tower.vision_model.encoder.layers.11.mlp.fc2
153
- vision_tower.vision_model.encoder.layers.11.layer_norm2
154
- vision_tower.vision_model.encoder.layers.12
155
- vision_tower.vision_model.encoder.layers.12.self_attn
156
- vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
157
- vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
158
- vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
159
- vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
160
- vision_tower.vision_model.encoder.layers.12.layer_norm1
161
- vision_tower.vision_model.encoder.layers.12.mlp
162
- vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
163
- vision_tower.vision_model.encoder.layers.12.mlp.fc1
164
- vision_tower.vision_model.encoder.layers.12.mlp.fc2
165
- vision_tower.vision_model.encoder.layers.12.layer_norm2
166
- vision_tower.vision_model.encoder.layers.13
167
- vision_tower.vision_model.encoder.layers.13.self_attn
168
- vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
169
- vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
170
- vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
171
- vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
172
- vision_tower.vision_model.encoder.layers.13.layer_norm1
173
- vision_tower.vision_model.encoder.layers.13.mlp
174
- vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
175
- vision_tower.vision_model.encoder.layers.13.mlp.fc1
176
- vision_tower.vision_model.encoder.layers.13.mlp.fc2
177
- vision_tower.vision_model.encoder.layers.13.layer_norm2
178
- vision_tower.vision_model.encoder.layers.14
179
- vision_tower.vision_model.encoder.layers.14.self_attn
180
- vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
181
- vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
182
- vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
183
- vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
184
- vision_tower.vision_model.encoder.layers.14.layer_norm1
185
- vision_tower.vision_model.encoder.layers.14.mlp
186
- vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
187
- vision_tower.vision_model.encoder.layers.14.mlp.fc1
188
- vision_tower.vision_model.encoder.layers.14.mlp.fc2
189
- vision_tower.vision_model.encoder.layers.14.layer_norm2
190
- vision_tower.vision_model.encoder.layers.15
191
- vision_tower.vision_model.encoder.layers.15.self_attn
192
- vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
193
- vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
194
- vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
195
- vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
196
- vision_tower.vision_model.encoder.layers.15.layer_norm1
197
- vision_tower.vision_model.encoder.layers.15.mlp
198
- vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
199
- vision_tower.vision_model.encoder.layers.15.mlp.fc1
200
- vision_tower.vision_model.encoder.layers.15.mlp.fc2
201
- vision_tower.vision_model.encoder.layers.15.layer_norm2
202
- vision_tower.vision_model.encoder.layers.16
203
- vision_tower.vision_model.encoder.layers.16.self_attn
204
- vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
205
- vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
206
- vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
207
- vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
208
- vision_tower.vision_model.encoder.layers.16.layer_norm1
209
- vision_tower.vision_model.encoder.layers.16.mlp
210
- vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
211
- vision_tower.vision_model.encoder.layers.16.mlp.fc1
212
- vision_tower.vision_model.encoder.layers.16.mlp.fc2
213
- vision_tower.vision_model.encoder.layers.16.layer_norm2
214
- vision_tower.vision_model.encoder.layers.17
215
- vision_tower.vision_model.encoder.layers.17.self_attn
216
- vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
217
- vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
218
- vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
219
- vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
220
- vision_tower.vision_model.encoder.layers.17.layer_norm1
221
- vision_tower.vision_model.encoder.layers.17.mlp
222
- vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
223
- vision_tower.vision_model.encoder.layers.17.mlp.fc1
224
- vision_tower.vision_model.encoder.layers.17.mlp.fc2
225
- vision_tower.vision_model.encoder.layers.17.layer_norm2
226
- vision_tower.vision_model.encoder.layers.18
227
- vision_tower.vision_model.encoder.layers.18.self_attn
228
- vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
229
- vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
230
- vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
231
- vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
232
- vision_tower.vision_model.encoder.layers.18.layer_norm1
233
- vision_tower.vision_model.encoder.layers.18.mlp
234
- vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
235
- vision_tower.vision_model.encoder.layers.18.mlp.fc1
236
- vision_tower.vision_model.encoder.layers.18.mlp.fc2
237
- vision_tower.vision_model.encoder.layers.18.layer_norm2
238
- vision_tower.vision_model.encoder.layers.19
239
- vision_tower.vision_model.encoder.layers.19.self_attn
240
- vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
241
- vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
242
- vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
243
- vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
244
- vision_tower.vision_model.encoder.layers.19.layer_norm1
245
- vision_tower.vision_model.encoder.layers.19.mlp
246
- vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
247
- vision_tower.vision_model.encoder.layers.19.mlp.fc1
248
- vision_tower.vision_model.encoder.layers.19.mlp.fc2
249
- vision_tower.vision_model.encoder.layers.19.layer_norm2
250
- vision_tower.vision_model.encoder.layers.20
251
- vision_tower.vision_model.encoder.layers.20.self_attn
252
- vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
253
- vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
254
- vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
255
- vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
256
- vision_tower.vision_model.encoder.layers.20.layer_norm1
257
- vision_tower.vision_model.encoder.layers.20.mlp
258
- vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
259
- vision_tower.vision_model.encoder.layers.20.mlp.fc1
260
- vision_tower.vision_model.encoder.layers.20.mlp.fc2
261
- vision_tower.vision_model.encoder.layers.20.layer_norm2
262
- vision_tower.vision_model.encoder.layers.21
263
- vision_tower.vision_model.encoder.layers.21.self_attn
264
- vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
265
- vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
266
- vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
267
- vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
268
- vision_tower.vision_model.encoder.layers.21.layer_norm1
269
- vision_tower.vision_model.encoder.layers.21.mlp
270
- vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
271
- vision_tower.vision_model.encoder.layers.21.mlp.fc1
272
- vision_tower.vision_model.encoder.layers.21.mlp.fc2
273
- vision_tower.vision_model.encoder.layers.21.layer_norm2
274
- vision_tower.vision_model.encoder.layers.22
275
- vision_tower.vision_model.encoder.layers.22.self_attn
276
- vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
277
- vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
278
- vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
279
- vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
280
- vision_tower.vision_model.encoder.layers.22.layer_norm1
281
- vision_tower.vision_model.encoder.layers.22.mlp
282
- vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
283
- vision_tower.vision_model.encoder.layers.22.mlp.fc1
284
- vision_tower.vision_model.encoder.layers.22.mlp.fc2
285
- vision_tower.vision_model.encoder.layers.22.layer_norm2
286
- vision_tower.vision_model.encoder.layers.23
287
- vision_tower.vision_model.encoder.layers.23.self_attn
288
- vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
289
- vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
290
- vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
291
- vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
292
- vision_tower.vision_model.encoder.layers.23.layer_norm1
293
- vision_tower.vision_model.encoder.layers.23.mlp
294
- vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
295
- vision_tower.vision_model.encoder.layers.23.mlp.fc1
296
- vision_tower.vision_model.encoder.layers.23.mlp.fc2
297
- vision_tower.vision_model.encoder.layers.23.layer_norm2
298
- vision_tower.vision_model.post_layernorm
299
- multi_modal_projector
300
- multi_modal_projector.linear_1
301
- multi_modal_projector.act
302
- multi_modal_projector.linear_2
303
- language_model
304
- language_model.model
305
  language_model.model.embed_tokens
306
- language_model.model.layers
307
- language_model.model.layers.0
308
- language_model.model.layers.0.self_attn
309
- language_model.model.layers.0.self_attn.q_proj
310
- language_model.model.layers.0.self_attn.k_proj
311
- language_model.model.layers.0.self_attn.v_proj
312
- language_model.model.layers.0.self_attn.o_proj
313
  language_model.model.layers.0.mlp
314
- language_model.model.layers.0.mlp.gate_proj
315
- language_model.model.layers.0.mlp.up_proj
316
- language_model.model.layers.0.mlp.down_proj
317
  language_model.model.layers.0.mlp.act_fn
318
- language_model.model.layers.0.input_layernorm
 
 
319
  language_model.model.layers.0.post_attention_layernorm
320
- language_model.model.layers.1
321
- language_model.model.layers.1.self_attn
322
- language_model.model.layers.1.self_attn.q_proj
323
- language_model.model.layers.1.self_attn.k_proj
324
- language_model.model.layers.1.self_attn.v_proj
325
- language_model.model.layers.1.self_attn.o_proj
326
  language_model.model.layers.1.mlp
 
 
327
  language_model.model.layers.1.mlp.gate_proj
328
  language_model.model.layers.1.mlp.up_proj
329
- language_model.model.layers.1.mlp.down_proj
330
- language_model.model.layers.1.mlp.act_fn
331
- language_model.model.layers.1.input_layernorm
332
  language_model.model.layers.1.post_attention_layernorm
333
- language_model.model.layers.2
334
- language_model.model.layers.2.self_attn
335
- language_model.model.layers.2.self_attn.q_proj
336
- language_model.model.layers.2.self_attn.k_proj
337
- language_model.model.layers.2.self_attn.v_proj
338
- language_model.model.layers.2.self_attn.o_proj
339
- language_model.model.layers.2.mlp
340
- language_model.model.layers.2.mlp.gate_proj
341
- language_model.model.layers.2.mlp.up_proj
342
- language_model.model.layers.2.mlp.down_proj
343
- language_model.model.layers.2.mlp.act_fn
344
- language_model.model.layers.2.input_layernorm
345
- language_model.model.layers.2.post_attention_layernorm
346
- language_model.model.layers.3
347
- language_model.model.layers.3.self_attn
348
- language_model.model.layers.3.self_attn.q_proj
349
- language_model.model.layers.3.self_attn.k_proj
350
- language_model.model.layers.3.self_attn.v_proj
351
- language_model.model.layers.3.self_attn.o_proj
352
- language_model.model.layers.3.mlp
353
- language_model.model.layers.3.mlp.gate_proj
354
- language_model.model.layers.3.mlp.up_proj
355
- language_model.model.layers.3.mlp.down_proj
356
- language_model.model.layers.3.mlp.act_fn
357
- language_model.model.layers.3.input_layernorm
358
- language_model.model.layers.3.post_attention_layernorm
359
- language_model.model.layers.4
360
- language_model.model.layers.4.self_attn
361
- language_model.model.layers.4.self_attn.q_proj
362
- language_model.model.layers.4.self_attn.k_proj
363
- language_model.model.layers.4.self_attn.v_proj
364
- language_model.model.layers.4.self_attn.o_proj
365
- language_model.model.layers.4.mlp
366
- language_model.model.layers.4.mlp.gate_proj
367
- language_model.model.layers.4.mlp.up_proj
368
- language_model.model.layers.4.mlp.down_proj
369
- language_model.model.layers.4.mlp.act_fn
370
- language_model.model.layers.4.input_layernorm
371
- language_model.model.layers.4.post_attention_layernorm
372
- language_model.model.layers.5
373
- language_model.model.layers.5.self_attn
374
- language_model.model.layers.5.self_attn.q_proj
375
- language_model.model.layers.5.self_attn.k_proj
376
- language_model.model.layers.5.self_attn.v_proj
377
- language_model.model.layers.5.self_attn.o_proj
378
- language_model.model.layers.5.mlp
379
- language_model.model.layers.5.mlp.gate_proj
380
- language_model.model.layers.5.mlp.up_proj
381
- language_model.model.layers.5.mlp.down_proj
382
- language_model.model.layers.5.mlp.act_fn
383
- language_model.model.layers.5.input_layernorm
384
- language_model.model.layers.5.post_attention_layernorm
385
- language_model.model.layers.6
386
- language_model.model.layers.6.self_attn
387
- language_model.model.layers.6.self_attn.q_proj
388
- language_model.model.layers.6.self_attn.k_proj
389
- language_model.model.layers.6.self_attn.v_proj
390
- language_model.model.layers.6.self_attn.o_proj
391
- language_model.model.layers.6.mlp
392
- language_model.model.layers.6.mlp.gate_proj
393
- language_model.model.layers.6.mlp.up_proj
394
- language_model.model.layers.6.mlp.down_proj
395
- language_model.model.layers.6.mlp.act_fn
396
- language_model.model.layers.6.input_layernorm
397
- language_model.model.layers.6.post_attention_layernorm
398
- language_model.model.layers.7
399
- language_model.model.layers.7.self_attn
400
- language_model.model.layers.7.self_attn.q_proj
401
- language_model.model.layers.7.self_attn.k_proj
402
- language_model.model.layers.7.self_attn.v_proj
403
- language_model.model.layers.7.self_attn.o_proj
404
- language_model.model.layers.7.mlp
405
- language_model.model.layers.7.mlp.gate_proj
406
- language_model.model.layers.7.mlp.up_proj
407
- language_model.model.layers.7.mlp.down_proj
408
- language_model.model.layers.7.mlp.act_fn
409
- language_model.model.layers.7.input_layernorm
410
- language_model.model.layers.7.post_attention_layernorm
411
- language_model.model.layers.8
412
- language_model.model.layers.8.self_attn
413
- language_model.model.layers.8.self_attn.q_proj
414
- language_model.model.layers.8.self_attn.k_proj
415
- language_model.model.layers.8.self_attn.v_proj
416
- language_model.model.layers.8.self_attn.o_proj
417
- language_model.model.layers.8.mlp
418
- language_model.model.layers.8.mlp.gate_proj
419
- language_model.model.layers.8.mlp.up_proj
420
- language_model.model.layers.8.mlp.down_proj
421
- language_model.model.layers.8.mlp.act_fn
422
- language_model.model.layers.8.input_layernorm
423
- language_model.model.layers.8.post_attention_layernorm
424
- language_model.model.layers.9
425
- language_model.model.layers.9.self_attn
426
- language_model.model.layers.9.self_attn.q_proj
427
- language_model.model.layers.9.self_attn.k_proj
428
- language_model.model.layers.9.self_attn.v_proj
429
- language_model.model.layers.9.self_attn.o_proj
430
- language_model.model.layers.9.mlp
431
- language_model.model.layers.9.mlp.gate_proj
432
- language_model.model.layers.9.mlp.up_proj
433
- language_model.model.layers.9.mlp.down_proj
434
- language_model.model.layers.9.mlp.act_fn
435
- language_model.model.layers.9.input_layernorm
436
- language_model.model.layers.9.post_attention_layernorm
437
- language_model.model.layers.10
438
- language_model.model.layers.10.self_attn
439
- language_model.model.layers.10.self_attn.q_proj
440
- language_model.model.layers.10.self_attn.k_proj
441
- language_model.model.layers.10.self_attn.v_proj
442
- language_model.model.layers.10.self_attn.o_proj
443
  language_model.model.layers.10.mlp
 
 
444
  language_model.model.layers.10.mlp.gate_proj
445
  language_model.model.layers.10.mlp.up_proj
446
- language_model.model.layers.10.mlp.down_proj
447
- language_model.model.layers.10.mlp.act_fn
448
- language_model.model.layers.10.input_layernorm
449
  language_model.model.layers.10.post_attention_layernorm
450
- language_model.model.layers.11
451
- language_model.model.layers.11.self_attn
452
- language_model.model.layers.11.self_attn.q_proj
453
- language_model.model.layers.11.self_attn.k_proj
454
- language_model.model.layers.11.self_attn.v_proj
455
- language_model.model.layers.11.self_attn.o_proj
456
  language_model.model.layers.11.mlp
 
 
457
  language_model.model.layers.11.mlp.gate_proj
458
  language_model.model.layers.11.mlp.up_proj
459
- language_model.model.layers.11.mlp.down_proj
460
- language_model.model.layers.11.mlp.act_fn
461
- language_model.model.layers.11.input_layernorm
462
  language_model.model.layers.11.post_attention_layernorm
463
- language_model.model.layers.12
464
- language_model.model.layers.12.self_attn
465
- language_model.model.layers.12.self_attn.q_proj
466
- language_model.model.layers.12.self_attn.k_proj
467
- language_model.model.layers.12.self_attn.v_proj
468
- language_model.model.layers.12.self_attn.o_proj
469
  language_model.model.layers.12.mlp
 
 
470
  language_model.model.layers.12.mlp.gate_proj
471
  language_model.model.layers.12.mlp.up_proj
472
- language_model.model.layers.12.mlp.down_proj
473
- language_model.model.layers.12.mlp.act_fn
474
- language_model.model.layers.12.input_layernorm
475
  language_model.model.layers.12.post_attention_layernorm
476
- language_model.model.layers.13
477
- language_model.model.layers.13.self_attn
478
- language_model.model.layers.13.self_attn.q_proj
479
- language_model.model.layers.13.self_attn.k_proj
480
- language_model.model.layers.13.self_attn.v_proj
481
- language_model.model.layers.13.self_attn.o_proj
482
  language_model.model.layers.13.mlp
 
 
483
  language_model.model.layers.13.mlp.gate_proj
484
  language_model.model.layers.13.mlp.up_proj
485
- language_model.model.layers.13.mlp.down_proj
486
- language_model.model.layers.13.mlp.act_fn
487
- language_model.model.layers.13.input_layernorm
488
  language_model.model.layers.13.post_attention_layernorm
489
- language_model.model.layers.14
490
- language_model.model.layers.14.self_attn
491
- language_model.model.layers.14.self_attn.q_proj
492
- language_model.model.layers.14.self_attn.k_proj
493
- language_model.model.layers.14.self_attn.v_proj
494
- language_model.model.layers.14.self_attn.o_proj
495
  language_model.model.layers.14.mlp
 
 
496
  language_model.model.layers.14.mlp.gate_proj
497
  language_model.model.layers.14.mlp.up_proj
498
- language_model.model.layers.14.mlp.down_proj
499
- language_model.model.layers.14.mlp.act_fn
500
- language_model.model.layers.14.input_layernorm
501
  language_model.model.layers.14.post_attention_layernorm
502
- language_model.model.layers.15
503
- language_model.model.layers.15.self_attn
504
- language_model.model.layers.15.self_attn.q_proj
505
- language_model.model.layers.15.self_attn.k_proj
506
- language_model.model.layers.15.self_attn.v_proj
507
- language_model.model.layers.15.self_attn.o_proj
508
  language_model.model.layers.15.mlp
 
 
509
  language_model.model.layers.15.mlp.gate_proj
510
  language_model.model.layers.15.mlp.up_proj
511
- language_model.model.layers.15.mlp.down_proj
512
- language_model.model.layers.15.mlp.act_fn
513
- language_model.model.layers.15.input_layernorm
514
  language_model.model.layers.15.post_attention_layernorm
515
- language_model.model.layers.16
516
- language_model.model.layers.16.self_attn
517
- language_model.model.layers.16.self_attn.q_proj
518
- language_model.model.layers.16.self_attn.k_proj
519
- language_model.model.layers.16.self_attn.v_proj
520
- language_model.model.layers.16.self_attn.o_proj
521
  language_model.model.layers.16.mlp
 
 
522
  language_model.model.layers.16.mlp.gate_proj
523
  language_model.model.layers.16.mlp.up_proj
524
- language_model.model.layers.16.mlp.down_proj
525
- language_model.model.layers.16.mlp.act_fn
526
- language_model.model.layers.16.input_layernorm
527
  language_model.model.layers.16.post_attention_layernorm
528
- language_model.model.layers.17
529
- language_model.model.layers.17.self_attn
530
- language_model.model.layers.17.self_attn.q_proj
531
- language_model.model.layers.17.self_attn.k_proj
532
- language_model.model.layers.17.self_attn.v_proj
533
- language_model.model.layers.17.self_attn.o_proj
534
  language_model.model.layers.17.mlp
 
 
535
  language_model.model.layers.17.mlp.gate_proj
536
  language_model.model.layers.17.mlp.up_proj
537
- language_model.model.layers.17.mlp.down_proj
538
- language_model.model.layers.17.mlp.act_fn
539
- language_model.model.layers.17.input_layernorm
540
  language_model.model.layers.17.post_attention_layernorm
541
- language_model.model.layers.18
542
- language_model.model.layers.18.self_attn
543
- language_model.model.layers.18.self_attn.q_proj
544
- language_model.model.layers.18.self_attn.k_proj
545
- language_model.model.layers.18.self_attn.v_proj
546
- language_model.model.layers.18.self_attn.o_proj
547
  language_model.model.layers.18.mlp
 
 
548
  language_model.model.layers.18.mlp.gate_proj
549
  language_model.model.layers.18.mlp.up_proj
550
- language_model.model.layers.18.mlp.down_proj
551
- language_model.model.layers.18.mlp.act_fn
552
- language_model.model.layers.18.input_layernorm
553
  language_model.model.layers.18.post_attention_layernorm
554
- language_model.model.layers.19
555
- language_model.model.layers.19.self_attn
556
- language_model.model.layers.19.self_attn.q_proj
557
- language_model.model.layers.19.self_attn.k_proj
558
- language_model.model.layers.19.self_attn.v_proj
559
- language_model.model.layers.19.self_attn.o_proj
560
  language_model.model.layers.19.mlp
 
 
561
  language_model.model.layers.19.mlp.gate_proj
562
  language_model.model.layers.19.mlp.up_proj
563
- language_model.model.layers.19.mlp.down_proj
564
- language_model.model.layers.19.mlp.act_fn
565
- language_model.model.layers.19.input_layernorm
566
  language_model.model.layers.19.post_attention_layernorm
567
- language_model.model.layers.20
568
- language_model.model.layers.20.self_attn
569
- language_model.model.layers.20.self_attn.q_proj
570
- language_model.model.layers.20.self_attn.k_proj
571
- language_model.model.layers.20.self_attn.v_proj
572
- language_model.model.layers.20.self_attn.o_proj
573
- language_model.model.layers.20.mlp
574
- language_model.model.layers.20.mlp.gate_proj
575
- language_model.model.layers.20.mlp.up_proj
576
- language_model.model.layers.20.mlp.down_proj
577
- language_model.model.layers.20.mlp.act_fn
 
 
 
 
578
  language_model.model.layers.20.input_layernorm
 
 
 
 
 
579
  language_model.model.layers.20.post_attention_layernorm
580
- language_model.model.layers.21
581
- language_model.model.layers.21.self_attn
582
- language_model.model.layers.21.self_attn.q_proj
583
- language_model.model.layers.21.self_attn.k_proj
584
- language_model.model.layers.21.self_attn.v_proj
585
- language_model.model.layers.21.self_attn.o_proj
586
  language_model.model.layers.21.mlp
 
 
587
  language_model.model.layers.21.mlp.gate_proj
588
  language_model.model.layers.21.mlp.up_proj
589
- language_model.model.layers.21.mlp.down_proj
590
- language_model.model.layers.21.mlp.act_fn
591
- language_model.model.layers.21.input_layernorm
592
  language_model.model.layers.21.post_attention_layernorm
593
- language_model.model.layers.22
594
- language_model.model.layers.22.self_attn
595
- language_model.model.layers.22.self_attn.q_proj
596
- language_model.model.layers.22.self_attn.k_proj
597
- language_model.model.layers.22.self_attn.v_proj
598
- language_model.model.layers.22.self_attn.o_proj
599
  language_model.model.layers.22.mlp
 
 
600
  language_model.model.layers.22.mlp.gate_proj
601
  language_model.model.layers.22.mlp.up_proj
602
- language_model.model.layers.22.mlp.down_proj
603
- language_model.model.layers.22.mlp.act_fn
604
- language_model.model.layers.22.input_layernorm
605
  language_model.model.layers.22.post_attention_layernorm
606
- language_model.model.layers.23
607
- language_model.model.layers.23.self_attn
608
- language_model.model.layers.23.self_attn.q_proj
609
- language_model.model.layers.23.self_attn.k_proj
610
- language_model.model.layers.23.self_attn.v_proj
611
- language_model.model.layers.23.self_attn.o_proj
612
  language_model.model.layers.23.mlp
 
 
613
  language_model.model.layers.23.mlp.gate_proj
614
  language_model.model.layers.23.mlp.up_proj
615
- language_model.model.layers.23.mlp.down_proj
616
- language_model.model.layers.23.mlp.act_fn
617
- language_model.model.layers.23.input_layernorm
618
  language_model.model.layers.23.post_attention_layernorm
619
- language_model.model.layers.24
620
- language_model.model.layers.24.self_attn
621
- language_model.model.layers.24.self_attn.q_proj
622
- language_model.model.layers.24.self_attn.k_proj
623
- language_model.model.layers.24.self_attn.v_proj
624
- language_model.model.layers.24.self_attn.o_proj
625
  language_model.model.layers.24.mlp
 
 
626
  language_model.model.layers.24.mlp.gate_proj
627
  language_model.model.layers.24.mlp.up_proj
628
- language_model.model.layers.24.mlp.down_proj
629
- language_model.model.layers.24.mlp.act_fn
630
- language_model.model.layers.24.input_layernorm
631
  language_model.model.layers.24.post_attention_layernorm
632
- language_model.model.layers.25
633
- language_model.model.layers.25.self_attn
634
- language_model.model.layers.25.self_attn.q_proj
635
- language_model.model.layers.25.self_attn.k_proj
636
- language_model.model.layers.25.self_attn.v_proj
637
- language_model.model.layers.25.self_attn.o_proj
638
  language_model.model.layers.25.mlp
 
 
639
  language_model.model.layers.25.mlp.gate_proj
640
  language_model.model.layers.25.mlp.up_proj
641
- language_model.model.layers.25.mlp.down_proj
642
- language_model.model.layers.25.mlp.act_fn
643
- language_model.model.layers.25.input_layernorm
644
  language_model.model.layers.25.post_attention_layernorm
645
- language_model.model.layers.26
646
- language_model.model.layers.26.self_attn
647
- language_model.model.layers.26.self_attn.q_proj
648
- language_model.model.layers.26.self_attn.k_proj
649
- language_model.model.layers.26.self_attn.v_proj
650
- language_model.model.layers.26.self_attn.o_proj
651
  language_model.model.layers.26.mlp
 
 
652
  language_model.model.layers.26.mlp.gate_proj
653
  language_model.model.layers.26.mlp.up_proj
654
- language_model.model.layers.26.mlp.down_proj
655
- language_model.model.layers.26.mlp.act_fn
656
- language_model.model.layers.26.input_layernorm
657
  language_model.model.layers.26.post_attention_layernorm
658
- language_model.model.layers.27
659
- language_model.model.layers.27.self_attn
660
- language_model.model.layers.27.self_attn.q_proj
661
- language_model.model.layers.27.self_attn.k_proj
662
- language_model.model.layers.27.self_attn.v_proj
663
- language_model.model.layers.27.self_attn.o_proj
664
  language_model.model.layers.27.mlp
 
 
665
  language_model.model.layers.27.mlp.gate_proj
666
  language_model.model.layers.27.mlp.up_proj
667
- language_model.model.layers.27.mlp.down_proj
668
- language_model.model.layers.27.mlp.act_fn
669
- language_model.model.layers.27.input_layernorm
670
  language_model.model.layers.27.post_attention_layernorm
671
- language_model.model.layers.28
672
- language_model.model.layers.28.self_attn
673
- language_model.model.layers.28.self_attn.q_proj
674
- language_model.model.layers.28.self_attn.k_proj
675
- language_model.model.layers.28.self_attn.v_proj
676
- language_model.model.layers.28.self_attn.o_proj
677
  language_model.model.layers.28.mlp
 
 
678
  language_model.model.layers.28.mlp.gate_proj
679
  language_model.model.layers.28.mlp.up_proj
680
- language_model.model.layers.28.mlp.down_proj
681
- language_model.model.layers.28.mlp.act_fn
682
- language_model.model.layers.28.input_layernorm
683
  language_model.model.layers.28.post_attention_layernorm
684
- language_model.model.layers.29
685
- language_model.model.layers.29.self_attn
686
- language_model.model.layers.29.self_attn.q_proj
687
- language_model.model.layers.29.self_attn.k_proj
688
- language_model.model.layers.29.self_attn.v_proj
689
- language_model.model.layers.29.self_attn.o_proj
690
  language_model.model.layers.29.mlp
 
 
691
  language_model.model.layers.29.mlp.gate_proj
692
  language_model.model.layers.29.mlp.up_proj
693
- language_model.model.layers.29.mlp.down_proj
694
- language_model.model.layers.29.mlp.act_fn
695
- language_model.model.layers.29.input_layernorm
696
  language_model.model.layers.29.post_attention_layernorm
697
- language_model.model.layers.30
698
- language_model.model.layers.30.self_attn
699
- language_model.model.layers.30.self_attn.q_proj
700
- language_model.model.layers.30.self_attn.k_proj
701
- language_model.model.layers.30.self_attn.v_proj
702
- language_model.model.layers.30.self_attn.o_proj
 
 
 
 
 
 
 
 
 
 
703
  language_model.model.layers.30.mlp
 
 
704
  language_model.model.layers.30.mlp.gate_proj
705
  language_model.model.layers.30.mlp.up_proj
706
- language_model.model.layers.30.mlp.down_proj
707
- language_model.model.layers.30.mlp.act_fn
708
- language_model.model.layers.30.input_layernorm
709
  language_model.model.layers.30.post_attention_layernorm
710
- language_model.model.layers.31
711
- language_model.model.layers.31.self_attn
712
- language_model.model.layers.31.self_attn.q_proj
713
- language_model.model.layers.31.self_attn.k_proj
714
- language_model.model.layers.31.self_attn.v_proj
715
- language_model.model.layers.31.self_attn.o_proj
716
  language_model.model.layers.31.mlp
 
 
717
  language_model.model.layers.31.mlp.gate_proj
718
  language_model.model.layers.31.mlp.up_proj
719
- language_model.model.layers.31.mlp.down_proj
720
- language_model.model.layers.31.mlp.act_fn
721
- language_model.model.layers.31.input_layernorm
722
  language_model.model.layers.31.post_attention_layernorm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
723
  language_model.model.norm
724
- language_model.model.rotary_emb
725
- language_model.lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ language_model.lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  language_model.model.embed_tokens
3
+ language_model.model.layers.0.input_layernorm
 
 
 
 
 
 
4
  language_model.model.layers.0.mlp
 
 
 
5
  language_model.model.layers.0.mlp.act_fn
6
+ language_model.model.layers.0.mlp.down_proj
7
+ language_model.model.layers.0.mlp.gate_proj
8
+ language_model.model.layers.0.mlp.up_proj
9
  language_model.model.layers.0.post_attention_layernorm
10
+ language_model.model.layers.0.self_attn.k_proj
11
+ language_model.model.layers.0.self_attn.o_proj
12
+ language_model.model.layers.0.self_attn.q_proj
13
+ language_model.model.layers.0.self_attn.v_proj
14
+ language_model.model.layers.1.input_layernorm
 
15
  language_model.model.layers.1.mlp
16
+ language_model.model.layers.1.mlp.act_fn
17
+ language_model.model.layers.1.mlp.down_proj
18
  language_model.model.layers.1.mlp.gate_proj
19
  language_model.model.layers.1.mlp.up_proj
 
 
 
20
  language_model.model.layers.1.post_attention_layernorm
21
+ language_model.model.layers.1.self_attn.k_proj
22
+ language_model.model.layers.1.self_attn.o_proj
23
+ language_model.model.layers.1.self_attn.q_proj
24
+ language_model.model.layers.1.self_attn.v_proj
25
+ language_model.model.layers.10.input_layernorm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  language_model.model.layers.10.mlp
27
+ language_model.model.layers.10.mlp.act_fn
28
+ language_model.model.layers.10.mlp.down_proj
29
  language_model.model.layers.10.mlp.gate_proj
30
  language_model.model.layers.10.mlp.up_proj
 
 
 
31
  language_model.model.layers.10.post_attention_layernorm
32
+ language_model.model.layers.10.self_attn.k_proj
33
+ language_model.model.layers.10.self_attn.o_proj
34
+ language_model.model.layers.10.self_attn.q_proj
35
+ language_model.model.layers.10.self_attn.v_proj
36
+ language_model.model.layers.11.input_layernorm
 
37
  language_model.model.layers.11.mlp
38
+ language_model.model.layers.11.mlp.act_fn
39
+ language_model.model.layers.11.mlp.down_proj
40
  language_model.model.layers.11.mlp.gate_proj
41
  language_model.model.layers.11.mlp.up_proj
 
 
 
42
  language_model.model.layers.11.post_attention_layernorm
43
+ language_model.model.layers.11.self_attn.k_proj
44
+ language_model.model.layers.11.self_attn.o_proj
45
+ language_model.model.layers.11.self_attn.q_proj
46
+ language_model.model.layers.11.self_attn.v_proj
47
+ language_model.model.layers.12.input_layernorm
 
48
  language_model.model.layers.12.mlp
49
+ language_model.model.layers.12.mlp.act_fn
50
+ language_model.model.layers.12.mlp.down_proj
51
  language_model.model.layers.12.mlp.gate_proj
52
  language_model.model.layers.12.mlp.up_proj
 
 
 
53
  language_model.model.layers.12.post_attention_layernorm
54
+ language_model.model.layers.12.self_attn.k_proj
55
+ language_model.model.layers.12.self_attn.o_proj
56
+ language_model.model.layers.12.self_attn.q_proj
57
+ language_model.model.layers.12.self_attn.v_proj
58
+ language_model.model.layers.13.input_layernorm
 
59
  language_model.model.layers.13.mlp
60
+ language_model.model.layers.13.mlp.act_fn
61
+ language_model.model.layers.13.mlp.down_proj
62
  language_model.model.layers.13.mlp.gate_proj
63
  language_model.model.layers.13.mlp.up_proj
 
 
 
64
  language_model.model.layers.13.post_attention_layernorm
65
+ language_model.model.layers.13.self_attn.k_proj
66
+ language_model.model.layers.13.self_attn.o_proj
67
+ language_model.model.layers.13.self_attn.q_proj
68
+ language_model.model.layers.13.self_attn.v_proj
69
+ language_model.model.layers.14.input_layernorm
 
70
  language_model.model.layers.14.mlp
71
+ language_model.model.layers.14.mlp.act_fn
72
+ language_model.model.layers.14.mlp.down_proj
73
  language_model.model.layers.14.mlp.gate_proj
74
  language_model.model.layers.14.mlp.up_proj
 
 
 
75
  language_model.model.layers.14.post_attention_layernorm
76
+ language_model.model.layers.14.self_attn.k_proj
77
+ language_model.model.layers.14.self_attn.o_proj
78
+ language_model.model.layers.14.self_attn.q_proj
79
+ language_model.model.layers.14.self_attn.v_proj
80
+ language_model.model.layers.15.input_layernorm
 
81
  language_model.model.layers.15.mlp
82
+ language_model.model.layers.15.mlp.act_fn
83
+ language_model.model.layers.15.mlp.down_proj
84
  language_model.model.layers.15.mlp.gate_proj
85
  language_model.model.layers.15.mlp.up_proj
 
 
 
86
  language_model.model.layers.15.post_attention_layernorm
87
+ language_model.model.layers.15.self_attn.k_proj
88
+ language_model.model.layers.15.self_attn.o_proj
89
+ language_model.model.layers.15.self_attn.q_proj
90
+ language_model.model.layers.15.self_attn.v_proj
91
+ language_model.model.layers.16.input_layernorm
 
92
  language_model.model.layers.16.mlp
93
+ language_model.model.layers.16.mlp.act_fn
94
+ language_model.model.layers.16.mlp.down_proj
95
  language_model.model.layers.16.mlp.gate_proj
96
  language_model.model.layers.16.mlp.up_proj
 
 
 
97
  language_model.model.layers.16.post_attention_layernorm
98
+ language_model.model.layers.16.self_attn.k_proj
99
+ language_model.model.layers.16.self_attn.o_proj
100
+ language_model.model.layers.16.self_attn.q_proj
101
+ language_model.model.layers.16.self_attn.v_proj
102
+ language_model.model.layers.17.input_layernorm
 
103
  language_model.model.layers.17.mlp
104
+ language_model.model.layers.17.mlp.act_fn
105
+ language_model.model.layers.17.mlp.down_proj
106
  language_model.model.layers.17.mlp.gate_proj
107
  language_model.model.layers.17.mlp.up_proj
 
 
 
108
  language_model.model.layers.17.post_attention_layernorm
109
+ language_model.model.layers.17.self_attn.k_proj
110
+ language_model.model.layers.17.self_attn.o_proj
111
+ language_model.model.layers.17.self_attn.q_proj
112
+ language_model.model.layers.17.self_attn.v_proj
113
+ language_model.model.layers.18.input_layernorm
 
114
  language_model.model.layers.18.mlp
115
+ language_model.model.layers.18.mlp.act_fn
116
+ language_model.model.layers.18.mlp.down_proj
117
  language_model.model.layers.18.mlp.gate_proj
118
  language_model.model.layers.18.mlp.up_proj
 
 
 
119
  language_model.model.layers.18.post_attention_layernorm
120
+ language_model.model.layers.18.self_attn.k_proj
121
+ language_model.model.layers.18.self_attn.o_proj
122
+ language_model.model.layers.18.self_attn.q_proj
123
+ language_model.model.layers.18.self_attn.v_proj
124
+ language_model.model.layers.19.input_layernorm
 
125
  language_model.model.layers.19.mlp
126
+ language_model.model.layers.19.mlp.act_fn
127
+ language_model.model.layers.19.mlp.down_proj
128
  language_model.model.layers.19.mlp.gate_proj
129
  language_model.model.layers.19.mlp.up_proj
 
 
 
130
  language_model.model.layers.19.post_attention_layernorm
131
+ language_model.model.layers.19.self_attn.k_proj
132
+ language_model.model.layers.19.self_attn.o_proj
133
+ language_model.model.layers.19.self_attn.q_proj
134
+ language_model.model.layers.19.self_attn.v_proj
135
+ language_model.model.layers.2.input_layernorm
136
+ language_model.model.layers.2.mlp
137
+ language_model.model.layers.2.mlp.act_fn
138
+ language_model.model.layers.2.mlp.down_proj
139
+ language_model.model.layers.2.mlp.gate_proj
140
+ language_model.model.layers.2.mlp.up_proj
141
+ language_model.model.layers.2.post_attention_layernorm
142
+ language_model.model.layers.2.self_attn.k_proj
143
+ language_model.model.layers.2.self_attn.o_proj
144
+ language_model.model.layers.2.self_attn.q_proj
145
+ language_model.model.layers.2.self_attn.v_proj
146
  language_model.model.layers.20.input_layernorm
147
+ language_model.model.layers.20.mlp
148
+ language_model.model.layers.20.mlp.act_fn
149
+ language_model.model.layers.20.mlp.down_proj
150
+ language_model.model.layers.20.mlp.gate_proj
151
+ language_model.model.layers.20.mlp.up_proj
152
  language_model.model.layers.20.post_attention_layernorm
153
+ language_model.model.layers.20.self_attn.k_proj
154
+ language_model.model.layers.20.self_attn.o_proj
155
+ language_model.model.layers.20.self_attn.q_proj
156
+ language_model.model.layers.20.self_attn.v_proj
157
+ language_model.model.layers.21.input_layernorm
 
158
  language_model.model.layers.21.mlp
159
+ language_model.model.layers.21.mlp.act_fn
160
+ language_model.model.layers.21.mlp.down_proj
161
  language_model.model.layers.21.mlp.gate_proj
162
  language_model.model.layers.21.mlp.up_proj
 
 
 
163
  language_model.model.layers.21.post_attention_layernorm
164
+ language_model.model.layers.21.self_attn.k_proj
165
+ language_model.model.layers.21.self_attn.o_proj
166
+ language_model.model.layers.21.self_attn.q_proj
167
+ language_model.model.layers.21.self_attn.v_proj
168
+ language_model.model.layers.22.input_layernorm
 
169
  language_model.model.layers.22.mlp
170
+ language_model.model.layers.22.mlp.act_fn
171
+ language_model.model.layers.22.mlp.down_proj
172
  language_model.model.layers.22.mlp.gate_proj
173
  language_model.model.layers.22.mlp.up_proj
 
 
 
174
  language_model.model.layers.22.post_attention_layernorm
175
+ language_model.model.layers.22.self_attn.k_proj
176
+ language_model.model.layers.22.self_attn.o_proj
177
+ language_model.model.layers.22.self_attn.q_proj
178
+ language_model.model.layers.22.self_attn.v_proj
179
+ language_model.model.layers.23.input_layernorm
 
180
  language_model.model.layers.23.mlp
181
+ language_model.model.layers.23.mlp.act_fn
182
+ language_model.model.layers.23.mlp.down_proj
183
  language_model.model.layers.23.mlp.gate_proj
184
  language_model.model.layers.23.mlp.up_proj
 
 
 
185
  language_model.model.layers.23.post_attention_layernorm
186
+ language_model.model.layers.23.self_attn.k_proj
187
+ language_model.model.layers.23.self_attn.o_proj
188
+ language_model.model.layers.23.self_attn.q_proj
189
+ language_model.model.layers.23.self_attn.v_proj
190
+ language_model.model.layers.24.input_layernorm
 
191
  language_model.model.layers.24.mlp
192
+ language_model.model.layers.24.mlp.act_fn
193
+ language_model.model.layers.24.mlp.down_proj
194
  language_model.model.layers.24.mlp.gate_proj
195
  language_model.model.layers.24.mlp.up_proj
 
 
 
196
  language_model.model.layers.24.post_attention_layernorm
197
+ language_model.model.layers.24.self_attn.k_proj
198
+ language_model.model.layers.24.self_attn.o_proj
199
+ language_model.model.layers.24.self_attn.q_proj
200
+ language_model.model.layers.24.self_attn.v_proj
201
+ language_model.model.layers.25.input_layernorm
 
202
  language_model.model.layers.25.mlp
203
+ language_model.model.layers.25.mlp.act_fn
204
+ language_model.model.layers.25.mlp.down_proj
205
  language_model.model.layers.25.mlp.gate_proj
206
  language_model.model.layers.25.mlp.up_proj
 
 
 
207
  language_model.model.layers.25.post_attention_layernorm
208
+ language_model.model.layers.25.self_attn.k_proj
209
+ language_model.model.layers.25.self_attn.o_proj
210
+ language_model.model.layers.25.self_attn.q_proj
211
+ language_model.model.layers.25.self_attn.v_proj
212
+ language_model.model.layers.26.input_layernorm
 
213
  language_model.model.layers.26.mlp
214
+ language_model.model.layers.26.mlp.act_fn
215
+ language_model.model.layers.26.mlp.down_proj
216
  language_model.model.layers.26.mlp.gate_proj
217
  language_model.model.layers.26.mlp.up_proj
 
 
 
218
  language_model.model.layers.26.post_attention_layernorm
219
+ language_model.model.layers.26.self_attn.k_proj
220
+ language_model.model.layers.26.self_attn.o_proj
221
+ language_model.model.layers.26.self_attn.q_proj
222
+ language_model.model.layers.26.self_attn.v_proj
223
+ language_model.model.layers.27.input_layernorm
 
224
  language_model.model.layers.27.mlp
225
+ language_model.model.layers.27.mlp.act_fn
226
+ language_model.model.layers.27.mlp.down_proj
227
  language_model.model.layers.27.mlp.gate_proj
228
  language_model.model.layers.27.mlp.up_proj
 
 
 
229
  language_model.model.layers.27.post_attention_layernorm
230
+ language_model.model.layers.27.self_attn.k_proj
231
+ language_model.model.layers.27.self_attn.o_proj
232
+ language_model.model.layers.27.self_attn.q_proj
233
+ language_model.model.layers.27.self_attn.v_proj
234
+ language_model.model.layers.28.input_layernorm
 
235
  language_model.model.layers.28.mlp
236
+ language_model.model.layers.28.mlp.act_fn
237
+ language_model.model.layers.28.mlp.down_proj
238
  language_model.model.layers.28.mlp.gate_proj
239
  language_model.model.layers.28.mlp.up_proj
 
 
 
240
  language_model.model.layers.28.post_attention_layernorm
241
+ language_model.model.layers.28.self_attn.k_proj
242
+ language_model.model.layers.28.self_attn.o_proj
243
+ language_model.model.layers.28.self_attn.q_proj
244
+ language_model.model.layers.28.self_attn.v_proj
245
+ language_model.model.layers.29.input_layernorm
 
246
  language_model.model.layers.29.mlp
247
+ language_model.model.layers.29.mlp.act_fn
248
+ language_model.model.layers.29.mlp.down_proj
249
  language_model.model.layers.29.mlp.gate_proj
250
  language_model.model.layers.29.mlp.up_proj
 
 
 
251
  language_model.model.layers.29.post_attention_layernorm
252
+ language_model.model.layers.29.self_attn.k_proj
253
+ language_model.model.layers.29.self_attn.o_proj
254
+ language_model.model.layers.29.self_attn.q_proj
255
+ language_model.model.layers.29.self_attn.v_proj
256
+ language_model.model.layers.3.input_layernorm
257
+ language_model.model.layers.3.mlp
258
+ language_model.model.layers.3.mlp.act_fn
259
+ language_model.model.layers.3.mlp.down_proj
260
+ language_model.model.layers.3.mlp.gate_proj
261
+ language_model.model.layers.3.mlp.up_proj
262
+ language_model.model.layers.3.post_attention_layernorm
263
+ language_model.model.layers.3.self_attn.k_proj
264
+ language_model.model.layers.3.self_attn.o_proj
265
+ language_model.model.layers.3.self_attn.q_proj
266
+ language_model.model.layers.3.self_attn.v_proj
267
+ language_model.model.layers.30.input_layernorm
268
  language_model.model.layers.30.mlp
269
+ language_model.model.layers.30.mlp.act_fn
270
+ language_model.model.layers.30.mlp.down_proj
271
  language_model.model.layers.30.mlp.gate_proj
272
  language_model.model.layers.30.mlp.up_proj
 
 
 
273
  language_model.model.layers.30.post_attention_layernorm
274
+ language_model.model.layers.30.self_attn.k_proj
275
+ language_model.model.layers.30.self_attn.o_proj
276
+ language_model.model.layers.30.self_attn.q_proj
277
+ language_model.model.layers.30.self_attn.v_proj
278
+ language_model.model.layers.31.input_layernorm
 
279
  language_model.model.layers.31.mlp
280
+ language_model.model.layers.31.mlp.act_fn
281
+ language_model.model.layers.31.mlp.down_proj
282
  language_model.model.layers.31.mlp.gate_proj
283
  language_model.model.layers.31.mlp.up_proj
 
 
 
284
  language_model.model.layers.31.post_attention_layernorm
285
+ language_model.model.layers.31.self_attn.k_proj
286
+ language_model.model.layers.31.self_attn.o_proj
287
+ language_model.model.layers.31.self_attn.q_proj
288
+ language_model.model.layers.31.self_attn.v_proj
289
+ language_model.model.layers.4.input_layernorm
290
+ language_model.model.layers.4.mlp
291
+ language_model.model.layers.4.mlp.act_fn
292
+ language_model.model.layers.4.mlp.down_proj
293
+ language_model.model.layers.4.mlp.gate_proj
294
+ language_model.model.layers.4.mlp.up_proj
295
+ language_model.model.layers.4.post_attention_layernorm
296
+ language_model.model.layers.4.self_attn.k_proj
297
+ language_model.model.layers.4.self_attn.o_proj
298
+ language_model.model.layers.4.self_attn.q_proj
299
+ language_model.model.layers.4.self_attn.v_proj
300
+ language_model.model.layers.5.input_layernorm
301
+ language_model.model.layers.5.mlp
302
+ language_model.model.layers.5.mlp.act_fn
303
+ language_model.model.layers.5.mlp.down_proj
304
+ language_model.model.layers.5.mlp.gate_proj
305
+ language_model.model.layers.5.mlp.up_proj
306
+ language_model.model.layers.5.post_attention_layernorm
307
+ language_model.model.layers.5.self_attn.k_proj
308
+ language_model.model.layers.5.self_attn.o_proj
309
+ language_model.model.layers.5.self_attn.q_proj
310
+ language_model.model.layers.5.self_attn.v_proj
311
+ language_model.model.layers.6.input_layernorm
312
+ language_model.model.layers.6.mlp
313
+ language_model.model.layers.6.mlp.act_fn
314
+ language_model.model.layers.6.mlp.down_proj
315
+ language_model.model.layers.6.mlp.gate_proj
316
+ language_model.model.layers.6.mlp.up_proj
317
+ language_model.model.layers.6.post_attention_layernorm
318
+ language_model.model.layers.6.self_attn.k_proj
319
+ language_model.model.layers.6.self_attn.o_proj
320
+ language_model.model.layers.6.self_attn.q_proj
321
+ language_model.model.layers.6.self_attn.v_proj
322
+ language_model.model.layers.7.input_layernorm
323
+ language_model.model.layers.7.mlp
324
+ language_model.model.layers.7.mlp.act_fn
325
+ language_model.model.layers.7.mlp.down_proj
326
+ language_model.model.layers.7.mlp.gate_proj
327
+ language_model.model.layers.7.mlp.up_proj
328
+ language_model.model.layers.7.post_attention_layernorm
329
+ language_model.model.layers.7.self_attn.k_proj
330
+ language_model.model.layers.7.self_attn.o_proj
331
+ language_model.model.layers.7.self_attn.q_proj
332
+ language_model.model.layers.7.self_attn.v_proj
333
+ language_model.model.layers.8.input_layernorm
334
+ language_model.model.layers.8.mlp
335
+ language_model.model.layers.8.mlp.act_fn
336
+ language_model.model.layers.8.mlp.down_proj
337
+ language_model.model.layers.8.mlp.gate_proj
338
+ language_model.model.layers.8.mlp.up_proj
339
+ language_model.model.layers.8.post_attention_layernorm
340
+ language_model.model.layers.8.self_attn.k_proj
341
+ language_model.model.layers.8.self_attn.o_proj
342
+ language_model.model.layers.8.self_attn.q_proj
343
+ language_model.model.layers.8.self_attn.v_proj
344
+ language_model.model.layers.9.input_layernorm
345
+ language_model.model.layers.9.mlp
346
+ language_model.model.layers.9.mlp.act_fn
347
+ language_model.model.layers.9.mlp.down_proj
348
+ language_model.model.layers.9.mlp.gate_proj
349
+ language_model.model.layers.9.mlp.up_proj
350
+ language_model.model.layers.9.post_attention_layernorm
351
+ language_model.model.layers.9.self_attn.k_proj
352
+ language_model.model.layers.9.self_attn.o_proj
353
+ language_model.model.layers.9.self_attn.q_proj
354
+ language_model.model.layers.9.self_attn.v_proj
355
  language_model.model.norm
356
+ multi_modal_projector
357
+ multi_modal_projector.act
358
+ multi_modal_projector.linear_1
359
+ multi_modal_projector.linear_2
360
+ vision_tower.vision_model.embeddings
361
+ vision_tower.vision_model.embeddings.patch_embedding
362
+ vision_tower.vision_model.embeddings.position_embedding
363
+ vision_tower.vision_model.encoder.layers.0.layer_norm1
364
+ vision_tower.vision_model.encoder.layers.0.layer_norm2
365
+ vision_tower.vision_model.encoder.layers.0.mlp
366
+ vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
367
+ vision_tower.vision_model.encoder.layers.0.mlp.fc1
368
+ vision_tower.vision_model.encoder.layers.0.mlp.fc2
369
+ vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
370
+ vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
371
+ vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
372
+ vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
373
+ vision_tower.vision_model.encoder.layers.1.layer_norm1
374
+ vision_tower.vision_model.encoder.layers.1.layer_norm2
375
+ vision_tower.vision_model.encoder.layers.1.mlp
376
+ vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
377
+ vision_tower.vision_model.encoder.layers.1.mlp.fc1
378
+ vision_tower.vision_model.encoder.layers.1.mlp.fc2
379
+ vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
380
+ vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
381
+ vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
382
+ vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
383
+ vision_tower.vision_model.encoder.layers.10.layer_norm1
384
+ vision_tower.vision_model.encoder.layers.10.layer_norm2
385
+ vision_tower.vision_model.encoder.layers.10.mlp
386
+ vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
387
+ vision_tower.vision_model.encoder.layers.10.mlp.fc1
388
+ vision_tower.vision_model.encoder.layers.10.mlp.fc2
389
+ vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
390
+ vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
391
+ vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
392
+ vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
393
+ vision_tower.vision_model.encoder.layers.11.layer_norm1
394
+ vision_tower.vision_model.encoder.layers.11.layer_norm2
395
+ vision_tower.vision_model.encoder.layers.11.mlp
396
+ vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
397
+ vision_tower.vision_model.encoder.layers.11.mlp.fc1
398
+ vision_tower.vision_model.encoder.layers.11.mlp.fc2
399
+ vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
400
+ vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
401
+ vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
402
+ vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
403
+ vision_tower.vision_model.encoder.layers.12.layer_norm1
404
+ vision_tower.vision_model.encoder.layers.12.layer_norm2
405
+ vision_tower.vision_model.encoder.layers.12.mlp
406
+ vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
407
+ vision_tower.vision_model.encoder.layers.12.mlp.fc1
408
+ vision_tower.vision_model.encoder.layers.12.mlp.fc2
409
+ vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
410
+ vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
411
+ vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
412
+ vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
413
+ vision_tower.vision_model.encoder.layers.13.layer_norm1
414
+ vision_tower.vision_model.encoder.layers.13.layer_norm2
415
+ vision_tower.vision_model.encoder.layers.13.mlp
416
+ vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
417
+ vision_tower.vision_model.encoder.layers.13.mlp.fc1
418
+ vision_tower.vision_model.encoder.layers.13.mlp.fc2
419
+ vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
420
+ vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
421
+ vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
422
+ vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
423
+ vision_tower.vision_model.encoder.layers.14.layer_norm1
424
+ vision_tower.vision_model.encoder.layers.14.layer_norm2
425
+ vision_tower.vision_model.encoder.layers.14.mlp
426
+ vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
427
+ vision_tower.vision_model.encoder.layers.14.mlp.fc1
428
+ vision_tower.vision_model.encoder.layers.14.mlp.fc2
429
+ vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
430
+ vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
431
+ vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
432
+ vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
433
+ vision_tower.vision_model.encoder.layers.15.layer_norm1
434
+ vision_tower.vision_model.encoder.layers.15.layer_norm2
435
+ vision_tower.vision_model.encoder.layers.15.mlp
436
+ vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
437
+ vision_tower.vision_model.encoder.layers.15.mlp.fc1
438
+ vision_tower.vision_model.encoder.layers.15.mlp.fc2
439
+ vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
440
+ vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
441
+ vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
442
+ vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
443
+ vision_tower.vision_model.encoder.layers.16.layer_norm1
444
+ vision_tower.vision_model.encoder.layers.16.layer_norm2
445
+ vision_tower.vision_model.encoder.layers.16.mlp
446
+ vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
447
+ vision_tower.vision_model.encoder.layers.16.mlp.fc1
448
+ vision_tower.vision_model.encoder.layers.16.mlp.fc2
449
+ vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
450
+ vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
451
+ vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
452
+ vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
453
+ vision_tower.vision_model.encoder.layers.17.layer_norm1
454
+ vision_tower.vision_model.encoder.layers.17.layer_norm2
455
+ vision_tower.vision_model.encoder.layers.17.mlp
456
+ vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
457
+ vision_tower.vision_model.encoder.layers.17.mlp.fc1
458
+ vision_tower.vision_model.encoder.layers.17.mlp.fc2
459
+ vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
460
+ vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
461
+ vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
462
+ vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
463
+ vision_tower.vision_model.encoder.layers.18.layer_norm1
464
+ vision_tower.vision_model.encoder.layers.18.layer_norm2
465
+ vision_tower.vision_model.encoder.layers.18.mlp
466
+ vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
467
+ vision_tower.vision_model.encoder.layers.18.mlp.fc1
468
+ vision_tower.vision_model.encoder.layers.18.mlp.fc2
469
+ vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
470
+ vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
471
+ vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
472
+ vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
473
+ vision_tower.vision_model.encoder.layers.19.layer_norm1
474
+ vision_tower.vision_model.encoder.layers.19.layer_norm2
475
+ vision_tower.vision_model.encoder.layers.19.mlp
476
+ vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
477
+ vision_tower.vision_model.encoder.layers.19.mlp.fc1
478
+ vision_tower.vision_model.encoder.layers.19.mlp.fc2
479
+ vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
480
+ vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
481
+ vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
482
+ vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
483
+ vision_tower.vision_model.encoder.layers.2.layer_norm1
484
+ vision_tower.vision_model.encoder.layers.2.layer_norm2
485
+ vision_tower.vision_model.encoder.layers.2.mlp
486
+ vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
487
+ vision_tower.vision_model.encoder.layers.2.mlp.fc1
488
+ vision_tower.vision_model.encoder.layers.2.mlp.fc2
489
+ vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
490
+ vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
491
+ vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
492
+ vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
493
+ vision_tower.vision_model.encoder.layers.20.layer_norm1
494
+ vision_tower.vision_model.encoder.layers.20.layer_norm2
495
+ vision_tower.vision_model.encoder.layers.20.mlp
496
+ vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
497
+ vision_tower.vision_model.encoder.layers.20.mlp.fc1
498
+ vision_tower.vision_model.encoder.layers.20.mlp.fc2
499
+ vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
500
+ vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
501
+ vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
502
+ vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
503
+ vision_tower.vision_model.encoder.layers.21.layer_norm1
504
+ vision_tower.vision_model.encoder.layers.21.layer_norm2
505
+ vision_tower.vision_model.encoder.layers.21.mlp
506
+ vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
507
+ vision_tower.vision_model.encoder.layers.21.mlp.fc1
508
+ vision_tower.vision_model.encoder.layers.21.mlp.fc2
509
+ vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
510
+ vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
511
+ vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
512
+ vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
513
+ vision_tower.vision_model.encoder.layers.22.layer_norm1
514
+ vision_tower.vision_model.encoder.layers.22.layer_norm2
515
+ vision_tower.vision_model.encoder.layers.22.mlp
516
+ vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
517
+ vision_tower.vision_model.encoder.layers.22.mlp.fc1
518
+ vision_tower.vision_model.encoder.layers.22.mlp.fc2
519
+ vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
520
+ vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
521
+ vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
522
+ vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
523
+ vision_tower.vision_model.encoder.layers.23.layer_norm1
524
+ vision_tower.vision_model.encoder.layers.23.layer_norm2
525
+ vision_tower.vision_model.encoder.layers.23.mlp
526
+ vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
527
+ vision_tower.vision_model.encoder.layers.23.mlp.fc1
528
+ vision_tower.vision_model.encoder.layers.23.mlp.fc2
529
+ vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
530
+ vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
531
+ vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
532
+ vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
533
+ vision_tower.vision_model.encoder.layers.3.layer_norm1
534
+ vision_tower.vision_model.encoder.layers.3.layer_norm2
535
+ vision_tower.vision_model.encoder.layers.3.mlp
536
+ vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
537
+ vision_tower.vision_model.encoder.layers.3.mlp.fc1
538
+ vision_tower.vision_model.encoder.layers.3.mlp.fc2
539
+ vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
540
+ vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
541
+ vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
542
+ vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
543
+ vision_tower.vision_model.encoder.layers.4.layer_norm1
544
+ vision_tower.vision_model.encoder.layers.4.layer_norm2
545
+ vision_tower.vision_model.encoder.layers.4.mlp
546
+ vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
547
+ vision_tower.vision_model.encoder.layers.4.mlp.fc1
548
+ vision_tower.vision_model.encoder.layers.4.mlp.fc2
549
+ vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
550
+ vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
551
+ vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
552
+ vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
553
+ vision_tower.vision_model.encoder.layers.5.layer_norm1
554
+ vision_tower.vision_model.encoder.layers.5.layer_norm2
555
+ vision_tower.vision_model.encoder.layers.5.mlp
556
+ vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
557
+ vision_tower.vision_model.encoder.layers.5.mlp.fc1
558
+ vision_tower.vision_model.encoder.layers.5.mlp.fc2
559
+ vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
560
+ vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
561
+ vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
562
+ vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
563
+ vision_tower.vision_model.encoder.layers.6.layer_norm1
564
+ vision_tower.vision_model.encoder.layers.6.layer_norm2
565
+ vision_tower.vision_model.encoder.layers.6.mlp
566
+ vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
567
+ vision_tower.vision_model.encoder.layers.6.mlp.fc1
568
+ vision_tower.vision_model.encoder.layers.6.mlp.fc2
569
+ vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
570
+ vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
571
+ vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
572
+ vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
573
+ vision_tower.vision_model.encoder.layers.7.layer_norm1
574
+ vision_tower.vision_model.encoder.layers.7.layer_norm2
575
+ vision_tower.vision_model.encoder.layers.7.mlp
576
+ vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
577
+ vision_tower.vision_model.encoder.layers.7.mlp.fc1
578
+ vision_tower.vision_model.encoder.layers.7.mlp.fc2
579
+ vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
580
+ vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
581
+ vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
582
+ vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
583
+ vision_tower.vision_model.encoder.layers.8.layer_norm1
584
+ vision_tower.vision_model.encoder.layers.8.layer_norm2
585
+ vision_tower.vision_model.encoder.layers.8.mlp
586
+ vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
587
+ vision_tower.vision_model.encoder.layers.8.mlp.fc1
588
+ vision_tower.vision_model.encoder.layers.8.mlp.fc2
589
+ vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
590
+ vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
591
+ vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
592
+ vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
593
+ vision_tower.vision_model.encoder.layers.9.layer_norm1
594
+ vision_tower.vision_model.encoder.layers.9.layer_norm2
595
+ vision_tower.vision_model.encoder.layers.9.mlp
596
+ vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
597
+ vision_tower.vision_model.encoder.layers.9.mlp.fc1
598
+ vision_tower.vision_model.encoder.layers.9.mlp.fc2
599
+ vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
600
+ vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
601
+ vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
602
+ vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
603
+ vision_tower.vision_model.post_layernorm
604
+ vision_tower.vision_model.pre_layrnorm
demo/logs/mistralai/Pixtral-12B-2409.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/openai/clip-vit-base-patch32.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not generative.
demo/logs/openbmb/MiniCPM-o-2_6.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/paligemma/paligemma-3b.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/logs/wonderwind271/MiniCPM-V-2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Note: This model is not compatible with the base environment or requires agreement to privacy policy. Please launch it locally instead.
demo/lookup.py CHANGED
@@ -8,7 +8,7 @@ from typing import Tuple
8
  from src.models.config import ModelSelection
9
 
10
  REPO_ROOT = Path(__file__).resolve().parents[1]
11
- SPECS_DIR = Path(os.getenv('MODEL_SPECS_DIR', REPO_ROOT / 'logs'))
12
 
13
  # TODO: To store local model weights in the repo, also define:
14
  # MODELS_DIR = Path(os.getenv('MODELS_DIR', REPO_ROOT / 'checkpoints'))
@@ -16,22 +16,22 @@ SPECS_DIR = Path(os.getenv('MODEL_SPECS_DIR', REPO_ROOT / 'logs'))
16
 
17
  class ModelVariants(str, Enum):
18
  """Enum that contains all possible model variants."""
19
- AYA_VISION_8B = 'aya-vision-8b'
20
  BLIP2_3B = 'blip2-opt-2.7b'
 
 
 
 
21
  COGVLM_17B = 'cogvlm-17b'
22
  GLAMM_7B = 'glamm-7b'
23
  INTERNLM_XC_25_7B = 'internlm-xcomposer2.5-7b'
24
  INTERNVL_25_8B = 'internvl-2.5-8b'
25
  JANUS_1B = 'janus-pro-1b'
26
- LLAVA_15_7B = 'llava-1.5-7b'
27
  MINICPM_O_26_8B = 'minicpm-o-2.6-8b'
28
  MINICPM_V_20_3B = 'minicpm-v-2.0-2.8b'
29
  MOLMO_7B = 'molmo-7b'
30
  PALIGEMMA_3B = 'paligemma-3b'
31
  PIXTRAL_12B = 'pixtral-12b'
32
  PERCEPTION_LM_1B = 'perception-lm-1b'
33
- QWENVL_20_2B = 'qwen2-vl-2b-instruct'
34
- QWENVL_20_7B = 'qwen2-vl-7b-instruct'
35
  # TODO: Add more models here as needed.
36
 
37
 
 
8
  from src.models.config import ModelSelection
9
 
10
  REPO_ROOT = Path(__file__).resolve().parents[1]
11
+ SPECS_DIR = Path(os.getenv('MODEL_SPECS_DIR', REPO_ROOT / 'demo' / 'layers'))
12
 
13
  # TODO: To store local model weights in the repo, also define:
14
  # MODELS_DIR = Path(os.getenv('MODELS_DIR', REPO_ROOT / 'checkpoints'))
 
16
 
17
  class ModelVariants(str, Enum):
18
  """Enum that contains all possible model variants."""
 
19
  BLIP2_3B = 'blip2-opt-2.7b'
20
+ LLAVA_15_7B = 'llava-1.5-7b'
21
+ QWENVL_20_2B = 'qwen2-vl-2b-instruct'
22
+ QWENVL_20_7B = 'qwen2-vl-7b-instruct'
23
+ AYA_VISION_8B = 'aya-vision-8b'
24
  COGVLM_17B = 'cogvlm-17b'
25
  GLAMM_7B = 'glamm-7b'
26
  INTERNLM_XC_25_7B = 'internlm-xcomposer2.5-7b'
27
  INTERNVL_25_8B = 'internvl-2.5-8b'
28
  JANUS_1B = 'janus-pro-1b'
 
29
  MINICPM_O_26_8B = 'minicpm-o-2.6-8b'
30
  MINICPM_V_20_3B = 'minicpm-v-2.0-2.8b'
31
  MOLMO_7B = 'molmo-7b'
32
  PALIGEMMA_3B = 'paligemma-3b'
33
  PIXTRAL_12B = 'pixtral-12b'
34
  PERCEPTION_LM_1B = 'perception-lm-1b'
 
 
35
  # TODO: Add more models here as needed.
36
 
37
 
logs/CohereLabs/aya-vision-8b.txt DELETED
@@ -1,729 +0,0 @@
1
-
2
- vision_tower
3
- vision_tower.vision_model
4
- vision_tower.vision_model.embeddings
5
- vision_tower.vision_model.embeddings.patch_embedding
6
- vision_tower.vision_model.embeddings.position_embedding
7
- vision_tower.vision_model.encoder
8
- vision_tower.vision_model.encoder.layers
9
- vision_tower.vision_model.encoder.layers.0
10
- vision_tower.vision_model.encoder.layers.0.self_attn
11
- vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
12
- vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
13
- vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
14
- vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
15
- vision_tower.vision_model.encoder.layers.0.layer_norm1
16
- vision_tower.vision_model.encoder.layers.0.mlp
17
- vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
18
- vision_tower.vision_model.encoder.layers.0.mlp.fc1
19
- vision_tower.vision_model.encoder.layers.0.mlp.fc2
20
- vision_tower.vision_model.encoder.layers.0.layer_norm2
21
- vision_tower.vision_model.encoder.layers.1
22
- vision_tower.vision_model.encoder.layers.1.self_attn
23
- vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
24
- vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
25
- vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
26
- vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
27
- vision_tower.vision_model.encoder.layers.1.layer_norm1
28
- vision_tower.vision_model.encoder.layers.1.mlp
29
- vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
30
- vision_tower.vision_model.encoder.layers.1.mlp.fc1
31
- vision_tower.vision_model.encoder.layers.1.mlp.fc2
32
- vision_tower.vision_model.encoder.layers.1.layer_norm2
33
- vision_tower.vision_model.encoder.layers.2
34
- vision_tower.vision_model.encoder.layers.2.self_attn
35
- vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
36
- vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
37
- vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
38
- vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
39
- vision_tower.vision_model.encoder.layers.2.layer_norm1
40
- vision_tower.vision_model.encoder.layers.2.mlp
41
- vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
42
- vision_tower.vision_model.encoder.layers.2.mlp.fc1
43
- vision_tower.vision_model.encoder.layers.2.mlp.fc2
44
- vision_tower.vision_model.encoder.layers.2.layer_norm2
45
- vision_tower.vision_model.encoder.layers.3
46
- vision_tower.vision_model.encoder.layers.3.self_attn
47
- vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
48
- vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
49
- vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
50
- vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
51
- vision_tower.vision_model.encoder.layers.3.layer_norm1
52
- vision_tower.vision_model.encoder.layers.3.mlp
53
- vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
54
- vision_tower.vision_model.encoder.layers.3.mlp.fc1
55
- vision_tower.vision_model.encoder.layers.3.mlp.fc2
56
- vision_tower.vision_model.encoder.layers.3.layer_norm2
57
- vision_tower.vision_model.encoder.layers.4
58
- vision_tower.vision_model.encoder.layers.4.self_attn
59
- vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
60
- vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
61
- vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
62
- vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
63
- vision_tower.vision_model.encoder.layers.4.layer_norm1
64
- vision_tower.vision_model.encoder.layers.4.mlp
65
- vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
66
- vision_tower.vision_model.encoder.layers.4.mlp.fc1
67
- vision_tower.vision_model.encoder.layers.4.mlp.fc2
68
- vision_tower.vision_model.encoder.layers.4.layer_norm2
69
- vision_tower.vision_model.encoder.layers.5
70
- vision_tower.vision_model.encoder.layers.5.self_attn
71
- vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
72
- vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
73
- vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
74
- vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
75
- vision_tower.vision_model.encoder.layers.5.layer_norm1
76
- vision_tower.vision_model.encoder.layers.5.mlp
77
- vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
78
- vision_tower.vision_model.encoder.layers.5.mlp.fc1
79
- vision_tower.vision_model.encoder.layers.5.mlp.fc2
80
- vision_tower.vision_model.encoder.layers.5.layer_norm2
81
- vision_tower.vision_model.encoder.layers.6
82
- vision_tower.vision_model.encoder.layers.6.self_attn
83
- vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
84
- vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
85
- vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
86
- vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
87
- vision_tower.vision_model.encoder.layers.6.layer_norm1
88
- vision_tower.vision_model.encoder.layers.6.mlp
89
- vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
90
- vision_tower.vision_model.encoder.layers.6.mlp.fc1
91
- vision_tower.vision_model.encoder.layers.6.mlp.fc2
92
- vision_tower.vision_model.encoder.layers.6.layer_norm2
93
- vision_tower.vision_model.encoder.layers.7
94
- vision_tower.vision_model.encoder.layers.7.self_attn
95
- vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
96
- vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
97
- vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
98
- vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
99
- vision_tower.vision_model.encoder.layers.7.layer_norm1
100
- vision_tower.vision_model.encoder.layers.7.mlp
101
- vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
102
- vision_tower.vision_model.encoder.layers.7.mlp.fc1
103
- vision_tower.vision_model.encoder.layers.7.mlp.fc2
104
- vision_tower.vision_model.encoder.layers.7.layer_norm2
105
- vision_tower.vision_model.encoder.layers.8
106
- vision_tower.vision_model.encoder.layers.8.self_attn
107
- vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
108
- vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
109
- vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
110
- vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
111
- vision_tower.vision_model.encoder.layers.8.layer_norm1
112
- vision_tower.vision_model.encoder.layers.8.mlp
113
- vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
114
- vision_tower.vision_model.encoder.layers.8.mlp.fc1
115
- vision_tower.vision_model.encoder.layers.8.mlp.fc2
116
- vision_tower.vision_model.encoder.layers.8.layer_norm2
117
- vision_tower.vision_model.encoder.layers.9
118
- vision_tower.vision_model.encoder.layers.9.self_attn
119
- vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
120
- vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
121
- vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
122
- vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
123
- vision_tower.vision_model.encoder.layers.9.layer_norm1
124
- vision_tower.vision_model.encoder.layers.9.mlp
125
- vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
126
- vision_tower.vision_model.encoder.layers.9.mlp.fc1
127
- vision_tower.vision_model.encoder.layers.9.mlp.fc2
128
- vision_tower.vision_model.encoder.layers.9.layer_norm2
129
- vision_tower.vision_model.encoder.layers.10
130
- vision_tower.vision_model.encoder.layers.10.self_attn
131
- vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
132
- vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
133
- vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
134
- vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
135
- vision_tower.vision_model.encoder.layers.10.layer_norm1
136
- vision_tower.vision_model.encoder.layers.10.mlp
137
- vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
138
- vision_tower.vision_model.encoder.layers.10.mlp.fc1
139
- vision_tower.vision_model.encoder.layers.10.mlp.fc2
140
- vision_tower.vision_model.encoder.layers.10.layer_norm2
141
- vision_tower.vision_model.encoder.layers.11
142
- vision_tower.vision_model.encoder.layers.11.self_attn
143
- vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
144
- vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
145
- vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
146
- vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
147
- vision_tower.vision_model.encoder.layers.11.layer_norm1
148
- vision_tower.vision_model.encoder.layers.11.mlp
149
- vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
150
- vision_tower.vision_model.encoder.layers.11.mlp.fc1
151
- vision_tower.vision_model.encoder.layers.11.mlp.fc2
152
- vision_tower.vision_model.encoder.layers.11.layer_norm2
153
- vision_tower.vision_model.encoder.layers.12
154
- vision_tower.vision_model.encoder.layers.12.self_attn
155
- vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
156
- vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
157
- vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
158
- vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
159
- vision_tower.vision_model.encoder.layers.12.layer_norm1
160
- vision_tower.vision_model.encoder.layers.12.mlp
161
- vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
162
- vision_tower.vision_model.encoder.layers.12.mlp.fc1
163
- vision_tower.vision_model.encoder.layers.12.mlp.fc2
164
- vision_tower.vision_model.encoder.layers.12.layer_norm2
165
- vision_tower.vision_model.encoder.layers.13
166
- vision_tower.vision_model.encoder.layers.13.self_attn
167
- vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
168
- vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
169
- vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
170
- vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
171
- vision_tower.vision_model.encoder.layers.13.layer_norm1
172
- vision_tower.vision_model.encoder.layers.13.mlp
173
- vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
174
- vision_tower.vision_model.encoder.layers.13.mlp.fc1
175
- vision_tower.vision_model.encoder.layers.13.mlp.fc2
176
- vision_tower.vision_model.encoder.layers.13.layer_norm2
177
- vision_tower.vision_model.encoder.layers.14
178
- vision_tower.vision_model.encoder.layers.14.self_attn
179
- vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
180
- vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
181
- vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
182
- vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
183
- vision_tower.vision_model.encoder.layers.14.layer_norm1
184
- vision_tower.vision_model.encoder.layers.14.mlp
185
- vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
186
- vision_tower.vision_model.encoder.layers.14.mlp.fc1
187
- vision_tower.vision_model.encoder.layers.14.mlp.fc2
188
- vision_tower.vision_model.encoder.layers.14.layer_norm2
189
- vision_tower.vision_model.encoder.layers.15
190
- vision_tower.vision_model.encoder.layers.15.self_attn
191
- vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
192
- vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
193
- vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
194
- vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
195
- vision_tower.vision_model.encoder.layers.15.layer_norm1
196
- vision_tower.vision_model.encoder.layers.15.mlp
197
- vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
198
- vision_tower.vision_model.encoder.layers.15.mlp.fc1
199
- vision_tower.vision_model.encoder.layers.15.mlp.fc2
200
- vision_tower.vision_model.encoder.layers.15.layer_norm2
201
- vision_tower.vision_model.encoder.layers.16
202
- vision_tower.vision_model.encoder.layers.16.self_attn
203
- vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
204
- vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
205
- vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
206
- vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
207
- vision_tower.vision_model.encoder.layers.16.layer_norm1
208
- vision_tower.vision_model.encoder.layers.16.mlp
209
- vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
210
- vision_tower.vision_model.encoder.layers.16.mlp.fc1
211
- vision_tower.vision_model.encoder.layers.16.mlp.fc2
212
- vision_tower.vision_model.encoder.layers.16.layer_norm2
213
- vision_tower.vision_model.encoder.layers.17
214
- vision_tower.vision_model.encoder.layers.17.self_attn
215
- vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
216
- vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
217
- vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
218
- vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
219
- vision_tower.vision_model.encoder.layers.17.layer_norm1
220
- vision_tower.vision_model.encoder.layers.17.mlp
221
- vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
222
- vision_tower.vision_model.encoder.layers.17.mlp.fc1
223
- vision_tower.vision_model.encoder.layers.17.mlp.fc2
224
- vision_tower.vision_model.encoder.layers.17.layer_norm2
225
- vision_tower.vision_model.encoder.layers.18
226
- vision_tower.vision_model.encoder.layers.18.self_attn
227
- vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
228
- vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
229
- vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
230
- vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
231
- vision_tower.vision_model.encoder.layers.18.layer_norm1
232
- vision_tower.vision_model.encoder.layers.18.mlp
233
- vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
234
- vision_tower.vision_model.encoder.layers.18.mlp.fc1
235
- vision_tower.vision_model.encoder.layers.18.mlp.fc2
236
- vision_tower.vision_model.encoder.layers.18.layer_norm2
237
- vision_tower.vision_model.encoder.layers.19
238
- vision_tower.vision_model.encoder.layers.19.self_attn
239
- vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
240
- vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
241
- vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
242
- vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
243
- vision_tower.vision_model.encoder.layers.19.layer_norm1
244
- vision_tower.vision_model.encoder.layers.19.mlp
245
- vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
246
- vision_tower.vision_model.encoder.layers.19.mlp.fc1
247
- vision_tower.vision_model.encoder.layers.19.mlp.fc2
248
- vision_tower.vision_model.encoder.layers.19.layer_norm2
249
- vision_tower.vision_model.encoder.layers.20
250
- vision_tower.vision_model.encoder.layers.20.self_attn
251
- vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
252
- vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
253
- vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
254
- vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
255
- vision_tower.vision_model.encoder.layers.20.layer_norm1
256
- vision_tower.vision_model.encoder.layers.20.mlp
257
- vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
258
- vision_tower.vision_model.encoder.layers.20.mlp.fc1
259
- vision_tower.vision_model.encoder.layers.20.mlp.fc2
260
- vision_tower.vision_model.encoder.layers.20.layer_norm2
261
- vision_tower.vision_model.encoder.layers.21
262
- vision_tower.vision_model.encoder.layers.21.self_attn
263
- vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
264
- vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
265
- vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
266
- vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
267
- vision_tower.vision_model.encoder.layers.21.layer_norm1
268
- vision_tower.vision_model.encoder.layers.21.mlp
269
- vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
270
- vision_tower.vision_model.encoder.layers.21.mlp.fc1
271
- vision_tower.vision_model.encoder.layers.21.mlp.fc2
272
- vision_tower.vision_model.encoder.layers.21.layer_norm2
273
- vision_tower.vision_model.encoder.layers.22
274
- vision_tower.vision_model.encoder.layers.22.self_attn
275
- vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
276
- vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
277
- vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
278
- vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
279
- vision_tower.vision_model.encoder.layers.22.layer_norm1
280
- vision_tower.vision_model.encoder.layers.22.mlp
281
- vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
282
- vision_tower.vision_model.encoder.layers.22.mlp.fc1
283
- vision_tower.vision_model.encoder.layers.22.mlp.fc2
284
- vision_tower.vision_model.encoder.layers.22.layer_norm2
285
- vision_tower.vision_model.encoder.layers.23
286
- vision_tower.vision_model.encoder.layers.23.self_attn
287
- vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
288
- vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
289
- vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
290
- vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
291
- vision_tower.vision_model.encoder.layers.23.layer_norm1
292
- vision_tower.vision_model.encoder.layers.23.mlp
293
- vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
294
- vision_tower.vision_model.encoder.layers.23.mlp.fc1
295
- vision_tower.vision_model.encoder.layers.23.mlp.fc2
296
- vision_tower.vision_model.encoder.layers.23.layer_norm2
297
- vision_tower.vision_model.encoder.layers.24
298
- vision_tower.vision_model.encoder.layers.24.self_attn
299
- vision_tower.vision_model.encoder.layers.24.self_attn.k_proj
300
- vision_tower.vision_model.encoder.layers.24.self_attn.v_proj
301
- vision_tower.vision_model.encoder.layers.24.self_attn.q_proj
302
- vision_tower.vision_model.encoder.layers.24.self_attn.out_proj
303
- vision_tower.vision_model.encoder.layers.24.layer_norm1
304
- vision_tower.vision_model.encoder.layers.24.mlp
305
- vision_tower.vision_model.encoder.layers.24.mlp.activation_fn
306
- vision_tower.vision_model.encoder.layers.24.mlp.fc1
307
- vision_tower.vision_model.encoder.layers.24.mlp.fc2
308
- vision_tower.vision_model.encoder.layers.24.layer_norm2
309
- vision_tower.vision_model.encoder.layers.25
310
- vision_tower.vision_model.encoder.layers.25.self_attn
311
- vision_tower.vision_model.encoder.layers.25.self_attn.k_proj
312
- vision_tower.vision_model.encoder.layers.25.self_attn.v_proj
313
- vision_tower.vision_model.encoder.layers.25.self_attn.q_proj
314
- vision_tower.vision_model.encoder.layers.25.self_attn.out_proj
315
- vision_tower.vision_model.encoder.layers.25.layer_norm1
316
- vision_tower.vision_model.encoder.layers.25.mlp
317
- vision_tower.vision_model.encoder.layers.25.mlp.activation_fn
318
- vision_tower.vision_model.encoder.layers.25.mlp.fc1
319
- vision_tower.vision_model.encoder.layers.25.mlp.fc2
320
- vision_tower.vision_model.encoder.layers.25.layer_norm2
321
- vision_tower.vision_model.encoder.layers.26
322
- vision_tower.vision_model.encoder.layers.26.self_attn
323
- vision_tower.vision_model.encoder.layers.26.self_attn.k_proj
324
- vision_tower.vision_model.encoder.layers.26.self_attn.v_proj
325
- vision_tower.vision_model.encoder.layers.26.self_attn.q_proj
326
- vision_tower.vision_model.encoder.layers.26.self_attn.out_proj
327
- vision_tower.vision_model.encoder.layers.26.layer_norm1
328
- vision_tower.vision_model.encoder.layers.26.mlp
329
- vision_tower.vision_model.encoder.layers.26.mlp.activation_fn
330
- vision_tower.vision_model.encoder.layers.26.mlp.fc1
331
- vision_tower.vision_model.encoder.layers.26.mlp.fc2
332
- vision_tower.vision_model.encoder.layers.26.layer_norm2
333
- vision_tower.vision_model.post_layernorm
334
- multi_modal_projector
335
- multi_modal_projector.layernorm
336
- multi_modal_projector.linear_1
337
- multi_modal_projector.act
338
- multi_modal_projector.linear_2
339
- language_model
340
- language_model.model
341
- language_model.model.embed_tokens
342
- language_model.model.layers
343
- language_model.model.layers.0
344
- language_model.model.layers.0.self_attn
345
- language_model.model.layers.0.self_attn.q_proj
346
- language_model.model.layers.0.self_attn.k_proj
347
- language_model.model.layers.0.self_attn.v_proj
348
- language_model.model.layers.0.self_attn.o_proj
349
- language_model.model.layers.0.mlp
350
- language_model.model.layers.0.mlp.gate_proj
351
- language_model.model.layers.0.mlp.up_proj
352
- language_model.model.layers.0.mlp.down_proj
353
- language_model.model.layers.0.mlp.act_fn
354
- language_model.model.layers.0.input_layernorm
355
- language_model.model.layers.1
356
- language_model.model.layers.1.self_attn
357
- language_model.model.layers.1.self_attn.q_proj
358
- language_model.model.layers.1.self_attn.k_proj
359
- language_model.model.layers.1.self_attn.v_proj
360
- language_model.model.layers.1.self_attn.o_proj
361
- language_model.model.layers.1.mlp
362
- language_model.model.layers.1.mlp.gate_proj
363
- language_model.model.layers.1.mlp.up_proj
364
- language_model.model.layers.1.mlp.down_proj
365
- language_model.model.layers.1.mlp.act_fn
366
- language_model.model.layers.1.input_layernorm
367
- language_model.model.layers.2
368
- language_model.model.layers.2.self_attn
369
- language_model.model.layers.2.self_attn.q_proj
370
- language_model.model.layers.2.self_attn.k_proj
371
- language_model.model.layers.2.self_attn.v_proj
372
- language_model.model.layers.2.self_attn.o_proj
373
- language_model.model.layers.2.mlp
374
- language_model.model.layers.2.mlp.gate_proj
375
- language_model.model.layers.2.mlp.up_proj
376
- language_model.model.layers.2.mlp.down_proj
377
- language_model.model.layers.2.mlp.act_fn
378
- language_model.model.layers.2.input_layernorm
379
- language_model.model.layers.3
380
- language_model.model.layers.3.self_attn
381
- language_model.model.layers.3.self_attn.q_proj
382
- language_model.model.layers.3.self_attn.k_proj
383
- language_model.model.layers.3.self_attn.v_proj
384
- language_model.model.layers.3.self_attn.o_proj
385
- language_model.model.layers.3.mlp
386
- language_model.model.layers.3.mlp.gate_proj
387
- language_model.model.layers.3.mlp.up_proj
388
- language_model.model.layers.3.mlp.down_proj
389
- language_model.model.layers.3.mlp.act_fn
390
- language_model.model.layers.3.input_layernorm
391
- language_model.model.layers.4
392
- language_model.model.layers.4.self_attn
393
- language_model.model.layers.4.self_attn.q_proj
394
- language_model.model.layers.4.self_attn.k_proj
395
- language_model.model.layers.4.self_attn.v_proj
396
- language_model.model.layers.4.self_attn.o_proj
397
- language_model.model.layers.4.mlp
398
- language_model.model.layers.4.mlp.gate_proj
399
- language_model.model.layers.4.mlp.up_proj
400
- language_model.model.layers.4.mlp.down_proj
401
- language_model.model.layers.4.mlp.act_fn
402
- language_model.model.layers.4.input_layernorm
403
- language_model.model.layers.5
404
- language_model.model.layers.5.self_attn
405
- language_model.model.layers.5.self_attn.q_proj
406
- language_model.model.layers.5.self_attn.k_proj
407
- language_model.model.layers.5.self_attn.v_proj
408
- language_model.model.layers.5.self_attn.o_proj
409
- language_model.model.layers.5.mlp
410
- language_model.model.layers.5.mlp.gate_proj
411
- language_model.model.layers.5.mlp.up_proj
412
- language_model.model.layers.5.mlp.down_proj
413
- language_model.model.layers.5.mlp.act_fn
414
- language_model.model.layers.5.input_layernorm
415
- language_model.model.layers.6
416
- language_model.model.layers.6.self_attn
417
- language_model.model.layers.6.self_attn.q_proj
418
- language_model.model.layers.6.self_attn.k_proj
419
- language_model.model.layers.6.self_attn.v_proj
420
- language_model.model.layers.6.self_attn.o_proj
421
- language_model.model.layers.6.mlp
422
- language_model.model.layers.6.mlp.gate_proj
423
- language_model.model.layers.6.mlp.up_proj
424
- language_model.model.layers.6.mlp.down_proj
425
- language_model.model.layers.6.mlp.act_fn
426
- language_model.model.layers.6.input_layernorm
427
- language_model.model.layers.7
428
- language_model.model.layers.7.self_attn
429
- language_model.model.layers.7.self_attn.q_proj
430
- language_model.model.layers.7.self_attn.k_proj
431
- language_model.model.layers.7.self_attn.v_proj
432
- language_model.model.layers.7.self_attn.o_proj
433
- language_model.model.layers.7.mlp
434
- language_model.model.layers.7.mlp.gate_proj
435
- language_model.model.layers.7.mlp.up_proj
436
- language_model.model.layers.7.mlp.down_proj
437
- language_model.model.layers.7.mlp.act_fn
438
- language_model.model.layers.7.input_layernorm
439
- language_model.model.layers.8
440
- language_model.model.layers.8.self_attn
441
- language_model.model.layers.8.self_attn.q_proj
442
- language_model.model.layers.8.self_attn.k_proj
443
- language_model.model.layers.8.self_attn.v_proj
444
- language_model.model.layers.8.self_attn.o_proj
445
- language_model.model.layers.8.mlp
446
- language_model.model.layers.8.mlp.gate_proj
447
- language_model.model.layers.8.mlp.up_proj
448
- language_model.model.layers.8.mlp.down_proj
449
- language_model.model.layers.8.mlp.act_fn
450
- language_model.model.layers.8.input_layernorm
451
- language_model.model.layers.9
452
- language_model.model.layers.9.self_attn
453
- language_model.model.layers.9.self_attn.q_proj
454
- language_model.model.layers.9.self_attn.k_proj
455
- language_model.model.layers.9.self_attn.v_proj
456
- language_model.model.layers.9.self_attn.o_proj
457
- language_model.model.layers.9.mlp
458
- language_model.model.layers.9.mlp.gate_proj
459
- language_model.model.layers.9.mlp.up_proj
460
- language_model.model.layers.9.mlp.down_proj
461
- language_model.model.layers.9.mlp.act_fn
462
- language_model.model.layers.9.input_layernorm
463
- language_model.model.layers.10
464
- language_model.model.layers.10.self_attn
465
- language_model.model.layers.10.self_attn.q_proj
466
- language_model.model.layers.10.self_attn.k_proj
467
- language_model.model.layers.10.self_attn.v_proj
468
- language_model.model.layers.10.self_attn.o_proj
469
- language_model.model.layers.10.mlp
470
- language_model.model.layers.10.mlp.gate_proj
471
- language_model.model.layers.10.mlp.up_proj
472
- language_model.model.layers.10.mlp.down_proj
473
- language_model.model.layers.10.mlp.act_fn
474
- language_model.model.layers.10.input_layernorm
475
- language_model.model.layers.11
476
- language_model.model.layers.11.self_attn
477
- language_model.model.layers.11.self_attn.q_proj
478
- language_model.model.layers.11.self_attn.k_proj
479
- language_model.model.layers.11.self_attn.v_proj
480
- language_model.model.layers.11.self_attn.o_proj
481
- language_model.model.layers.11.mlp
482
- language_model.model.layers.11.mlp.gate_proj
483
- language_model.model.layers.11.mlp.up_proj
484
- language_model.model.layers.11.mlp.down_proj
485
- language_model.model.layers.11.mlp.act_fn
486
- language_model.model.layers.11.input_layernorm
487
- language_model.model.layers.12
488
- language_model.model.layers.12.self_attn
489
- language_model.model.layers.12.self_attn.q_proj
490
- language_model.model.layers.12.self_attn.k_proj
491
- language_model.model.layers.12.self_attn.v_proj
492
- language_model.model.layers.12.self_attn.o_proj
493
- language_model.model.layers.12.mlp
494
- language_model.model.layers.12.mlp.gate_proj
495
- language_model.model.layers.12.mlp.up_proj
496
- language_model.model.layers.12.mlp.down_proj
497
- language_model.model.layers.12.mlp.act_fn
498
- language_model.model.layers.12.input_layernorm
499
- language_model.model.layers.13
500
- language_model.model.layers.13.self_attn
501
- language_model.model.layers.13.self_attn.q_proj
502
- language_model.model.layers.13.self_attn.k_proj
503
- language_model.model.layers.13.self_attn.v_proj
504
- language_model.model.layers.13.self_attn.o_proj
505
- language_model.model.layers.13.mlp
506
- language_model.model.layers.13.mlp.gate_proj
507
- language_model.model.layers.13.mlp.up_proj
508
- language_model.model.layers.13.mlp.down_proj
509
- language_model.model.layers.13.mlp.act_fn
510
- language_model.model.layers.13.input_layernorm
511
- language_model.model.layers.14
512
- language_model.model.layers.14.self_attn
513
- language_model.model.layers.14.self_attn.q_proj
514
- language_model.model.layers.14.self_attn.k_proj
515
- language_model.model.layers.14.self_attn.v_proj
516
- language_model.model.layers.14.self_attn.o_proj
517
- language_model.model.layers.14.mlp
518
- language_model.model.layers.14.mlp.gate_proj
519
- language_model.model.layers.14.mlp.up_proj
520
- language_model.model.layers.14.mlp.down_proj
521
- language_model.model.layers.14.mlp.act_fn
522
- language_model.model.layers.14.input_layernorm
523
- language_model.model.layers.15
524
- language_model.model.layers.15.self_attn
525
- language_model.model.layers.15.self_attn.q_proj
526
- language_model.model.layers.15.self_attn.k_proj
527
- language_model.model.layers.15.self_attn.v_proj
528
- language_model.model.layers.15.self_attn.o_proj
529
- language_model.model.layers.15.mlp
530
- language_model.model.layers.15.mlp.gate_proj
531
- language_model.model.layers.15.mlp.up_proj
532
- language_model.model.layers.15.mlp.down_proj
533
- language_model.model.layers.15.mlp.act_fn
534
- language_model.model.layers.15.input_layernorm
535
- language_model.model.layers.16
536
- language_model.model.layers.16.self_attn
537
- language_model.model.layers.16.self_attn.q_proj
538
- language_model.model.layers.16.self_attn.k_proj
539
- language_model.model.layers.16.self_attn.v_proj
540
- language_model.model.layers.16.self_attn.o_proj
541
- language_model.model.layers.16.mlp
542
- language_model.model.layers.16.mlp.gate_proj
543
- language_model.model.layers.16.mlp.up_proj
544
- language_model.model.layers.16.mlp.down_proj
545
- language_model.model.layers.16.mlp.act_fn
546
- language_model.model.layers.16.input_layernorm
547
- language_model.model.layers.17
548
- language_model.model.layers.17.self_attn
549
- language_model.model.layers.17.self_attn.q_proj
550
- language_model.model.layers.17.self_attn.k_proj
551
- language_model.model.layers.17.self_attn.v_proj
552
- language_model.model.layers.17.self_attn.o_proj
553
- language_model.model.layers.17.mlp
554
- language_model.model.layers.17.mlp.gate_proj
555
- language_model.model.layers.17.mlp.up_proj
556
- language_model.model.layers.17.mlp.down_proj
557
- language_model.model.layers.17.mlp.act_fn
558
- language_model.model.layers.17.input_layernorm
559
- language_model.model.layers.18
560
- language_model.model.layers.18.self_attn
561
- language_model.model.layers.18.self_attn.q_proj
562
- language_model.model.layers.18.self_attn.k_proj
563
- language_model.model.layers.18.self_attn.v_proj
564
- language_model.model.layers.18.self_attn.o_proj
565
- language_model.model.layers.18.mlp
566
- language_model.model.layers.18.mlp.gate_proj
567
- language_model.model.layers.18.mlp.up_proj
568
- language_model.model.layers.18.mlp.down_proj
569
- language_model.model.layers.18.mlp.act_fn
570
- language_model.model.layers.18.input_layernorm
571
- language_model.model.layers.19
572
- language_model.model.layers.19.self_attn
573
- language_model.model.layers.19.self_attn.q_proj
574
- language_model.model.layers.19.self_attn.k_proj
575
- language_model.model.layers.19.self_attn.v_proj
576
- language_model.model.layers.19.self_attn.o_proj
577
- language_model.model.layers.19.mlp
578
- language_model.model.layers.19.mlp.gate_proj
579
- language_model.model.layers.19.mlp.up_proj
580
- language_model.model.layers.19.mlp.down_proj
581
- language_model.model.layers.19.mlp.act_fn
582
- language_model.model.layers.19.input_layernorm
583
- language_model.model.layers.20
584
- language_model.model.layers.20.self_attn
585
- language_model.model.layers.20.self_attn.q_proj
586
- language_model.model.layers.20.self_attn.k_proj
587
- language_model.model.layers.20.self_attn.v_proj
588
- language_model.model.layers.20.self_attn.o_proj
589
- language_model.model.layers.20.mlp
590
- language_model.model.layers.20.mlp.gate_proj
591
- language_model.model.layers.20.mlp.up_proj
592
- language_model.model.layers.20.mlp.down_proj
593
- language_model.model.layers.20.mlp.act_fn
594
- language_model.model.layers.20.input_layernorm
595
- language_model.model.layers.21
596
- language_model.model.layers.21.self_attn
597
- language_model.model.layers.21.self_attn.q_proj
598
- language_model.model.layers.21.self_attn.k_proj
599
- language_model.model.layers.21.self_attn.v_proj
600
- language_model.model.layers.21.self_attn.o_proj
601
- language_model.model.layers.21.mlp
602
- language_model.model.layers.21.mlp.gate_proj
603
- language_model.model.layers.21.mlp.up_proj
604
- language_model.model.layers.21.mlp.down_proj
605
- language_model.model.layers.21.mlp.act_fn
606
- language_model.model.layers.21.input_layernorm
607
- language_model.model.layers.22
608
- language_model.model.layers.22.self_attn
609
- language_model.model.layers.22.self_attn.q_proj
610
- language_model.model.layers.22.self_attn.k_proj
611
- language_model.model.layers.22.self_attn.v_proj
612
- language_model.model.layers.22.self_attn.o_proj
613
- language_model.model.layers.22.mlp
614
- language_model.model.layers.22.mlp.gate_proj
615
- language_model.model.layers.22.mlp.up_proj
616
- language_model.model.layers.22.mlp.down_proj
617
- language_model.model.layers.22.mlp.act_fn
618
- language_model.model.layers.22.input_layernorm
619
- language_model.model.layers.23
620
- language_model.model.layers.23.self_attn
621
- language_model.model.layers.23.self_attn.q_proj
622
- language_model.model.layers.23.self_attn.k_proj
623
- language_model.model.layers.23.self_attn.v_proj
624
- language_model.model.layers.23.self_attn.o_proj
625
- language_model.model.layers.23.mlp
626
- language_model.model.layers.23.mlp.gate_proj
627
- language_model.model.layers.23.mlp.up_proj
628
- language_model.model.layers.23.mlp.down_proj
629
- language_model.model.layers.23.mlp.act_fn
630
- language_model.model.layers.23.input_layernorm
631
- language_model.model.layers.24
632
- language_model.model.layers.24.self_attn
633
- language_model.model.layers.24.self_attn.q_proj
634
- language_model.model.layers.24.self_attn.k_proj
635
- language_model.model.layers.24.self_attn.v_proj
636
- language_model.model.layers.24.self_attn.o_proj
637
- language_model.model.layers.24.mlp
638
- language_model.model.layers.24.mlp.gate_proj
639
- language_model.model.layers.24.mlp.up_proj
640
- language_model.model.layers.24.mlp.down_proj
641
- language_model.model.layers.24.mlp.act_fn
642
- language_model.model.layers.24.input_layernorm
643
- language_model.model.layers.25
644
- language_model.model.layers.25.self_attn
645
- language_model.model.layers.25.self_attn.q_proj
646
- language_model.model.layers.25.self_attn.k_proj
647
- language_model.model.layers.25.self_attn.v_proj
648
- language_model.model.layers.25.self_attn.o_proj
649
- language_model.model.layers.25.mlp
650
- language_model.model.layers.25.mlp.gate_proj
651
- language_model.model.layers.25.mlp.up_proj
652
- language_model.model.layers.25.mlp.down_proj
653
- language_model.model.layers.25.mlp.act_fn
654
- language_model.model.layers.25.input_layernorm
655
- language_model.model.layers.26
656
- language_model.model.layers.26.self_attn
657
- language_model.model.layers.26.self_attn.q_proj
658
- language_model.model.layers.26.self_attn.k_proj
659
- language_model.model.layers.26.self_attn.v_proj
660
- language_model.model.layers.26.self_attn.o_proj
661
- language_model.model.layers.26.mlp
662
- language_model.model.layers.26.mlp.gate_proj
663
- language_model.model.layers.26.mlp.up_proj
664
- language_model.model.layers.26.mlp.down_proj
665
- language_model.model.layers.26.mlp.act_fn
666
- language_model.model.layers.26.input_layernorm
667
- language_model.model.layers.27
668
- language_model.model.layers.27.self_attn
669
- language_model.model.layers.27.self_attn.q_proj
670
- language_model.model.layers.27.self_attn.k_proj
671
- language_model.model.layers.27.self_attn.v_proj
672
- language_model.model.layers.27.self_attn.o_proj
673
- language_model.model.layers.27.mlp
674
- language_model.model.layers.27.mlp.gate_proj
675
- language_model.model.layers.27.mlp.up_proj
676
- language_model.model.layers.27.mlp.down_proj
677
- language_model.model.layers.27.mlp.act_fn
678
- language_model.model.layers.27.input_layernorm
679
- language_model.model.layers.28
680
- language_model.model.layers.28.self_attn
681
- language_model.model.layers.28.self_attn.q_proj
682
- language_model.model.layers.28.self_attn.k_proj
683
- language_model.model.layers.28.self_attn.v_proj
684
- language_model.model.layers.28.self_attn.o_proj
685
- language_model.model.layers.28.mlp
686
- language_model.model.layers.28.mlp.gate_proj
687
- language_model.model.layers.28.mlp.up_proj
688
- language_model.model.layers.28.mlp.down_proj
689
- language_model.model.layers.28.mlp.act_fn
690
- language_model.model.layers.28.input_layernorm
691
- language_model.model.layers.29
692
- language_model.model.layers.29.self_attn
693
- language_model.model.layers.29.self_attn.q_proj
694
- language_model.model.layers.29.self_attn.k_proj
695
- language_model.model.layers.29.self_attn.v_proj
696
- language_model.model.layers.29.self_attn.o_proj
697
- language_model.model.layers.29.mlp
698
- language_model.model.layers.29.mlp.gate_proj
699
- language_model.model.layers.29.mlp.up_proj
700
- language_model.model.layers.29.mlp.down_proj
701
- language_model.model.layers.29.mlp.act_fn
702
- language_model.model.layers.29.input_layernorm
703
- language_model.model.layers.30
704
- language_model.model.layers.30.self_attn
705
- language_model.model.layers.30.self_attn.q_proj
706
- language_model.model.layers.30.self_attn.k_proj
707
- language_model.model.layers.30.self_attn.v_proj
708
- language_model.model.layers.30.self_attn.o_proj
709
- language_model.model.layers.30.mlp
710
- language_model.model.layers.30.mlp.gate_proj
711
- language_model.model.layers.30.mlp.up_proj
712
- language_model.model.layers.30.mlp.down_proj
713
- language_model.model.layers.30.mlp.act_fn
714
- language_model.model.layers.30.input_layernorm
715
- language_model.model.layers.31
716
- language_model.model.layers.31.self_attn
717
- language_model.model.layers.31.self_attn.q_proj
718
- language_model.model.layers.31.self_attn.k_proj
719
- language_model.model.layers.31.self_attn.v_proj
720
- language_model.model.layers.31.self_attn.o_proj
721
- language_model.model.layers.31.mlp
722
- language_model.model.layers.31.mlp.gate_proj
723
- language_model.model.layers.31.mlp.up_proj
724
- language_model.model.layers.31.mlp.down_proj
725
- language_model.model.layers.31.mlp.act_fn
726
- language_model.model.layers.31.input_layernorm
727
- language_model.model.norm
728
- language_model.model.rotary_emb
729
- language_model.lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/MBZUAI/GLaMM-FullScope.txt DELETED
@@ -1,950 +0,0 @@
1
- model
2
- model.embed_tokens
3
- model.layers
4
- model.layers.0
5
- model.layers.0.self_attn
6
- model.layers.0.self_attn.q_proj
7
- model.layers.0.self_attn.k_proj
8
- model.layers.0.self_attn.v_proj
9
- model.layers.0.self_attn.o_proj
10
- model.layers.0.self_attn.rotary_emb
11
- model.layers.0.mlp
12
- model.layers.0.mlp.gate_proj
13
- model.layers.0.mlp.down_proj
14
- model.layers.0.mlp.up_proj
15
- model.layers.0.mlp.act_fn
16
- model.layers.0.input_layernorm
17
- model.layers.0.post_attention_layernorm
18
- model.layers.1
19
- model.layers.1.self_attn
20
- model.layers.1.self_attn.q_proj
21
- model.layers.1.self_attn.k_proj
22
- model.layers.1.self_attn.v_proj
23
- model.layers.1.self_attn.o_proj
24
- model.layers.1.self_attn.rotary_emb
25
- model.layers.1.mlp
26
- model.layers.1.mlp.gate_proj
27
- model.layers.1.mlp.down_proj
28
- model.layers.1.mlp.up_proj
29
- model.layers.1.mlp.act_fn
30
- model.layers.1.input_layernorm
31
- model.layers.1.post_attention_layernorm
32
- model.layers.2
33
- model.layers.2.self_attn
34
- model.layers.2.self_attn.q_proj
35
- model.layers.2.self_attn.k_proj
36
- model.layers.2.self_attn.v_proj
37
- model.layers.2.self_attn.o_proj
38
- model.layers.2.self_attn.rotary_emb
39
- model.layers.2.mlp
40
- model.layers.2.mlp.gate_proj
41
- model.layers.2.mlp.down_proj
42
- model.layers.2.mlp.up_proj
43
- model.layers.2.mlp.act_fn
44
- model.layers.2.input_layernorm
45
- model.layers.2.post_attention_layernorm
46
- model.layers.3
47
- model.layers.3.self_attn
48
- model.layers.3.self_attn.q_proj
49
- model.layers.3.self_attn.k_proj
50
- model.layers.3.self_attn.v_proj
51
- model.layers.3.self_attn.o_proj
52
- model.layers.3.self_attn.rotary_emb
53
- model.layers.3.mlp
54
- model.layers.3.mlp.gate_proj
55
- model.layers.3.mlp.down_proj
56
- model.layers.3.mlp.up_proj
57
- model.layers.3.mlp.act_fn
58
- model.layers.3.input_layernorm
59
- model.layers.3.post_attention_layernorm
60
- model.layers.4
61
- model.layers.4.self_attn
62
- model.layers.4.self_attn.q_proj
63
- model.layers.4.self_attn.k_proj
64
- model.layers.4.self_attn.v_proj
65
- model.layers.4.self_attn.o_proj
66
- model.layers.4.self_attn.rotary_emb
67
- model.layers.4.mlp
68
- model.layers.4.mlp.gate_proj
69
- model.layers.4.mlp.down_proj
70
- model.layers.4.mlp.up_proj
71
- model.layers.4.mlp.act_fn
72
- model.layers.4.input_layernorm
73
- model.layers.4.post_attention_layernorm
74
- model.layers.5
75
- model.layers.5.self_attn
76
- model.layers.5.self_attn.q_proj
77
- model.layers.5.self_attn.k_proj
78
- model.layers.5.self_attn.v_proj
79
- model.layers.5.self_attn.o_proj
80
- model.layers.5.self_attn.rotary_emb
81
- model.layers.5.mlp
82
- model.layers.5.mlp.gate_proj
83
- model.layers.5.mlp.down_proj
84
- model.layers.5.mlp.up_proj
85
- model.layers.5.mlp.act_fn
86
- model.layers.5.input_layernorm
87
- model.layers.5.post_attention_layernorm
88
- model.layers.6
89
- model.layers.6.self_attn
90
- model.layers.6.self_attn.q_proj
91
- model.layers.6.self_attn.k_proj
92
- model.layers.6.self_attn.v_proj
93
- model.layers.6.self_attn.o_proj
94
- model.layers.6.self_attn.rotary_emb
95
- model.layers.6.mlp
96
- model.layers.6.mlp.gate_proj
97
- model.layers.6.mlp.down_proj
98
- model.layers.6.mlp.up_proj
99
- model.layers.6.mlp.act_fn
100
- model.layers.6.input_layernorm
101
- model.layers.6.post_attention_layernorm
102
- model.layers.7
103
- model.layers.7.self_attn
104
- model.layers.7.self_attn.q_proj
105
- model.layers.7.self_attn.k_proj
106
- model.layers.7.self_attn.v_proj
107
- model.layers.7.self_attn.o_proj
108
- model.layers.7.self_attn.rotary_emb
109
- model.layers.7.mlp
110
- model.layers.7.mlp.gate_proj
111
- model.layers.7.mlp.down_proj
112
- model.layers.7.mlp.up_proj
113
- model.layers.7.mlp.act_fn
114
- model.layers.7.input_layernorm
115
- model.layers.7.post_attention_layernorm
116
- model.layers.8
117
- model.layers.8.self_attn
118
- model.layers.8.self_attn.q_proj
119
- model.layers.8.self_attn.k_proj
120
- model.layers.8.self_attn.v_proj
121
- model.layers.8.self_attn.o_proj
122
- model.layers.8.self_attn.rotary_emb
123
- model.layers.8.mlp
124
- model.layers.8.mlp.gate_proj
125
- model.layers.8.mlp.down_proj
126
- model.layers.8.mlp.up_proj
127
- model.layers.8.mlp.act_fn
128
- model.layers.8.input_layernorm
129
- model.layers.8.post_attention_layernorm
130
- model.layers.9
131
- model.layers.9.self_attn
132
- model.layers.9.self_attn.q_proj
133
- model.layers.9.self_attn.k_proj
134
- model.layers.9.self_attn.v_proj
135
- model.layers.9.self_attn.o_proj
136
- model.layers.9.self_attn.rotary_emb
137
- model.layers.9.mlp
138
- model.layers.9.mlp.gate_proj
139
- model.layers.9.mlp.down_proj
140
- model.layers.9.mlp.up_proj
141
- model.layers.9.mlp.act_fn
142
- model.layers.9.input_layernorm
143
- model.layers.9.post_attention_layernorm
144
- model.layers.10
145
- model.layers.10.self_attn
146
- model.layers.10.self_attn.q_proj
147
- model.layers.10.self_attn.k_proj
148
- model.layers.10.self_attn.v_proj
149
- model.layers.10.self_attn.o_proj
150
- model.layers.10.self_attn.rotary_emb
151
- model.layers.10.mlp
152
- model.layers.10.mlp.gate_proj
153
- model.layers.10.mlp.down_proj
154
- model.layers.10.mlp.up_proj
155
- model.layers.10.mlp.act_fn
156
- model.layers.10.input_layernorm
157
- model.layers.10.post_attention_layernorm
158
- model.layers.11
159
- model.layers.11.self_attn
160
- model.layers.11.self_attn.q_proj
161
- model.layers.11.self_attn.k_proj
162
- model.layers.11.self_attn.v_proj
163
- model.layers.11.self_attn.o_proj
164
- model.layers.11.self_attn.rotary_emb
165
- model.layers.11.mlp
166
- model.layers.11.mlp.gate_proj
167
- model.layers.11.mlp.down_proj
168
- model.layers.11.mlp.up_proj
169
- model.layers.11.mlp.act_fn
170
- model.layers.11.input_layernorm
171
- model.layers.11.post_attention_layernorm
172
- model.layers.12
173
- model.layers.12.self_attn
174
- model.layers.12.self_attn.q_proj
175
- model.layers.12.self_attn.k_proj
176
- model.layers.12.self_attn.v_proj
177
- model.layers.12.self_attn.o_proj
178
- model.layers.12.self_attn.rotary_emb
179
- model.layers.12.mlp
180
- model.layers.12.mlp.gate_proj
181
- model.layers.12.mlp.down_proj
182
- model.layers.12.mlp.up_proj
183
- model.layers.12.mlp.act_fn
184
- model.layers.12.input_layernorm
185
- model.layers.12.post_attention_layernorm
186
- model.layers.13
187
- model.layers.13.self_attn
188
- model.layers.13.self_attn.q_proj
189
- model.layers.13.self_attn.k_proj
190
- model.layers.13.self_attn.v_proj
191
- model.layers.13.self_attn.o_proj
192
- model.layers.13.self_attn.rotary_emb
193
- model.layers.13.mlp
194
- model.layers.13.mlp.gate_proj
195
- model.layers.13.mlp.down_proj
196
- model.layers.13.mlp.up_proj
197
- model.layers.13.mlp.act_fn
198
- model.layers.13.input_layernorm
199
- model.layers.13.post_attention_layernorm
200
- model.layers.14
201
- model.layers.14.self_attn
202
- model.layers.14.self_attn.q_proj
203
- model.layers.14.self_attn.k_proj
204
- model.layers.14.self_attn.v_proj
205
- model.layers.14.self_attn.o_proj
206
- model.layers.14.self_attn.rotary_emb
207
- model.layers.14.mlp
208
- model.layers.14.mlp.gate_proj
209
- model.layers.14.mlp.down_proj
210
- model.layers.14.mlp.up_proj
211
- model.layers.14.mlp.act_fn
212
- model.layers.14.input_layernorm
213
- model.layers.14.post_attention_layernorm
214
- model.layers.15
215
- model.layers.15.self_attn
216
- model.layers.15.self_attn.q_proj
217
- model.layers.15.self_attn.k_proj
218
- model.layers.15.self_attn.v_proj
219
- model.layers.15.self_attn.o_proj
220
- model.layers.15.self_attn.rotary_emb
221
- model.layers.15.mlp
222
- model.layers.15.mlp.gate_proj
223
- model.layers.15.mlp.down_proj
224
- model.layers.15.mlp.up_proj
225
- model.layers.15.mlp.act_fn
226
- model.layers.15.input_layernorm
227
- model.layers.15.post_attention_layernorm
228
- model.layers.16
229
- model.layers.16.self_attn
230
- model.layers.16.self_attn.q_proj
231
- model.layers.16.self_attn.k_proj
232
- model.layers.16.self_attn.v_proj
233
- model.layers.16.self_attn.o_proj
234
- model.layers.16.self_attn.rotary_emb
235
- model.layers.16.mlp
236
- model.layers.16.mlp.gate_proj
237
- model.layers.16.mlp.down_proj
238
- model.layers.16.mlp.up_proj
239
- model.layers.16.mlp.act_fn
240
- model.layers.16.input_layernorm
241
- model.layers.16.post_attention_layernorm
242
- model.layers.17
243
- model.layers.17.self_attn
244
- model.layers.17.self_attn.q_proj
245
- model.layers.17.self_attn.k_proj
246
- model.layers.17.self_attn.v_proj
247
- model.layers.17.self_attn.o_proj
248
- model.layers.17.self_attn.rotary_emb
249
- model.layers.17.mlp
250
- model.layers.17.mlp.gate_proj
251
- model.layers.17.mlp.down_proj
252
- model.layers.17.mlp.up_proj
253
- model.layers.17.mlp.act_fn
254
- model.layers.17.input_layernorm
255
- model.layers.17.post_attention_layernorm
256
- model.layers.18
257
- model.layers.18.self_attn
258
- model.layers.18.self_attn.q_proj
259
- model.layers.18.self_attn.k_proj
260
- model.layers.18.self_attn.v_proj
261
- model.layers.18.self_attn.o_proj
262
- model.layers.18.self_attn.rotary_emb
263
- model.layers.18.mlp
264
- model.layers.18.mlp.gate_proj
265
- model.layers.18.mlp.down_proj
266
- model.layers.18.mlp.up_proj
267
- model.layers.18.mlp.act_fn
268
- model.layers.18.input_layernorm
269
- model.layers.18.post_attention_layernorm
270
- model.layers.19
271
- model.layers.19.self_attn
272
- model.layers.19.self_attn.q_proj
273
- model.layers.19.self_attn.k_proj
274
- model.layers.19.self_attn.v_proj
275
- model.layers.19.self_attn.o_proj
276
- model.layers.19.self_attn.rotary_emb
277
- model.layers.19.mlp
278
- model.layers.19.mlp.gate_proj
279
- model.layers.19.mlp.down_proj
280
- model.layers.19.mlp.up_proj
281
- model.layers.19.mlp.act_fn
282
- model.layers.19.input_layernorm
283
- model.layers.19.post_attention_layernorm
284
- model.layers.20
285
- model.layers.20.self_attn
286
- model.layers.20.self_attn.q_proj
287
- model.layers.20.self_attn.k_proj
288
- model.layers.20.self_attn.v_proj
289
- model.layers.20.self_attn.o_proj
290
- model.layers.20.self_attn.rotary_emb
291
- model.layers.20.mlp
292
- model.layers.20.mlp.gate_proj
293
- model.layers.20.mlp.down_proj
294
- model.layers.20.mlp.up_proj
295
- model.layers.20.mlp.act_fn
296
- model.layers.20.input_layernorm
297
- model.layers.20.post_attention_layernorm
298
- model.layers.21
299
- model.layers.21.self_attn
300
- model.layers.21.self_attn.q_proj
301
- model.layers.21.self_attn.k_proj
302
- model.layers.21.self_attn.v_proj
303
- model.layers.21.self_attn.o_proj
304
- model.layers.21.self_attn.rotary_emb
305
- model.layers.21.mlp
306
- model.layers.21.mlp.gate_proj
307
- model.layers.21.mlp.down_proj
308
- model.layers.21.mlp.up_proj
309
- model.layers.21.mlp.act_fn
310
- model.layers.21.input_layernorm
311
- model.layers.21.post_attention_layernorm
312
- model.layers.22
313
- model.layers.22.self_attn
314
- model.layers.22.self_attn.q_proj
315
- model.layers.22.self_attn.k_proj
316
- model.layers.22.self_attn.v_proj
317
- model.layers.22.self_attn.o_proj
318
- model.layers.22.self_attn.rotary_emb
319
- model.layers.22.mlp
320
- model.layers.22.mlp.gate_proj
321
- model.layers.22.mlp.down_proj
322
- model.layers.22.mlp.up_proj
323
- model.layers.22.mlp.act_fn
324
- model.layers.22.input_layernorm
325
- model.layers.22.post_attention_layernorm
326
- model.layers.23
327
- model.layers.23.self_attn
328
- model.layers.23.self_attn.q_proj
329
- model.layers.23.self_attn.k_proj
330
- model.layers.23.self_attn.v_proj
331
- model.layers.23.self_attn.o_proj
332
- model.layers.23.self_attn.rotary_emb
333
- model.layers.23.mlp
334
- model.layers.23.mlp.gate_proj
335
- model.layers.23.mlp.down_proj
336
- model.layers.23.mlp.up_proj
337
- model.layers.23.mlp.act_fn
338
- model.layers.23.input_layernorm
339
- model.layers.23.post_attention_layernorm
340
- model.layers.24
341
- model.layers.24.self_attn
342
- model.layers.24.self_attn.q_proj
343
- model.layers.24.self_attn.k_proj
344
- model.layers.24.self_attn.v_proj
345
- model.layers.24.self_attn.o_proj
346
- model.layers.24.self_attn.rotary_emb
347
- model.layers.24.mlp
348
- model.layers.24.mlp.gate_proj
349
- model.layers.24.mlp.down_proj
350
- model.layers.24.mlp.up_proj
351
- model.layers.24.mlp.act_fn
352
- model.layers.24.input_layernorm
353
- model.layers.24.post_attention_layernorm
354
- model.layers.25
355
- model.layers.25.self_attn
356
- model.layers.25.self_attn.q_proj
357
- model.layers.25.self_attn.k_proj
358
- model.layers.25.self_attn.v_proj
359
- model.layers.25.self_attn.o_proj
360
- model.layers.25.self_attn.rotary_emb
361
- model.layers.25.mlp
362
- model.layers.25.mlp.gate_proj
363
- model.layers.25.mlp.down_proj
364
- model.layers.25.mlp.up_proj
365
- model.layers.25.mlp.act_fn
366
- model.layers.25.input_layernorm
367
- model.layers.25.post_attention_layernorm
368
- model.layers.26
369
- model.layers.26.self_attn
370
- model.layers.26.self_attn.q_proj
371
- model.layers.26.self_attn.k_proj
372
- model.layers.26.self_attn.v_proj
373
- model.layers.26.self_attn.o_proj
374
- model.layers.26.self_attn.rotary_emb
375
- model.layers.26.mlp
376
- model.layers.26.mlp.gate_proj
377
- model.layers.26.mlp.down_proj
378
- model.layers.26.mlp.up_proj
379
- model.layers.26.mlp.act_fn
380
- model.layers.26.input_layernorm
381
- model.layers.26.post_attention_layernorm
382
- model.layers.27
383
- model.layers.27.self_attn
384
- model.layers.27.self_attn.q_proj
385
- model.layers.27.self_attn.k_proj
386
- model.layers.27.self_attn.v_proj
387
- model.layers.27.self_attn.o_proj
388
- model.layers.27.self_attn.rotary_emb
389
- model.layers.27.mlp
390
- model.layers.27.mlp.gate_proj
391
- model.layers.27.mlp.down_proj
392
- model.layers.27.mlp.up_proj
393
- model.layers.27.mlp.act_fn
394
- model.layers.27.input_layernorm
395
- model.layers.27.post_attention_layernorm
396
- model.layers.28
397
- model.layers.28.self_attn
398
- model.layers.28.self_attn.q_proj
399
- model.layers.28.self_attn.k_proj
400
- model.layers.28.self_attn.v_proj
401
- model.layers.28.self_attn.o_proj
402
- model.layers.28.self_attn.rotary_emb
403
- model.layers.28.mlp
404
- model.layers.28.mlp.gate_proj
405
- model.layers.28.mlp.down_proj
406
- model.layers.28.mlp.up_proj
407
- model.layers.28.mlp.act_fn
408
- model.layers.28.input_layernorm
409
- model.layers.28.post_attention_layernorm
410
- model.layers.29
411
- model.layers.29.self_attn
412
- model.layers.29.self_attn.q_proj
413
- model.layers.29.self_attn.k_proj
414
- model.layers.29.self_attn.v_proj
415
- model.layers.29.self_attn.o_proj
416
- model.layers.29.self_attn.rotary_emb
417
- model.layers.29.mlp
418
- model.layers.29.mlp.gate_proj
419
- model.layers.29.mlp.down_proj
420
- model.layers.29.mlp.up_proj
421
- model.layers.29.mlp.act_fn
422
- model.layers.29.input_layernorm
423
- model.layers.29.post_attention_layernorm
424
- model.layers.30
425
- model.layers.30.self_attn
426
- model.layers.30.self_attn.q_proj
427
- model.layers.30.self_attn.k_proj
428
- model.layers.30.self_attn.v_proj
429
- model.layers.30.self_attn.o_proj
430
- model.layers.30.self_attn.rotary_emb
431
- model.layers.30.mlp
432
- model.layers.30.mlp.gate_proj
433
- model.layers.30.mlp.down_proj
434
- model.layers.30.mlp.up_proj
435
- model.layers.30.mlp.act_fn
436
- model.layers.30.input_layernorm
437
- model.layers.30.post_attention_layernorm
438
- model.layers.31
439
- model.layers.31.self_attn
440
- model.layers.31.self_attn.q_proj
441
- model.layers.31.self_attn.k_proj
442
- model.layers.31.self_attn.v_proj
443
- model.layers.31.self_attn.o_proj
444
- model.layers.31.self_attn.rotary_emb
445
- model.layers.31.mlp
446
- model.layers.31.mlp.gate_proj
447
- model.layers.31.mlp.down_proj
448
- model.layers.31.mlp.up_proj
449
- model.layers.31.mlp.act_fn
450
- model.layers.31.input_layernorm
451
- model.layers.31.post_attention_layernorm
452
- model.norm
453
- model.vision_tower
454
- model.mm_projector
455
- model.mm_projector.0
456
- model.mm_projector.1
457
- model.mm_projector.2
458
- model.region_encoder
459
- model.region_encoder.mlvl_fuse
460
- model.region_encoder.mlvl_fuse.input_conv
461
- model.region_encoder.mlvl_fuse.input_conv.0
462
- model.region_encoder.mlvl_fuse.input_conv.1
463
- model.region_encoder.mlvl_fuse.input_conv.2
464
- model.region_encoder.mlvl_fuse.input_conv.3
465
- model.region_encoder.mlvl_fuse.fuse_convs
466
- model.region_encoder.mlvl_fuse.fuse_convs.0
467
- model.region_encoder.mlvl_fuse.fuse_convs.0.conv
468
- model.region_encoder.mlvl_fuse.fuse_convs.0.gn
469
- model.region_encoder.mlvl_fuse.fuse_convs.0.activate
470
- model.region_encoder.mlvl_fuse.fuse_convs.1
471
- model.region_encoder.mlvl_fuse.fuse_convs.1.conv
472
- model.region_encoder.mlvl_fuse.fuse_convs.1.gn
473
- model.region_encoder.mlvl_fuse.fuse_convs.1.activate
474
- model.region_encoder.mlvl_fuse.fuse_convs.2
475
- model.region_encoder.mlvl_fuse.fuse_convs.2.conv
476
- model.region_encoder.mlvl_fuse.fuse_convs.2.gn
477
- model.region_encoder.mlvl_fuse.fuse_convs.2.activate
478
- model.region_encoder.mlvl_fuse.fuse_convs.3
479
- model.region_encoder.mlvl_fuse.fuse_convs.3.conv
480
- model.region_encoder.mlvl_fuse.fuse_convs.3.gn
481
- model.region_encoder.mlvl_fuse.fuse_convs.3.activate
482
- model.region_encoder.mlvl_fuse.fuse_convs.4
483
- model.region_encoder.mlvl_fuse.fuse_convs.4.conv
484
- model.region_encoder.mlvl_fuse.fuse_convs.4.gn
485
- model.region_encoder.mlvl_fuse.fuse_convs.4.activate
486
- model.region_encoder.roi_align
487
- model.region_encoder.roi_align.roi_layers
488
- model.region_encoder.roi_align.roi_layers.0
489
- model.region_encoder.roi_align.roi_layers.1
490
- model.region_encoder.roi_align.roi_layers.2
491
- model.region_encoder.roi_align.roi_layers.3
492
- model.region_encoder.roi_align.pconvs
493
- model.region_encoder.roi_align.pconvs.0
494
- model.region_encoder.roi_align.pconvs.1
495
- model.region_encoder.roi_align.pconvs.2
496
- model.region_encoder.roi_align.pconvs.3
497
- model.region_encoder.roi_align.pos_embedd
498
- model.region_encoder.roi_align.pos_embedd.0
499
- model.region_encoder.roi_align.pos_embedd.1
500
- model.region_encoder.roi_align.pos_embedd.2
501
- model.region_encoder.roi_align.pos_embedd.3
502
- model.region_encoder.roi_align.pos_embedd.4
503
- model.region_encoder.roi_align.pos_embedd.5
504
- model.region_encoder.roi_align.updims
505
- model.region_encoder.roi_align.flatten_linear
506
- model.grounding_encoder
507
- model.grounding_encoder.image_encoder
508
- model.grounding_encoder.image_encoder.patch_embed
509
- model.grounding_encoder.image_encoder.patch_embed.proj
510
- model.grounding_encoder.image_encoder.blocks
511
- model.grounding_encoder.image_encoder.blocks.0
512
- model.grounding_encoder.image_encoder.blocks.0.norm1
513
- model.grounding_encoder.image_encoder.blocks.0.attn
514
- model.grounding_encoder.image_encoder.blocks.0.attn.qkv
515
- model.grounding_encoder.image_encoder.blocks.0.attn.proj
516
- model.grounding_encoder.image_encoder.blocks.0.norm2
517
- model.grounding_encoder.image_encoder.blocks.0.mlp
518
- model.grounding_encoder.image_encoder.blocks.0.mlp.lin1
519
- model.grounding_encoder.image_encoder.blocks.0.mlp.lin2
520
- model.grounding_encoder.image_encoder.blocks.0.mlp.act
521
- model.grounding_encoder.image_encoder.blocks.1
522
- model.grounding_encoder.image_encoder.blocks.1.norm1
523
- model.grounding_encoder.image_encoder.blocks.1.attn
524
- model.grounding_encoder.image_encoder.blocks.1.attn.qkv
525
- model.grounding_encoder.image_encoder.blocks.1.attn.proj
526
- model.grounding_encoder.image_encoder.blocks.1.norm2
527
- model.grounding_encoder.image_encoder.blocks.1.mlp
528
- model.grounding_encoder.image_encoder.blocks.1.mlp.lin1
529
- model.grounding_encoder.image_encoder.blocks.1.mlp.lin2
530
- model.grounding_encoder.image_encoder.blocks.1.mlp.act
531
- model.grounding_encoder.image_encoder.blocks.2
532
- model.grounding_encoder.image_encoder.blocks.2.norm1
533
- model.grounding_encoder.image_encoder.blocks.2.attn
534
- model.grounding_encoder.image_encoder.blocks.2.attn.qkv
535
- model.grounding_encoder.image_encoder.blocks.2.attn.proj
536
- model.grounding_encoder.image_encoder.blocks.2.norm2
537
- model.grounding_encoder.image_encoder.blocks.2.mlp
538
- model.grounding_encoder.image_encoder.blocks.2.mlp.lin1
539
- model.grounding_encoder.image_encoder.blocks.2.mlp.lin2
540
- model.grounding_encoder.image_encoder.blocks.2.mlp.act
541
- model.grounding_encoder.image_encoder.blocks.3
542
- model.grounding_encoder.image_encoder.blocks.3.norm1
543
- model.grounding_encoder.image_encoder.blocks.3.attn
544
- model.grounding_encoder.image_encoder.blocks.3.attn.qkv
545
- model.grounding_encoder.image_encoder.blocks.3.attn.proj
546
- model.grounding_encoder.image_encoder.blocks.3.norm2
547
- model.grounding_encoder.image_encoder.blocks.3.mlp
548
- model.grounding_encoder.image_encoder.blocks.3.mlp.lin1
549
- model.grounding_encoder.image_encoder.blocks.3.mlp.lin2
550
- model.grounding_encoder.image_encoder.blocks.3.mlp.act
551
- model.grounding_encoder.image_encoder.blocks.4
552
- model.grounding_encoder.image_encoder.blocks.4.norm1
553
- model.grounding_encoder.image_encoder.blocks.4.attn
554
- model.grounding_encoder.image_encoder.blocks.4.attn.qkv
555
- model.grounding_encoder.image_encoder.blocks.4.attn.proj
556
- model.grounding_encoder.image_encoder.blocks.4.norm2
557
- model.grounding_encoder.image_encoder.blocks.4.mlp
558
- model.grounding_encoder.image_encoder.blocks.4.mlp.lin1
559
- model.grounding_encoder.image_encoder.blocks.4.mlp.lin2
560
- model.grounding_encoder.image_encoder.blocks.4.mlp.act
561
- model.grounding_encoder.image_encoder.blocks.5
562
- model.grounding_encoder.image_encoder.blocks.5.norm1
563
- model.grounding_encoder.image_encoder.blocks.5.attn
564
- model.grounding_encoder.image_encoder.blocks.5.attn.qkv
565
- model.grounding_encoder.image_encoder.blocks.5.attn.proj
566
- model.grounding_encoder.image_encoder.blocks.5.norm2
567
- model.grounding_encoder.image_encoder.blocks.5.mlp
568
- model.grounding_encoder.image_encoder.blocks.5.mlp.lin1
569
- model.grounding_encoder.image_encoder.blocks.5.mlp.lin2
570
- model.grounding_encoder.image_encoder.blocks.5.mlp.act
571
- model.grounding_encoder.image_encoder.blocks.6
572
- model.grounding_encoder.image_encoder.blocks.6.norm1
573
- model.grounding_encoder.image_encoder.blocks.6.attn
574
- model.grounding_encoder.image_encoder.blocks.6.attn.qkv
575
- model.grounding_encoder.image_encoder.blocks.6.attn.proj
576
- model.grounding_encoder.image_encoder.blocks.6.norm2
577
- model.grounding_encoder.image_encoder.blocks.6.mlp
578
- model.grounding_encoder.image_encoder.blocks.6.mlp.lin1
579
- model.grounding_encoder.image_encoder.blocks.6.mlp.lin2
580
- model.grounding_encoder.image_encoder.blocks.6.mlp.act
581
- model.grounding_encoder.image_encoder.blocks.7
582
- model.grounding_encoder.image_encoder.blocks.7.norm1
583
- model.grounding_encoder.image_encoder.blocks.7.attn
584
- model.grounding_encoder.image_encoder.blocks.7.attn.qkv
585
- model.grounding_encoder.image_encoder.blocks.7.attn.proj
586
- model.grounding_encoder.image_encoder.blocks.7.norm2
587
- model.grounding_encoder.image_encoder.blocks.7.mlp
588
- model.grounding_encoder.image_encoder.blocks.7.mlp.lin1
589
- model.grounding_encoder.image_encoder.blocks.7.mlp.lin2
590
- model.grounding_encoder.image_encoder.blocks.7.mlp.act
591
- model.grounding_encoder.image_encoder.blocks.8
592
- model.grounding_encoder.image_encoder.blocks.8.norm1
593
- model.grounding_encoder.image_encoder.blocks.8.attn
594
- model.grounding_encoder.image_encoder.blocks.8.attn.qkv
595
- model.grounding_encoder.image_encoder.blocks.8.attn.proj
596
- model.grounding_encoder.image_encoder.blocks.8.norm2
597
- model.grounding_encoder.image_encoder.blocks.8.mlp
598
- model.grounding_encoder.image_encoder.blocks.8.mlp.lin1
599
- model.grounding_encoder.image_encoder.blocks.8.mlp.lin2
600
- model.grounding_encoder.image_encoder.blocks.8.mlp.act
601
- model.grounding_encoder.image_encoder.blocks.9
602
- model.grounding_encoder.image_encoder.blocks.9.norm1
603
- model.grounding_encoder.image_encoder.blocks.9.attn
604
- model.grounding_encoder.image_encoder.blocks.9.attn.qkv
605
- model.grounding_encoder.image_encoder.blocks.9.attn.proj
606
- model.grounding_encoder.image_encoder.blocks.9.norm2
607
- model.grounding_encoder.image_encoder.blocks.9.mlp
608
- model.grounding_encoder.image_encoder.blocks.9.mlp.lin1
609
- model.grounding_encoder.image_encoder.blocks.9.mlp.lin2
610
- model.grounding_encoder.image_encoder.blocks.9.mlp.act
611
- model.grounding_encoder.image_encoder.blocks.10
612
- model.grounding_encoder.image_encoder.blocks.10.norm1
613
- model.grounding_encoder.image_encoder.blocks.10.attn
614
- model.grounding_encoder.image_encoder.blocks.10.attn.qkv
615
- model.grounding_encoder.image_encoder.blocks.10.attn.proj
616
- model.grounding_encoder.image_encoder.blocks.10.norm2
617
- model.grounding_encoder.image_encoder.blocks.10.mlp
618
- model.grounding_encoder.image_encoder.blocks.10.mlp.lin1
619
- model.grounding_encoder.image_encoder.blocks.10.mlp.lin2
620
- model.grounding_encoder.image_encoder.blocks.10.mlp.act
621
- model.grounding_encoder.image_encoder.blocks.11
622
- model.grounding_encoder.image_encoder.blocks.11.norm1
623
- model.grounding_encoder.image_encoder.blocks.11.attn
624
- model.grounding_encoder.image_encoder.blocks.11.attn.qkv
625
- model.grounding_encoder.image_encoder.blocks.11.attn.proj
626
- model.grounding_encoder.image_encoder.blocks.11.norm2
627
- model.grounding_encoder.image_encoder.blocks.11.mlp
628
- model.grounding_encoder.image_encoder.blocks.11.mlp.lin1
629
- model.grounding_encoder.image_encoder.blocks.11.mlp.lin2
630
- model.grounding_encoder.image_encoder.blocks.11.mlp.act
631
- model.grounding_encoder.image_encoder.blocks.12
632
- model.grounding_encoder.image_encoder.blocks.12.norm1
633
- model.grounding_encoder.image_encoder.blocks.12.attn
634
- model.grounding_encoder.image_encoder.blocks.12.attn.qkv
635
- model.grounding_encoder.image_encoder.blocks.12.attn.proj
636
- model.grounding_encoder.image_encoder.blocks.12.norm2
637
- model.grounding_encoder.image_encoder.blocks.12.mlp
638
- model.grounding_encoder.image_encoder.blocks.12.mlp.lin1
639
- model.grounding_encoder.image_encoder.blocks.12.mlp.lin2
640
- model.grounding_encoder.image_encoder.blocks.12.mlp.act
641
- model.grounding_encoder.image_encoder.blocks.13
642
- model.grounding_encoder.image_encoder.blocks.13.norm1
643
- model.grounding_encoder.image_encoder.blocks.13.attn
644
- model.grounding_encoder.image_encoder.blocks.13.attn.qkv
645
- model.grounding_encoder.image_encoder.blocks.13.attn.proj
646
- model.grounding_encoder.image_encoder.blocks.13.norm2
647
- model.grounding_encoder.image_encoder.blocks.13.mlp
648
- model.grounding_encoder.image_encoder.blocks.13.mlp.lin1
649
- model.grounding_encoder.image_encoder.blocks.13.mlp.lin2
650
- model.grounding_encoder.image_encoder.blocks.13.mlp.act
651
- model.grounding_encoder.image_encoder.blocks.14
652
- model.grounding_encoder.image_encoder.blocks.14.norm1
653
- model.grounding_encoder.image_encoder.blocks.14.attn
654
- model.grounding_encoder.image_encoder.blocks.14.attn.qkv
655
- model.grounding_encoder.image_encoder.blocks.14.attn.proj
656
- model.grounding_encoder.image_encoder.blocks.14.norm2
657
- model.grounding_encoder.image_encoder.blocks.14.mlp
658
- model.grounding_encoder.image_encoder.blocks.14.mlp.lin1
659
- model.grounding_encoder.image_encoder.blocks.14.mlp.lin2
660
- model.grounding_encoder.image_encoder.blocks.14.mlp.act
661
- model.grounding_encoder.image_encoder.blocks.15
662
- model.grounding_encoder.image_encoder.blocks.15.norm1
663
- model.grounding_encoder.image_encoder.blocks.15.attn
664
- model.grounding_encoder.image_encoder.blocks.15.attn.qkv
665
- model.grounding_encoder.image_encoder.blocks.15.attn.proj
666
- model.grounding_encoder.image_encoder.blocks.15.norm2
667
- model.grounding_encoder.image_encoder.blocks.15.mlp
668
- model.grounding_encoder.image_encoder.blocks.15.mlp.lin1
669
- model.grounding_encoder.image_encoder.blocks.15.mlp.lin2
670
- model.grounding_encoder.image_encoder.blocks.15.mlp.act
671
- model.grounding_encoder.image_encoder.blocks.16
672
- model.grounding_encoder.image_encoder.blocks.16.norm1
673
- model.grounding_encoder.image_encoder.blocks.16.attn
674
- model.grounding_encoder.image_encoder.blocks.16.attn.qkv
675
- model.grounding_encoder.image_encoder.blocks.16.attn.proj
676
- model.grounding_encoder.image_encoder.blocks.16.norm2
677
- model.grounding_encoder.image_encoder.blocks.16.mlp
678
- model.grounding_encoder.image_encoder.blocks.16.mlp.lin1
679
- model.grounding_encoder.image_encoder.blocks.16.mlp.lin2
680
- model.grounding_encoder.image_encoder.blocks.16.mlp.act
681
- model.grounding_encoder.image_encoder.blocks.17
682
- model.grounding_encoder.image_encoder.blocks.17.norm1
683
- model.grounding_encoder.image_encoder.blocks.17.attn
684
- model.grounding_encoder.image_encoder.blocks.17.attn.qkv
685
- model.grounding_encoder.image_encoder.blocks.17.attn.proj
686
- model.grounding_encoder.image_encoder.blocks.17.norm2
687
- model.grounding_encoder.image_encoder.blocks.17.mlp
688
- model.grounding_encoder.image_encoder.blocks.17.mlp.lin1
689
- model.grounding_encoder.image_encoder.blocks.17.mlp.lin2
690
- model.grounding_encoder.image_encoder.blocks.17.mlp.act
691
- model.grounding_encoder.image_encoder.blocks.18
692
- model.grounding_encoder.image_encoder.blocks.18.norm1
693
- model.grounding_encoder.image_encoder.blocks.18.attn
694
- model.grounding_encoder.image_encoder.blocks.18.attn.qkv
695
- model.grounding_encoder.image_encoder.blocks.18.attn.proj
696
- model.grounding_encoder.image_encoder.blocks.18.norm2
697
- model.grounding_encoder.image_encoder.blocks.18.mlp
698
- model.grounding_encoder.image_encoder.blocks.18.mlp.lin1
699
- model.grounding_encoder.image_encoder.blocks.18.mlp.lin2
700
- model.grounding_encoder.image_encoder.blocks.18.mlp.act
701
- model.grounding_encoder.image_encoder.blocks.19
702
- model.grounding_encoder.image_encoder.blocks.19.norm1
703
- model.grounding_encoder.image_encoder.blocks.19.attn
704
- model.grounding_encoder.image_encoder.blocks.19.attn.qkv
705
- model.grounding_encoder.image_encoder.blocks.19.attn.proj
706
- model.grounding_encoder.image_encoder.blocks.19.norm2
707
- model.grounding_encoder.image_encoder.blocks.19.mlp
708
- model.grounding_encoder.image_encoder.blocks.19.mlp.lin1
709
- model.grounding_encoder.image_encoder.blocks.19.mlp.lin2
710
- model.grounding_encoder.image_encoder.blocks.19.mlp.act
711
- model.grounding_encoder.image_encoder.blocks.20
712
- model.grounding_encoder.image_encoder.blocks.20.norm1
713
- model.grounding_encoder.image_encoder.blocks.20.attn
714
- model.grounding_encoder.image_encoder.blocks.20.attn.qkv
715
- model.grounding_encoder.image_encoder.blocks.20.attn.proj
716
- model.grounding_encoder.image_encoder.blocks.20.norm2
717
- model.grounding_encoder.image_encoder.blocks.20.mlp
718
- model.grounding_encoder.image_encoder.blocks.20.mlp.lin1
719
- model.grounding_encoder.image_encoder.blocks.20.mlp.lin2
720
- model.grounding_encoder.image_encoder.blocks.20.mlp.act
721
- model.grounding_encoder.image_encoder.blocks.21
722
- model.grounding_encoder.image_encoder.blocks.21.norm1
723
- model.grounding_encoder.image_encoder.blocks.21.attn
724
- model.grounding_encoder.image_encoder.blocks.21.attn.qkv
725
- model.grounding_encoder.image_encoder.blocks.21.attn.proj
726
- model.grounding_encoder.image_encoder.blocks.21.norm2
727
- model.grounding_encoder.image_encoder.blocks.21.mlp
728
- model.grounding_encoder.image_encoder.blocks.21.mlp.lin1
729
- model.grounding_encoder.image_encoder.blocks.21.mlp.lin2
730
- model.grounding_encoder.image_encoder.blocks.21.mlp.act
731
- model.grounding_encoder.image_encoder.blocks.22
732
- model.grounding_encoder.image_encoder.blocks.22.norm1
733
- model.grounding_encoder.image_encoder.blocks.22.attn
734
- model.grounding_encoder.image_encoder.blocks.22.attn.qkv
735
- model.grounding_encoder.image_encoder.blocks.22.attn.proj
736
- model.grounding_encoder.image_encoder.blocks.22.norm2
737
- model.grounding_encoder.image_encoder.blocks.22.mlp
738
- model.grounding_encoder.image_encoder.blocks.22.mlp.lin1
739
- model.grounding_encoder.image_encoder.blocks.22.mlp.lin2
740
- model.grounding_encoder.image_encoder.blocks.22.mlp.act
741
- model.grounding_encoder.image_encoder.blocks.23
742
- model.grounding_encoder.image_encoder.blocks.23.norm1
743
- model.grounding_encoder.image_encoder.blocks.23.attn
744
- model.grounding_encoder.image_encoder.blocks.23.attn.qkv
745
- model.grounding_encoder.image_encoder.blocks.23.attn.proj
746
- model.grounding_encoder.image_encoder.blocks.23.norm2
747
- model.grounding_encoder.image_encoder.blocks.23.mlp
748
- model.grounding_encoder.image_encoder.blocks.23.mlp.lin1
749
- model.grounding_encoder.image_encoder.blocks.23.mlp.lin2
750
- model.grounding_encoder.image_encoder.blocks.23.mlp.act
751
- model.grounding_encoder.image_encoder.blocks.24
752
- model.grounding_encoder.image_encoder.blocks.24.norm1
753
- model.grounding_encoder.image_encoder.blocks.24.attn
754
- model.grounding_encoder.image_encoder.blocks.24.attn.qkv
755
- model.grounding_encoder.image_encoder.blocks.24.attn.proj
756
- model.grounding_encoder.image_encoder.blocks.24.norm2
757
- model.grounding_encoder.image_encoder.blocks.24.mlp
758
- model.grounding_encoder.image_encoder.blocks.24.mlp.lin1
759
- model.grounding_encoder.image_encoder.blocks.24.mlp.lin2
760
- model.grounding_encoder.image_encoder.blocks.24.mlp.act
761
- model.grounding_encoder.image_encoder.blocks.25
762
- model.grounding_encoder.image_encoder.blocks.25.norm1
763
- model.grounding_encoder.image_encoder.blocks.25.attn
764
- model.grounding_encoder.image_encoder.blocks.25.attn.qkv
765
- model.grounding_encoder.image_encoder.blocks.25.attn.proj
766
- model.grounding_encoder.image_encoder.blocks.25.norm2
767
- model.grounding_encoder.image_encoder.blocks.25.mlp
768
- model.grounding_encoder.image_encoder.blocks.25.mlp.lin1
769
- model.grounding_encoder.image_encoder.blocks.25.mlp.lin2
770
- model.grounding_encoder.image_encoder.blocks.25.mlp.act
771
- model.grounding_encoder.image_encoder.blocks.26
772
- model.grounding_encoder.image_encoder.blocks.26.norm1
773
- model.grounding_encoder.image_encoder.blocks.26.attn
774
- model.grounding_encoder.image_encoder.blocks.26.attn.qkv
775
- model.grounding_encoder.image_encoder.blocks.26.attn.proj
776
- model.grounding_encoder.image_encoder.blocks.26.norm2
777
- model.grounding_encoder.image_encoder.blocks.26.mlp
778
- model.grounding_encoder.image_encoder.blocks.26.mlp.lin1
779
- model.grounding_encoder.image_encoder.blocks.26.mlp.lin2
780
- model.grounding_encoder.image_encoder.blocks.26.mlp.act
781
- model.grounding_encoder.image_encoder.blocks.27
782
- model.grounding_encoder.image_encoder.blocks.27.norm1
783
- model.grounding_encoder.image_encoder.blocks.27.attn
784
- model.grounding_encoder.image_encoder.blocks.27.attn.qkv
785
- model.grounding_encoder.image_encoder.blocks.27.attn.proj
786
- model.grounding_encoder.image_encoder.blocks.27.norm2
787
- model.grounding_encoder.image_encoder.blocks.27.mlp
788
- model.grounding_encoder.image_encoder.blocks.27.mlp.lin1
789
- model.grounding_encoder.image_encoder.blocks.27.mlp.lin2
790
- model.grounding_encoder.image_encoder.blocks.27.mlp.act
791
- model.grounding_encoder.image_encoder.blocks.28
792
- model.grounding_encoder.image_encoder.blocks.28.norm1
793
- model.grounding_encoder.image_encoder.blocks.28.attn
794
- model.grounding_encoder.image_encoder.blocks.28.attn.qkv
795
- model.grounding_encoder.image_encoder.blocks.28.attn.proj
796
- model.grounding_encoder.image_encoder.blocks.28.norm2
797
- model.grounding_encoder.image_encoder.blocks.28.mlp
798
- model.grounding_encoder.image_encoder.blocks.28.mlp.lin1
799
- model.grounding_encoder.image_encoder.blocks.28.mlp.lin2
800
- model.grounding_encoder.image_encoder.blocks.28.mlp.act
801
- model.grounding_encoder.image_encoder.blocks.29
802
- model.grounding_encoder.image_encoder.blocks.29.norm1
803
- model.grounding_encoder.image_encoder.blocks.29.attn
804
- model.grounding_encoder.image_encoder.blocks.29.attn.qkv
805
- model.grounding_encoder.image_encoder.blocks.29.attn.proj
806
- model.grounding_encoder.image_encoder.blocks.29.norm2
807
- model.grounding_encoder.image_encoder.blocks.29.mlp
808
- model.grounding_encoder.image_encoder.blocks.29.mlp.lin1
809
- model.grounding_encoder.image_encoder.blocks.29.mlp.lin2
810
- model.grounding_encoder.image_encoder.blocks.29.mlp.act
811
- model.grounding_encoder.image_encoder.blocks.30
812
- model.grounding_encoder.image_encoder.blocks.30.norm1
813
- model.grounding_encoder.image_encoder.blocks.30.attn
814
- model.grounding_encoder.image_encoder.blocks.30.attn.qkv
815
- model.grounding_encoder.image_encoder.blocks.30.attn.proj
816
- model.grounding_encoder.image_encoder.blocks.30.norm2
817
- model.grounding_encoder.image_encoder.blocks.30.mlp
818
- model.grounding_encoder.image_encoder.blocks.30.mlp.lin1
819
- model.grounding_encoder.image_encoder.blocks.30.mlp.lin2
820
- model.grounding_encoder.image_encoder.blocks.30.mlp.act
821
- model.grounding_encoder.image_encoder.blocks.31
822
- model.grounding_encoder.image_encoder.blocks.31.norm1
823
- model.grounding_encoder.image_encoder.blocks.31.attn
824
- model.grounding_encoder.image_encoder.blocks.31.attn.qkv
825
- model.grounding_encoder.image_encoder.blocks.31.attn.proj
826
- model.grounding_encoder.image_encoder.blocks.31.norm2
827
- model.grounding_encoder.image_encoder.blocks.31.mlp
828
- model.grounding_encoder.image_encoder.blocks.31.mlp.lin1
829
- model.grounding_encoder.image_encoder.blocks.31.mlp.lin2
830
- model.grounding_encoder.image_encoder.blocks.31.mlp.act
831
- model.grounding_encoder.image_encoder.neck
832
- model.grounding_encoder.image_encoder.neck.0
833
- model.grounding_encoder.image_encoder.neck.1
834
- model.grounding_encoder.image_encoder.neck.2
835
- model.grounding_encoder.image_encoder.neck.3
836
- model.grounding_encoder.prompt_encoder
837
- model.grounding_encoder.prompt_encoder.pe_layer
838
- model.grounding_encoder.prompt_encoder.point_embeddings
839
- model.grounding_encoder.prompt_encoder.point_embeddings.0
840
- model.grounding_encoder.prompt_encoder.point_embeddings.1
841
- model.grounding_encoder.prompt_encoder.point_embeddings.2
842
- model.grounding_encoder.prompt_encoder.point_embeddings.3
843
- model.grounding_encoder.prompt_encoder.not_a_point_embed
844
- model.grounding_encoder.prompt_encoder.mask_downscaling
845
- model.grounding_encoder.prompt_encoder.mask_downscaling.0
846
- model.grounding_encoder.prompt_encoder.mask_downscaling.1
847
- model.grounding_encoder.prompt_encoder.mask_downscaling.2
848
- model.grounding_encoder.prompt_encoder.mask_downscaling.3
849
- model.grounding_encoder.prompt_encoder.mask_downscaling.4
850
- model.grounding_encoder.prompt_encoder.mask_downscaling.5
851
- model.grounding_encoder.prompt_encoder.mask_downscaling.6
852
- model.grounding_encoder.prompt_encoder.no_mask_embed
853
- model.grounding_encoder.mask_decoder
854
- model.grounding_encoder.mask_decoder.transformer
855
- model.grounding_encoder.mask_decoder.transformer.layers
856
- model.grounding_encoder.mask_decoder.transformer.layers.0
857
- model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn
858
- model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn.q_proj
859
- model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn.k_proj
860
- model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn.v_proj
861
- model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn.out_proj
862
- model.grounding_encoder.mask_decoder.transformer.layers.0.norm1
863
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image
864
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image.q_proj
865
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image.k_proj
866
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image.v_proj
867
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image.out_proj
868
- model.grounding_encoder.mask_decoder.transformer.layers.0.norm2
869
- model.grounding_encoder.mask_decoder.transformer.layers.0.mlp
870
- model.grounding_encoder.mask_decoder.transformer.layers.0.mlp.lin1
871
- model.grounding_encoder.mask_decoder.transformer.layers.0.mlp.lin2
872
- model.grounding_encoder.mask_decoder.transformer.layers.0.mlp.act
873
- model.grounding_encoder.mask_decoder.transformer.layers.0.norm3
874
- model.grounding_encoder.mask_decoder.transformer.layers.0.norm4
875
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token
876
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token.q_proj
877
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token.k_proj
878
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token.v_proj
879
- model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token.out_proj
880
- model.grounding_encoder.mask_decoder.transformer.layers.1
881
- model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn
882
- model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn.q_proj
883
- model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn.k_proj
884
- model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn.v_proj
885
- model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn.out_proj
886
- model.grounding_encoder.mask_decoder.transformer.layers.1.norm1
887
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image
888
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image.q_proj
889
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image.k_proj
890
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image.v_proj
891
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image.out_proj
892
- model.grounding_encoder.mask_decoder.transformer.layers.1.norm2
893
- model.grounding_encoder.mask_decoder.transformer.layers.1.mlp
894
- model.grounding_encoder.mask_decoder.transformer.layers.1.mlp.lin1
895
- model.grounding_encoder.mask_decoder.transformer.layers.1.mlp.lin2
896
- model.grounding_encoder.mask_decoder.transformer.layers.1.mlp.act
897
- model.grounding_encoder.mask_decoder.transformer.layers.1.norm3
898
- model.grounding_encoder.mask_decoder.transformer.layers.1.norm4
899
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token
900
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token.q_proj
901
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token.k_proj
902
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token.v_proj
903
- model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token.out_proj
904
- model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image
905
- model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image.q_proj
906
- model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image.k_proj
907
- model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image.v_proj
908
- model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image.out_proj
909
- model.grounding_encoder.mask_decoder.transformer.norm_final_attn
910
- model.grounding_encoder.mask_decoder.iou_token
911
- model.grounding_encoder.mask_decoder.mask_tokens
912
- model.grounding_encoder.mask_decoder.output_upscaling
913
- model.grounding_encoder.mask_decoder.output_upscaling.0
914
- model.grounding_encoder.mask_decoder.output_upscaling.1
915
- model.grounding_encoder.mask_decoder.output_upscaling.2
916
- model.grounding_encoder.mask_decoder.output_upscaling.3
917
- model.grounding_encoder.mask_decoder.output_upscaling.4
918
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps
919
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0
920
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0.layers
921
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0.layers.0
922
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0.layers.1
923
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0.layers.2
924
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1
925
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1.layers
926
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1.layers.0
927
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1.layers.1
928
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1.layers.2
929
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2
930
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2.layers
931
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2.layers.0
932
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2.layers.1
933
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2.layers.2
934
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3
935
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3.layers
936
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3.layers.0
937
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3.layers.1
938
- model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3.layers.2
939
- model.grounding_encoder.mask_decoder.iou_prediction_head
940
- model.grounding_encoder.mask_decoder.iou_prediction_head.layers
941
- model.grounding_encoder.mask_decoder.iou_prediction_head.layers.0
942
- model.grounding_encoder.mask_decoder.iou_prediction_head.layers.1
943
- model.grounding_encoder.mask_decoder.iou_prediction_head.layers.2
944
- model.text_hidden_fcs
945
- model.text_hidden_fcs.0
946
- model.text_hidden_fcs.0.0
947
- model.text_hidden_fcs.0.1
948
- model.text_hidden_fcs.0.2
949
- model.text_hidden_fcs.0.3
950
- lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/THUDM/cogvlm-chat-hf.txt DELETED
@@ -1,1352 +0,0 @@
1
-
2
- model
3
- model.embed_tokens
4
- model.layers
5
- model.layers.0
6
- model.layers.0.self_attn
7
- model.layers.0.self_attn.rotary_emb
8
- model.layers.0.self_attn.vision_expert_query_key_value
9
- model.layers.0.self_attn.vision_expert_dense
10
- model.layers.0.self_attn.language_expert_query_key_value
11
- model.layers.0.self_attn.language_expert_dense
12
- model.layers.0.mlp
13
- model.layers.0.mlp.language_mlp
14
- model.layers.0.mlp.language_mlp.gate_proj
15
- model.layers.0.mlp.language_mlp.up_proj
16
- model.layers.0.mlp.language_mlp.down_proj
17
- model.layers.0.mlp.language_mlp.act_fn
18
- model.layers.0.mlp.vision_mlp
19
- model.layers.0.mlp.vision_mlp.gate_proj
20
- model.layers.0.mlp.vision_mlp.up_proj
21
- model.layers.0.mlp.vision_mlp.down_proj
22
- model.layers.0.mlp.vision_mlp.act_fn
23
- model.layers.0.input_layernorm
24
- model.layers.0.post_attention_layernorm
25
- model.layers.1
26
- model.layers.1.self_attn
27
- model.layers.1.self_attn.rotary_emb
28
- model.layers.1.self_attn.vision_expert_query_key_value
29
- model.layers.1.self_attn.vision_expert_dense
30
- model.layers.1.self_attn.language_expert_query_key_value
31
- model.layers.1.self_attn.language_expert_dense
32
- model.layers.1.mlp
33
- model.layers.1.mlp.language_mlp
34
- model.layers.1.mlp.language_mlp.gate_proj
35
- model.layers.1.mlp.language_mlp.up_proj
36
- model.layers.1.mlp.language_mlp.down_proj
37
- model.layers.1.mlp.language_mlp.act_fn
38
- model.layers.1.mlp.vision_mlp
39
- model.layers.1.mlp.vision_mlp.gate_proj
40
- model.layers.1.mlp.vision_mlp.up_proj
41
- model.layers.1.mlp.vision_mlp.down_proj
42
- model.layers.1.mlp.vision_mlp.act_fn
43
- model.layers.1.input_layernorm
44
- model.layers.1.post_attention_layernorm
45
- model.layers.2
46
- model.layers.2.self_attn
47
- model.layers.2.self_attn.rotary_emb
48
- model.layers.2.self_attn.vision_expert_query_key_value
49
- model.layers.2.self_attn.vision_expert_dense
50
- model.layers.2.self_attn.language_expert_query_key_value
51
- model.layers.2.self_attn.language_expert_dense
52
- model.layers.2.mlp
53
- model.layers.2.mlp.language_mlp
54
- model.layers.2.mlp.language_mlp.gate_proj
55
- model.layers.2.mlp.language_mlp.up_proj
56
- model.layers.2.mlp.language_mlp.down_proj
57
- model.layers.2.mlp.language_mlp.act_fn
58
- model.layers.2.mlp.vision_mlp
59
- model.layers.2.mlp.vision_mlp.gate_proj
60
- model.layers.2.mlp.vision_mlp.up_proj
61
- model.layers.2.mlp.vision_mlp.down_proj
62
- model.layers.2.mlp.vision_mlp.act_fn
63
- model.layers.2.input_layernorm
64
- model.layers.2.post_attention_layernorm
65
- model.layers.3
66
- model.layers.3.self_attn
67
- model.layers.3.self_attn.rotary_emb
68
- model.layers.3.self_attn.vision_expert_query_key_value
69
- model.layers.3.self_attn.vision_expert_dense
70
- model.layers.3.self_attn.language_expert_query_key_value
71
- model.layers.3.self_attn.language_expert_dense
72
- model.layers.3.mlp
73
- model.layers.3.mlp.language_mlp
74
- model.layers.3.mlp.language_mlp.gate_proj
75
- model.layers.3.mlp.language_mlp.up_proj
76
- model.layers.3.mlp.language_mlp.down_proj
77
- model.layers.3.mlp.language_mlp.act_fn
78
- model.layers.3.mlp.vision_mlp
79
- model.layers.3.mlp.vision_mlp.gate_proj
80
- model.layers.3.mlp.vision_mlp.up_proj
81
- model.layers.3.mlp.vision_mlp.down_proj
82
- model.layers.3.mlp.vision_mlp.act_fn
83
- model.layers.3.input_layernorm
84
- model.layers.3.post_attention_layernorm
85
- model.layers.4
86
- model.layers.4.self_attn
87
- model.layers.4.self_attn.rotary_emb
88
- model.layers.4.self_attn.vision_expert_query_key_value
89
- model.layers.4.self_attn.vision_expert_dense
90
- model.layers.4.self_attn.language_expert_query_key_value
91
- model.layers.4.self_attn.language_expert_dense
92
- model.layers.4.mlp
93
- model.layers.4.mlp.language_mlp
94
- model.layers.4.mlp.language_mlp.gate_proj
95
- model.layers.4.mlp.language_mlp.up_proj
96
- model.layers.4.mlp.language_mlp.down_proj
97
- model.layers.4.mlp.language_mlp.act_fn
98
- model.layers.4.mlp.vision_mlp
99
- model.layers.4.mlp.vision_mlp.gate_proj
100
- model.layers.4.mlp.vision_mlp.up_proj
101
- model.layers.4.mlp.vision_mlp.down_proj
102
- model.layers.4.mlp.vision_mlp.act_fn
103
- model.layers.4.input_layernorm
104
- model.layers.4.post_attention_layernorm
105
- model.layers.5
106
- model.layers.5.self_attn
107
- model.layers.5.self_attn.rotary_emb
108
- model.layers.5.self_attn.vision_expert_query_key_value
109
- model.layers.5.self_attn.vision_expert_dense
110
- model.layers.5.self_attn.language_expert_query_key_value
111
- model.layers.5.self_attn.language_expert_dense
112
- model.layers.5.mlp
113
- model.layers.5.mlp.language_mlp
114
- model.layers.5.mlp.language_mlp.gate_proj
115
- model.layers.5.mlp.language_mlp.up_proj
116
- model.layers.5.mlp.language_mlp.down_proj
117
- model.layers.5.mlp.language_mlp.act_fn
118
- model.layers.5.mlp.vision_mlp
119
- model.layers.5.mlp.vision_mlp.gate_proj
120
- model.layers.5.mlp.vision_mlp.up_proj
121
- model.layers.5.mlp.vision_mlp.down_proj
122
- model.layers.5.mlp.vision_mlp.act_fn
123
- model.layers.5.input_layernorm
124
- model.layers.5.post_attention_layernorm
125
- model.layers.6
126
- model.layers.6.self_attn
127
- model.layers.6.self_attn.rotary_emb
128
- model.layers.6.self_attn.vision_expert_query_key_value
129
- model.layers.6.self_attn.vision_expert_dense
130
- model.layers.6.self_attn.language_expert_query_key_value
131
- model.layers.6.self_attn.language_expert_dense
132
- model.layers.6.mlp
133
- model.layers.6.mlp.language_mlp
134
- model.layers.6.mlp.language_mlp.gate_proj
135
- model.layers.6.mlp.language_mlp.up_proj
136
- model.layers.6.mlp.language_mlp.down_proj
137
- model.layers.6.mlp.language_mlp.act_fn
138
- model.layers.6.mlp.vision_mlp
139
- model.layers.6.mlp.vision_mlp.gate_proj
140
- model.layers.6.mlp.vision_mlp.up_proj
141
- model.layers.6.mlp.vision_mlp.down_proj
142
- model.layers.6.mlp.vision_mlp.act_fn
143
- model.layers.6.input_layernorm
144
- model.layers.6.post_attention_layernorm
145
- model.layers.7
146
- model.layers.7.self_attn
147
- model.layers.7.self_attn.rotary_emb
148
- model.layers.7.self_attn.vision_expert_query_key_value
149
- model.layers.7.self_attn.vision_expert_dense
150
- model.layers.7.self_attn.language_expert_query_key_value
151
- model.layers.7.self_attn.language_expert_dense
152
- model.layers.7.mlp
153
- model.layers.7.mlp.language_mlp
154
- model.layers.7.mlp.language_mlp.gate_proj
155
- model.layers.7.mlp.language_mlp.up_proj
156
- model.layers.7.mlp.language_mlp.down_proj
157
- model.layers.7.mlp.language_mlp.act_fn
158
- model.layers.7.mlp.vision_mlp
159
- model.layers.7.mlp.vision_mlp.gate_proj
160
- model.layers.7.mlp.vision_mlp.up_proj
161
- model.layers.7.mlp.vision_mlp.down_proj
162
- model.layers.7.mlp.vision_mlp.act_fn
163
- model.layers.7.input_layernorm
164
- model.layers.7.post_attention_layernorm
165
- model.layers.8
166
- model.layers.8.self_attn
167
- model.layers.8.self_attn.rotary_emb
168
- model.layers.8.self_attn.vision_expert_query_key_value
169
- model.layers.8.self_attn.vision_expert_dense
170
- model.layers.8.self_attn.language_expert_query_key_value
171
- model.layers.8.self_attn.language_expert_dense
172
- model.layers.8.mlp
173
- model.layers.8.mlp.language_mlp
174
- model.layers.8.mlp.language_mlp.gate_proj
175
- model.layers.8.mlp.language_mlp.up_proj
176
- model.layers.8.mlp.language_mlp.down_proj
177
- model.layers.8.mlp.language_mlp.act_fn
178
- model.layers.8.mlp.vision_mlp
179
- model.layers.8.mlp.vision_mlp.gate_proj
180
- model.layers.8.mlp.vision_mlp.up_proj
181
- model.layers.8.mlp.vision_mlp.down_proj
182
- model.layers.8.mlp.vision_mlp.act_fn
183
- model.layers.8.input_layernorm
184
- model.layers.8.post_attention_layernorm
185
- model.layers.9
186
- model.layers.9.self_attn
187
- model.layers.9.self_attn.rotary_emb
188
- model.layers.9.self_attn.vision_expert_query_key_value
189
- model.layers.9.self_attn.vision_expert_dense
190
- model.layers.9.self_attn.language_expert_query_key_value
191
- model.layers.9.self_attn.language_expert_dense
192
- model.layers.9.mlp
193
- model.layers.9.mlp.language_mlp
194
- model.layers.9.mlp.language_mlp.gate_proj
195
- model.layers.9.mlp.language_mlp.up_proj
196
- model.layers.9.mlp.language_mlp.down_proj
197
- model.layers.9.mlp.language_mlp.act_fn
198
- model.layers.9.mlp.vision_mlp
199
- model.layers.9.mlp.vision_mlp.gate_proj
200
- model.layers.9.mlp.vision_mlp.up_proj
201
- model.layers.9.mlp.vision_mlp.down_proj
202
- model.layers.9.mlp.vision_mlp.act_fn
203
- model.layers.9.input_layernorm
204
- model.layers.9.post_attention_layernorm
205
- model.layers.10
206
- model.layers.10.self_attn
207
- model.layers.10.self_attn.rotary_emb
208
- model.layers.10.self_attn.vision_expert_query_key_value
209
- model.layers.10.self_attn.vision_expert_dense
210
- model.layers.10.self_attn.language_expert_query_key_value
211
- model.layers.10.self_attn.language_expert_dense
212
- model.layers.10.mlp
213
- model.layers.10.mlp.language_mlp
214
- model.layers.10.mlp.language_mlp.gate_proj
215
- model.layers.10.mlp.language_mlp.up_proj
216
- model.layers.10.mlp.language_mlp.down_proj
217
- model.layers.10.mlp.language_mlp.act_fn
218
- model.layers.10.mlp.vision_mlp
219
- model.layers.10.mlp.vision_mlp.gate_proj
220
- model.layers.10.mlp.vision_mlp.up_proj
221
- model.layers.10.mlp.vision_mlp.down_proj
222
- model.layers.10.mlp.vision_mlp.act_fn
223
- model.layers.10.input_layernorm
224
- model.layers.10.post_attention_layernorm
225
- model.layers.11
226
- model.layers.11.self_attn
227
- model.layers.11.self_attn.rotary_emb
228
- model.layers.11.self_attn.vision_expert_query_key_value
229
- model.layers.11.self_attn.vision_expert_dense
230
- model.layers.11.self_attn.language_expert_query_key_value
231
- model.layers.11.self_attn.language_expert_dense
232
- model.layers.11.mlp
233
- model.layers.11.mlp.language_mlp
234
- model.layers.11.mlp.language_mlp.gate_proj
235
- model.layers.11.mlp.language_mlp.up_proj
236
- model.layers.11.mlp.language_mlp.down_proj
237
- model.layers.11.mlp.language_mlp.act_fn
238
- model.layers.11.mlp.vision_mlp
239
- model.layers.11.mlp.vision_mlp.gate_proj
240
- model.layers.11.mlp.vision_mlp.up_proj
241
- model.layers.11.mlp.vision_mlp.down_proj
242
- model.layers.11.mlp.vision_mlp.act_fn
243
- model.layers.11.input_layernorm
244
- model.layers.11.post_attention_layernorm
245
- model.layers.12
246
- model.layers.12.self_attn
247
- model.layers.12.self_attn.rotary_emb
248
- model.layers.12.self_attn.vision_expert_query_key_value
249
- model.layers.12.self_attn.vision_expert_dense
250
- model.layers.12.self_attn.language_expert_query_key_value
251
- model.layers.12.self_attn.language_expert_dense
252
- model.layers.12.mlp
253
- model.layers.12.mlp.language_mlp
254
- model.layers.12.mlp.language_mlp.gate_proj
255
- model.layers.12.mlp.language_mlp.up_proj
256
- model.layers.12.mlp.language_mlp.down_proj
257
- model.layers.12.mlp.language_mlp.act_fn
258
- model.layers.12.mlp.vision_mlp
259
- model.layers.12.mlp.vision_mlp.gate_proj
260
- model.layers.12.mlp.vision_mlp.up_proj
261
- model.layers.12.mlp.vision_mlp.down_proj
262
- model.layers.12.mlp.vision_mlp.act_fn
263
- model.layers.12.input_layernorm
264
- model.layers.12.post_attention_layernorm
265
- model.layers.13
266
- model.layers.13.self_attn
267
- model.layers.13.self_attn.rotary_emb
268
- model.layers.13.self_attn.vision_expert_query_key_value
269
- model.layers.13.self_attn.vision_expert_dense
270
- model.layers.13.self_attn.language_expert_query_key_value
271
- model.layers.13.self_attn.language_expert_dense
272
- model.layers.13.mlp
273
- model.layers.13.mlp.language_mlp
274
- model.layers.13.mlp.language_mlp.gate_proj
275
- model.layers.13.mlp.language_mlp.up_proj
276
- model.layers.13.mlp.language_mlp.down_proj
277
- model.layers.13.mlp.language_mlp.act_fn
278
- model.layers.13.mlp.vision_mlp
279
- model.layers.13.mlp.vision_mlp.gate_proj
280
- model.layers.13.mlp.vision_mlp.up_proj
281
- model.layers.13.mlp.vision_mlp.down_proj
282
- model.layers.13.mlp.vision_mlp.act_fn
283
- model.layers.13.input_layernorm
284
- model.layers.13.post_attention_layernorm
285
- model.layers.14
286
- model.layers.14.self_attn
287
- model.layers.14.self_attn.rotary_emb
288
- model.layers.14.self_attn.vision_expert_query_key_value
289
- model.layers.14.self_attn.vision_expert_dense
290
- model.layers.14.self_attn.language_expert_query_key_value
291
- model.layers.14.self_attn.language_expert_dense
292
- model.layers.14.mlp
293
- model.layers.14.mlp.language_mlp
294
- model.layers.14.mlp.language_mlp.gate_proj
295
- model.layers.14.mlp.language_mlp.up_proj
296
- model.layers.14.mlp.language_mlp.down_proj
297
- model.layers.14.mlp.language_mlp.act_fn
298
- model.layers.14.mlp.vision_mlp
299
- model.layers.14.mlp.vision_mlp.gate_proj
300
- model.layers.14.mlp.vision_mlp.up_proj
301
- model.layers.14.mlp.vision_mlp.down_proj
302
- model.layers.14.mlp.vision_mlp.act_fn
303
- model.layers.14.input_layernorm
304
- model.layers.14.post_attention_layernorm
305
- model.layers.15
306
- model.layers.15.self_attn
307
- model.layers.15.self_attn.rotary_emb
308
- model.layers.15.self_attn.vision_expert_query_key_value
309
- model.layers.15.self_attn.vision_expert_dense
310
- model.layers.15.self_attn.language_expert_query_key_value
311
- model.layers.15.self_attn.language_expert_dense
312
- model.layers.15.mlp
313
- model.layers.15.mlp.language_mlp
314
- model.layers.15.mlp.language_mlp.gate_proj
315
- model.layers.15.mlp.language_mlp.up_proj
316
- model.layers.15.mlp.language_mlp.down_proj
317
- model.layers.15.mlp.language_mlp.act_fn
318
- model.layers.15.mlp.vision_mlp
319
- model.layers.15.mlp.vision_mlp.gate_proj
320
- model.layers.15.mlp.vision_mlp.up_proj
321
- model.layers.15.mlp.vision_mlp.down_proj
322
- model.layers.15.mlp.vision_mlp.act_fn
323
- model.layers.15.input_layernorm
324
- model.layers.15.post_attention_layernorm
325
- model.layers.16
326
- model.layers.16.self_attn
327
- model.layers.16.self_attn.rotary_emb
328
- model.layers.16.self_attn.vision_expert_query_key_value
329
- model.layers.16.self_attn.vision_expert_dense
330
- model.layers.16.self_attn.language_expert_query_key_value
331
- model.layers.16.self_attn.language_expert_dense
332
- model.layers.16.mlp
333
- model.layers.16.mlp.language_mlp
334
- model.layers.16.mlp.language_mlp.gate_proj
335
- model.layers.16.mlp.language_mlp.up_proj
336
- model.layers.16.mlp.language_mlp.down_proj
337
- model.layers.16.mlp.language_mlp.act_fn
338
- model.layers.16.mlp.vision_mlp
339
- model.layers.16.mlp.vision_mlp.gate_proj
340
- model.layers.16.mlp.vision_mlp.up_proj
341
- model.layers.16.mlp.vision_mlp.down_proj
342
- model.layers.16.mlp.vision_mlp.act_fn
343
- model.layers.16.input_layernorm
344
- model.layers.16.post_attention_layernorm
345
- model.layers.17
346
- model.layers.17.self_attn
347
- model.layers.17.self_attn.rotary_emb
348
- model.layers.17.self_attn.vision_expert_query_key_value
349
- model.layers.17.self_attn.vision_expert_dense
350
- model.layers.17.self_attn.language_expert_query_key_value
351
- model.layers.17.self_attn.language_expert_dense
352
- model.layers.17.mlp
353
- model.layers.17.mlp.language_mlp
354
- model.layers.17.mlp.language_mlp.gate_proj
355
- model.layers.17.mlp.language_mlp.up_proj
356
- model.layers.17.mlp.language_mlp.down_proj
357
- model.layers.17.mlp.language_mlp.act_fn
358
- model.layers.17.mlp.vision_mlp
359
- model.layers.17.mlp.vision_mlp.gate_proj
360
- model.layers.17.mlp.vision_mlp.up_proj
361
- model.layers.17.mlp.vision_mlp.down_proj
362
- model.layers.17.mlp.vision_mlp.act_fn
363
- model.layers.17.input_layernorm
364
- model.layers.17.post_attention_layernorm
365
- model.layers.18
366
- model.layers.18.self_attn
367
- model.layers.18.self_attn.rotary_emb
368
- model.layers.18.self_attn.vision_expert_query_key_value
369
- model.layers.18.self_attn.vision_expert_dense
370
- model.layers.18.self_attn.language_expert_query_key_value
371
- model.layers.18.self_attn.language_expert_dense
372
- model.layers.18.mlp
373
- model.layers.18.mlp.language_mlp
374
- model.layers.18.mlp.language_mlp.gate_proj
375
- model.layers.18.mlp.language_mlp.up_proj
376
- model.layers.18.mlp.language_mlp.down_proj
377
- model.layers.18.mlp.language_mlp.act_fn
378
- model.layers.18.mlp.vision_mlp
379
- model.layers.18.mlp.vision_mlp.gate_proj
380
- model.layers.18.mlp.vision_mlp.up_proj
381
- model.layers.18.mlp.vision_mlp.down_proj
382
- model.layers.18.mlp.vision_mlp.act_fn
383
- model.layers.18.input_layernorm
384
- model.layers.18.post_attention_layernorm
385
- model.layers.19
386
- model.layers.19.self_attn
387
- model.layers.19.self_attn.rotary_emb
388
- model.layers.19.self_attn.vision_expert_query_key_value
389
- model.layers.19.self_attn.vision_expert_dense
390
- model.layers.19.self_attn.language_expert_query_key_value
391
- model.layers.19.self_attn.language_expert_dense
392
- model.layers.19.mlp
393
- model.layers.19.mlp.language_mlp
394
- model.layers.19.mlp.language_mlp.gate_proj
395
- model.layers.19.mlp.language_mlp.up_proj
396
- model.layers.19.mlp.language_mlp.down_proj
397
- model.layers.19.mlp.language_mlp.act_fn
398
- model.layers.19.mlp.vision_mlp
399
- model.layers.19.mlp.vision_mlp.gate_proj
400
- model.layers.19.mlp.vision_mlp.up_proj
401
- model.layers.19.mlp.vision_mlp.down_proj
402
- model.layers.19.mlp.vision_mlp.act_fn
403
- model.layers.19.input_layernorm
404
- model.layers.19.post_attention_layernorm
405
- model.layers.20
406
- model.layers.20.self_attn
407
- model.layers.20.self_attn.rotary_emb
408
- model.layers.20.self_attn.vision_expert_query_key_value
409
- model.layers.20.self_attn.vision_expert_dense
410
- model.layers.20.self_attn.language_expert_query_key_value
411
- model.layers.20.self_attn.language_expert_dense
412
- model.layers.20.mlp
413
- model.layers.20.mlp.language_mlp
414
- model.layers.20.mlp.language_mlp.gate_proj
415
- model.layers.20.mlp.language_mlp.up_proj
416
- model.layers.20.mlp.language_mlp.down_proj
417
- model.layers.20.mlp.language_mlp.act_fn
418
- model.layers.20.mlp.vision_mlp
419
- model.layers.20.mlp.vision_mlp.gate_proj
420
- model.layers.20.mlp.vision_mlp.up_proj
421
- model.layers.20.mlp.vision_mlp.down_proj
422
- model.layers.20.mlp.vision_mlp.act_fn
423
- model.layers.20.input_layernorm
424
- model.layers.20.post_attention_layernorm
425
- model.layers.21
426
- model.layers.21.self_attn
427
- model.layers.21.self_attn.rotary_emb
428
- model.layers.21.self_attn.vision_expert_query_key_value
429
- model.layers.21.self_attn.vision_expert_dense
430
- model.layers.21.self_attn.language_expert_query_key_value
431
- model.layers.21.self_attn.language_expert_dense
432
- model.layers.21.mlp
433
- model.layers.21.mlp.language_mlp
434
- model.layers.21.mlp.language_mlp.gate_proj
435
- model.layers.21.mlp.language_mlp.up_proj
436
- model.layers.21.mlp.language_mlp.down_proj
437
- model.layers.21.mlp.language_mlp.act_fn
438
- model.layers.21.mlp.vision_mlp
439
- model.layers.21.mlp.vision_mlp.gate_proj
440
- model.layers.21.mlp.vision_mlp.up_proj
441
- model.layers.21.mlp.vision_mlp.down_proj
442
- model.layers.21.mlp.vision_mlp.act_fn
443
- model.layers.21.input_layernorm
444
- model.layers.21.post_attention_layernorm
445
- model.layers.22
446
- model.layers.22.self_attn
447
- model.layers.22.self_attn.rotary_emb
448
- model.layers.22.self_attn.vision_expert_query_key_value
449
- model.layers.22.self_attn.vision_expert_dense
450
- model.layers.22.self_attn.language_expert_query_key_value
451
- model.layers.22.self_attn.language_expert_dense
452
- model.layers.22.mlp
453
- model.layers.22.mlp.language_mlp
454
- model.layers.22.mlp.language_mlp.gate_proj
455
- model.layers.22.mlp.language_mlp.up_proj
456
- model.layers.22.mlp.language_mlp.down_proj
457
- model.layers.22.mlp.language_mlp.act_fn
458
- model.layers.22.mlp.vision_mlp
459
- model.layers.22.mlp.vision_mlp.gate_proj
460
- model.layers.22.mlp.vision_mlp.up_proj
461
- model.layers.22.mlp.vision_mlp.down_proj
462
- model.layers.22.mlp.vision_mlp.act_fn
463
- model.layers.22.input_layernorm
464
- model.layers.22.post_attention_layernorm
465
- model.layers.23
466
- model.layers.23.self_attn
467
- model.layers.23.self_attn.rotary_emb
468
- model.layers.23.self_attn.vision_expert_query_key_value
469
- model.layers.23.self_attn.vision_expert_dense
470
- model.layers.23.self_attn.language_expert_query_key_value
471
- model.layers.23.self_attn.language_expert_dense
472
- model.layers.23.mlp
473
- model.layers.23.mlp.language_mlp
474
- model.layers.23.mlp.language_mlp.gate_proj
475
- model.layers.23.mlp.language_mlp.up_proj
476
- model.layers.23.mlp.language_mlp.down_proj
477
- model.layers.23.mlp.language_mlp.act_fn
478
- model.layers.23.mlp.vision_mlp
479
- model.layers.23.mlp.vision_mlp.gate_proj
480
- model.layers.23.mlp.vision_mlp.up_proj
481
- model.layers.23.mlp.vision_mlp.down_proj
482
- model.layers.23.mlp.vision_mlp.act_fn
483
- model.layers.23.input_layernorm
484
- model.layers.23.post_attention_layernorm
485
- model.layers.24
486
- model.layers.24.self_attn
487
- model.layers.24.self_attn.rotary_emb
488
- model.layers.24.self_attn.vision_expert_query_key_value
489
- model.layers.24.self_attn.vision_expert_dense
490
- model.layers.24.self_attn.language_expert_query_key_value
491
- model.layers.24.self_attn.language_expert_dense
492
- model.layers.24.mlp
493
- model.layers.24.mlp.language_mlp
494
- model.layers.24.mlp.language_mlp.gate_proj
495
- model.layers.24.mlp.language_mlp.up_proj
496
- model.layers.24.mlp.language_mlp.down_proj
497
- model.layers.24.mlp.language_mlp.act_fn
498
- model.layers.24.mlp.vision_mlp
499
- model.layers.24.mlp.vision_mlp.gate_proj
500
- model.layers.24.mlp.vision_mlp.up_proj
501
- model.layers.24.mlp.vision_mlp.down_proj
502
- model.layers.24.mlp.vision_mlp.act_fn
503
- model.layers.24.input_layernorm
504
- model.layers.24.post_attention_layernorm
505
- model.layers.25
506
- model.layers.25.self_attn
507
- model.layers.25.self_attn.rotary_emb
508
- model.layers.25.self_attn.vision_expert_query_key_value
509
- model.layers.25.self_attn.vision_expert_dense
510
- model.layers.25.self_attn.language_expert_query_key_value
511
- model.layers.25.self_attn.language_expert_dense
512
- model.layers.25.mlp
513
- model.layers.25.mlp.language_mlp
514
- model.layers.25.mlp.language_mlp.gate_proj
515
- model.layers.25.mlp.language_mlp.up_proj
516
- model.layers.25.mlp.language_mlp.down_proj
517
- model.layers.25.mlp.language_mlp.act_fn
518
- model.layers.25.mlp.vision_mlp
519
- model.layers.25.mlp.vision_mlp.gate_proj
520
- model.layers.25.mlp.vision_mlp.up_proj
521
- model.layers.25.mlp.vision_mlp.down_proj
522
- model.layers.25.mlp.vision_mlp.act_fn
523
- model.layers.25.input_layernorm
524
- model.layers.25.post_attention_layernorm
525
- model.layers.26
526
- model.layers.26.self_attn
527
- model.layers.26.self_attn.rotary_emb
528
- model.layers.26.self_attn.vision_expert_query_key_value
529
- model.layers.26.self_attn.vision_expert_dense
530
- model.layers.26.self_attn.language_expert_query_key_value
531
- model.layers.26.self_attn.language_expert_dense
532
- model.layers.26.mlp
533
- model.layers.26.mlp.language_mlp
534
- model.layers.26.mlp.language_mlp.gate_proj
535
- model.layers.26.mlp.language_mlp.up_proj
536
- model.layers.26.mlp.language_mlp.down_proj
537
- model.layers.26.mlp.language_mlp.act_fn
538
- model.layers.26.mlp.vision_mlp
539
- model.layers.26.mlp.vision_mlp.gate_proj
540
- model.layers.26.mlp.vision_mlp.up_proj
541
- model.layers.26.mlp.vision_mlp.down_proj
542
- model.layers.26.mlp.vision_mlp.act_fn
543
- model.layers.26.input_layernorm
544
- model.layers.26.post_attention_layernorm
545
- model.layers.27
546
- model.layers.27.self_attn
547
- model.layers.27.self_attn.rotary_emb
548
- model.layers.27.self_attn.vision_expert_query_key_value
549
- model.layers.27.self_attn.vision_expert_dense
550
- model.layers.27.self_attn.language_expert_query_key_value
551
- model.layers.27.self_attn.language_expert_dense
552
- model.layers.27.mlp
553
- model.layers.27.mlp.language_mlp
554
- model.layers.27.mlp.language_mlp.gate_proj
555
- model.layers.27.mlp.language_mlp.up_proj
556
- model.layers.27.mlp.language_mlp.down_proj
557
- model.layers.27.mlp.language_mlp.act_fn
558
- model.layers.27.mlp.vision_mlp
559
- model.layers.27.mlp.vision_mlp.gate_proj
560
- model.layers.27.mlp.vision_mlp.up_proj
561
- model.layers.27.mlp.vision_mlp.down_proj
562
- model.layers.27.mlp.vision_mlp.act_fn
563
- model.layers.27.input_layernorm
564
- model.layers.27.post_attention_layernorm
565
- model.layers.28
566
- model.layers.28.self_attn
567
- model.layers.28.self_attn.rotary_emb
568
- model.layers.28.self_attn.vision_expert_query_key_value
569
- model.layers.28.self_attn.vision_expert_dense
570
- model.layers.28.self_attn.language_expert_query_key_value
571
- model.layers.28.self_attn.language_expert_dense
572
- model.layers.28.mlp
573
- model.layers.28.mlp.language_mlp
574
- model.layers.28.mlp.language_mlp.gate_proj
575
- model.layers.28.mlp.language_mlp.up_proj
576
- model.layers.28.mlp.language_mlp.down_proj
577
- model.layers.28.mlp.language_mlp.act_fn
578
- model.layers.28.mlp.vision_mlp
579
- model.layers.28.mlp.vision_mlp.gate_proj
580
- model.layers.28.mlp.vision_mlp.up_proj
581
- model.layers.28.mlp.vision_mlp.down_proj
582
- model.layers.28.mlp.vision_mlp.act_fn
583
- model.layers.28.input_layernorm
584
- model.layers.28.post_attention_layernorm
585
- model.layers.29
586
- model.layers.29.self_attn
587
- model.layers.29.self_attn.rotary_emb
588
- model.layers.29.self_attn.vision_expert_query_key_value
589
- model.layers.29.self_attn.vision_expert_dense
590
- model.layers.29.self_attn.language_expert_query_key_value
591
- model.layers.29.self_attn.language_expert_dense
592
- model.layers.29.mlp
593
- model.layers.29.mlp.language_mlp
594
- model.layers.29.mlp.language_mlp.gate_proj
595
- model.layers.29.mlp.language_mlp.up_proj
596
- model.layers.29.mlp.language_mlp.down_proj
597
- model.layers.29.mlp.language_mlp.act_fn
598
- model.layers.29.mlp.vision_mlp
599
- model.layers.29.mlp.vision_mlp.gate_proj
600
- model.layers.29.mlp.vision_mlp.up_proj
601
- model.layers.29.mlp.vision_mlp.down_proj
602
- model.layers.29.mlp.vision_mlp.act_fn
603
- model.layers.29.input_layernorm
604
- model.layers.29.post_attention_layernorm
605
- model.layers.30
606
- model.layers.30.self_attn
607
- model.layers.30.self_attn.rotary_emb
608
- model.layers.30.self_attn.vision_expert_query_key_value
609
- model.layers.30.self_attn.vision_expert_dense
610
- model.layers.30.self_attn.language_expert_query_key_value
611
- model.layers.30.self_attn.language_expert_dense
612
- model.layers.30.mlp
613
- model.layers.30.mlp.language_mlp
614
- model.layers.30.mlp.language_mlp.gate_proj
615
- model.layers.30.mlp.language_mlp.up_proj
616
- model.layers.30.mlp.language_mlp.down_proj
617
- model.layers.30.mlp.language_mlp.act_fn
618
- model.layers.30.mlp.vision_mlp
619
- model.layers.30.mlp.vision_mlp.gate_proj
620
- model.layers.30.mlp.vision_mlp.up_proj
621
- model.layers.30.mlp.vision_mlp.down_proj
622
- model.layers.30.mlp.vision_mlp.act_fn
623
- model.layers.30.input_layernorm
624
- model.layers.30.post_attention_layernorm
625
- model.layers.31
626
- model.layers.31.self_attn
627
- model.layers.31.self_attn.rotary_emb
628
- model.layers.31.self_attn.vision_expert_query_key_value
629
- model.layers.31.self_attn.vision_expert_dense
630
- model.layers.31.self_attn.language_expert_query_key_value
631
- model.layers.31.self_attn.language_expert_dense
632
- model.layers.31.mlp
633
- model.layers.31.mlp.language_mlp
634
- model.layers.31.mlp.language_mlp.gate_proj
635
- model.layers.31.mlp.language_mlp.up_proj
636
- model.layers.31.mlp.language_mlp.down_proj
637
- model.layers.31.mlp.language_mlp.act_fn
638
- model.layers.31.mlp.vision_mlp
639
- model.layers.31.mlp.vision_mlp.gate_proj
640
- model.layers.31.mlp.vision_mlp.up_proj
641
- model.layers.31.mlp.vision_mlp.down_proj
642
- model.layers.31.mlp.vision_mlp.act_fn
643
- model.layers.31.input_layernorm
644
- model.layers.31.post_attention_layernorm
645
- model.norm
646
- model.vision
647
- model.vision.patch_embedding
648
- model.vision.patch_embedding.proj
649
- model.vision.patch_embedding.position_embedding
650
- model.vision.transformer
651
- model.vision.transformer.layers
652
- model.vision.transformer.layers.0
653
- model.vision.transformer.layers.0.input_layernorm
654
- model.vision.transformer.layers.0.attention
655
- model.vision.transformer.layers.0.attention.query_key_value
656
- model.vision.transformer.layers.0.attention.dense
657
- model.vision.transformer.layers.0.attention.output_dropout
658
- model.vision.transformer.layers.0.mlp
659
- model.vision.transformer.layers.0.mlp.activation_fn
660
- model.vision.transformer.layers.0.mlp.fc1
661
- model.vision.transformer.layers.0.mlp.fc2
662
- model.vision.transformer.layers.0.post_attention_layernorm
663
- model.vision.transformer.layers.1
664
- model.vision.transformer.layers.1.input_layernorm
665
- model.vision.transformer.layers.1.attention
666
- model.vision.transformer.layers.1.attention.query_key_value
667
- model.vision.transformer.layers.1.attention.dense
668
- model.vision.transformer.layers.1.attention.output_dropout
669
- model.vision.transformer.layers.1.mlp
670
- model.vision.transformer.layers.1.mlp.activation_fn
671
- model.vision.transformer.layers.1.mlp.fc1
672
- model.vision.transformer.layers.1.mlp.fc2
673
- model.vision.transformer.layers.1.post_attention_layernorm
674
- model.vision.transformer.layers.2
675
- model.vision.transformer.layers.2.input_layernorm
676
- model.vision.transformer.layers.2.attention
677
- model.vision.transformer.layers.2.attention.query_key_value
678
- model.vision.transformer.layers.2.attention.dense
679
- model.vision.transformer.layers.2.attention.output_dropout
680
- model.vision.transformer.layers.2.mlp
681
- model.vision.transformer.layers.2.mlp.activation_fn
682
- model.vision.transformer.layers.2.mlp.fc1
683
- model.vision.transformer.layers.2.mlp.fc2
684
- model.vision.transformer.layers.2.post_attention_layernorm
685
- model.vision.transformer.layers.3
686
- model.vision.transformer.layers.3.input_layernorm
687
- model.vision.transformer.layers.3.attention
688
- model.vision.transformer.layers.3.attention.query_key_value
689
- model.vision.transformer.layers.3.attention.dense
690
- model.vision.transformer.layers.3.attention.output_dropout
691
- model.vision.transformer.layers.3.mlp
692
- model.vision.transformer.layers.3.mlp.activation_fn
693
- model.vision.transformer.layers.3.mlp.fc1
694
- model.vision.transformer.layers.3.mlp.fc2
695
- model.vision.transformer.layers.3.post_attention_layernorm
696
- model.vision.transformer.layers.4
697
- model.vision.transformer.layers.4.input_layernorm
698
- model.vision.transformer.layers.4.attention
699
- model.vision.transformer.layers.4.attention.query_key_value
700
- model.vision.transformer.layers.4.attention.dense
701
- model.vision.transformer.layers.4.attention.output_dropout
702
- model.vision.transformer.layers.4.mlp
703
- model.vision.transformer.layers.4.mlp.activation_fn
704
- model.vision.transformer.layers.4.mlp.fc1
705
- model.vision.transformer.layers.4.mlp.fc2
706
- model.vision.transformer.layers.4.post_attention_layernorm
707
- model.vision.transformer.layers.5
708
- model.vision.transformer.layers.5.input_layernorm
709
- model.vision.transformer.layers.5.attention
710
- model.vision.transformer.layers.5.attention.query_key_value
711
- model.vision.transformer.layers.5.attention.dense
712
- model.vision.transformer.layers.5.attention.output_dropout
713
- model.vision.transformer.layers.5.mlp
714
- model.vision.transformer.layers.5.mlp.activation_fn
715
- model.vision.transformer.layers.5.mlp.fc1
716
- model.vision.transformer.layers.5.mlp.fc2
717
- model.vision.transformer.layers.5.post_attention_layernorm
718
- model.vision.transformer.layers.6
719
- model.vision.transformer.layers.6.input_layernorm
720
- model.vision.transformer.layers.6.attention
721
- model.vision.transformer.layers.6.attention.query_key_value
722
- model.vision.transformer.layers.6.attention.dense
723
- model.vision.transformer.layers.6.attention.output_dropout
724
- model.vision.transformer.layers.6.mlp
725
- model.vision.transformer.layers.6.mlp.activation_fn
726
- model.vision.transformer.layers.6.mlp.fc1
727
- model.vision.transformer.layers.6.mlp.fc2
728
- model.vision.transformer.layers.6.post_attention_layernorm
729
- model.vision.transformer.layers.7
730
- model.vision.transformer.layers.7.input_layernorm
731
- model.vision.transformer.layers.7.attention
732
- model.vision.transformer.layers.7.attention.query_key_value
733
- model.vision.transformer.layers.7.attention.dense
734
- model.vision.transformer.layers.7.attention.output_dropout
735
- model.vision.transformer.layers.7.mlp
736
- model.vision.transformer.layers.7.mlp.activation_fn
737
- model.vision.transformer.layers.7.mlp.fc1
738
- model.vision.transformer.layers.7.mlp.fc2
739
- model.vision.transformer.layers.7.post_attention_layernorm
740
- model.vision.transformer.layers.8
741
- model.vision.transformer.layers.8.input_layernorm
742
- model.vision.transformer.layers.8.attention
743
- model.vision.transformer.layers.8.attention.query_key_value
744
- model.vision.transformer.layers.8.attention.dense
745
- model.vision.transformer.layers.8.attention.output_dropout
746
- model.vision.transformer.layers.8.mlp
747
- model.vision.transformer.layers.8.mlp.activation_fn
748
- model.vision.transformer.layers.8.mlp.fc1
749
- model.vision.transformer.layers.8.mlp.fc2
750
- model.vision.transformer.layers.8.post_attention_layernorm
751
- model.vision.transformer.layers.9
752
- model.vision.transformer.layers.9.input_layernorm
753
- model.vision.transformer.layers.9.attention
754
- model.vision.transformer.layers.9.attention.query_key_value
755
- model.vision.transformer.layers.9.attention.dense
756
- model.vision.transformer.layers.9.attention.output_dropout
757
- model.vision.transformer.layers.9.mlp
758
- model.vision.transformer.layers.9.mlp.activation_fn
759
- model.vision.transformer.layers.9.mlp.fc1
760
- model.vision.transformer.layers.9.mlp.fc2
761
- model.vision.transformer.layers.9.post_attention_layernorm
762
- model.vision.transformer.layers.10
763
- model.vision.transformer.layers.10.input_layernorm
764
- model.vision.transformer.layers.10.attention
765
- model.vision.transformer.layers.10.attention.query_key_value
766
- model.vision.transformer.layers.10.attention.dense
767
- model.vision.transformer.layers.10.attention.output_dropout
768
- model.vision.transformer.layers.10.mlp
769
- model.vision.transformer.layers.10.mlp.activation_fn
770
- model.vision.transformer.layers.10.mlp.fc1
771
- model.vision.transformer.layers.10.mlp.fc2
772
- model.vision.transformer.layers.10.post_attention_layernorm
773
- model.vision.transformer.layers.11
774
- model.vision.transformer.layers.11.input_layernorm
775
- model.vision.transformer.layers.11.attention
776
- model.vision.transformer.layers.11.attention.query_key_value
777
- model.vision.transformer.layers.11.attention.dense
778
- model.vision.transformer.layers.11.attention.output_dropout
779
- model.vision.transformer.layers.11.mlp
780
- model.vision.transformer.layers.11.mlp.activation_fn
781
- model.vision.transformer.layers.11.mlp.fc1
782
- model.vision.transformer.layers.11.mlp.fc2
783
- model.vision.transformer.layers.11.post_attention_layernorm
784
- model.vision.transformer.layers.12
785
- model.vision.transformer.layers.12.input_layernorm
786
- model.vision.transformer.layers.12.attention
787
- model.vision.transformer.layers.12.attention.query_key_value
788
- model.vision.transformer.layers.12.attention.dense
789
- model.vision.transformer.layers.12.attention.output_dropout
790
- model.vision.transformer.layers.12.mlp
791
- model.vision.transformer.layers.12.mlp.activation_fn
792
- model.vision.transformer.layers.12.mlp.fc1
793
- model.vision.transformer.layers.12.mlp.fc2
794
- model.vision.transformer.layers.12.post_attention_layernorm
795
- model.vision.transformer.layers.13
796
- model.vision.transformer.layers.13.input_layernorm
797
- model.vision.transformer.layers.13.attention
798
- model.vision.transformer.layers.13.attention.query_key_value
799
- model.vision.transformer.layers.13.attention.dense
800
- model.vision.transformer.layers.13.attention.output_dropout
801
- model.vision.transformer.layers.13.mlp
802
- model.vision.transformer.layers.13.mlp.activation_fn
803
- model.vision.transformer.layers.13.mlp.fc1
804
- model.vision.transformer.layers.13.mlp.fc2
805
- model.vision.transformer.layers.13.post_attention_layernorm
806
- model.vision.transformer.layers.14
807
- model.vision.transformer.layers.14.input_layernorm
808
- model.vision.transformer.layers.14.attention
809
- model.vision.transformer.layers.14.attention.query_key_value
810
- model.vision.transformer.layers.14.attention.dense
811
- model.vision.transformer.layers.14.attention.output_dropout
812
- model.vision.transformer.layers.14.mlp
813
- model.vision.transformer.layers.14.mlp.activation_fn
814
- model.vision.transformer.layers.14.mlp.fc1
815
- model.vision.transformer.layers.14.mlp.fc2
816
- model.vision.transformer.layers.14.post_attention_layernorm
817
- model.vision.transformer.layers.15
818
- model.vision.transformer.layers.15.input_layernorm
819
- model.vision.transformer.layers.15.attention
820
- model.vision.transformer.layers.15.attention.query_key_value
821
- model.vision.transformer.layers.15.attention.dense
822
- model.vision.transformer.layers.15.attention.output_dropout
823
- model.vision.transformer.layers.15.mlp
824
- model.vision.transformer.layers.15.mlp.activation_fn
825
- model.vision.transformer.layers.15.mlp.fc1
826
- model.vision.transformer.layers.15.mlp.fc2
827
- model.vision.transformer.layers.15.post_attention_layernorm
828
- model.vision.transformer.layers.16
829
- model.vision.transformer.layers.16.input_layernorm
830
- model.vision.transformer.layers.16.attention
831
- model.vision.transformer.layers.16.attention.query_key_value
832
- model.vision.transformer.layers.16.attention.dense
833
- model.vision.transformer.layers.16.attention.output_dropout
834
- model.vision.transformer.layers.16.mlp
835
- model.vision.transformer.layers.16.mlp.activation_fn
836
- model.vision.transformer.layers.16.mlp.fc1
837
- model.vision.transformer.layers.16.mlp.fc2
838
- model.vision.transformer.layers.16.post_attention_layernorm
839
- model.vision.transformer.layers.17
840
- model.vision.transformer.layers.17.input_layernorm
841
- model.vision.transformer.layers.17.attention
842
- model.vision.transformer.layers.17.attention.query_key_value
843
- model.vision.transformer.layers.17.attention.dense
844
- model.vision.transformer.layers.17.attention.output_dropout
845
- model.vision.transformer.layers.17.mlp
846
- model.vision.transformer.layers.17.mlp.activation_fn
847
- model.vision.transformer.layers.17.mlp.fc1
848
- model.vision.transformer.layers.17.mlp.fc2
849
- model.vision.transformer.layers.17.post_attention_layernorm
850
- model.vision.transformer.layers.18
851
- model.vision.transformer.layers.18.input_layernorm
852
- model.vision.transformer.layers.18.attention
853
- model.vision.transformer.layers.18.attention.query_key_value
854
- model.vision.transformer.layers.18.attention.dense
855
- model.vision.transformer.layers.18.attention.output_dropout
856
- model.vision.transformer.layers.18.mlp
857
- model.vision.transformer.layers.18.mlp.activation_fn
858
- model.vision.transformer.layers.18.mlp.fc1
859
- model.vision.transformer.layers.18.mlp.fc2
860
- model.vision.transformer.layers.18.post_attention_layernorm
861
- model.vision.transformer.layers.19
862
- model.vision.transformer.layers.19.input_layernorm
863
- model.vision.transformer.layers.19.attention
864
- model.vision.transformer.layers.19.attention.query_key_value
865
- model.vision.transformer.layers.19.attention.dense
866
- model.vision.transformer.layers.19.attention.output_dropout
867
- model.vision.transformer.layers.19.mlp
868
- model.vision.transformer.layers.19.mlp.activation_fn
869
- model.vision.transformer.layers.19.mlp.fc1
870
- model.vision.transformer.layers.19.mlp.fc2
871
- model.vision.transformer.layers.19.post_attention_layernorm
872
- model.vision.transformer.layers.20
873
- model.vision.transformer.layers.20.input_layernorm
874
- model.vision.transformer.layers.20.attention
875
- model.vision.transformer.layers.20.attention.query_key_value
876
- model.vision.transformer.layers.20.attention.dense
877
- model.vision.transformer.layers.20.attention.output_dropout
878
- model.vision.transformer.layers.20.mlp
879
- model.vision.transformer.layers.20.mlp.activation_fn
880
- model.vision.transformer.layers.20.mlp.fc1
881
- model.vision.transformer.layers.20.mlp.fc2
882
- model.vision.transformer.layers.20.post_attention_layernorm
883
- model.vision.transformer.layers.21
884
- model.vision.transformer.layers.21.input_layernorm
885
- model.vision.transformer.layers.21.attention
886
- model.vision.transformer.layers.21.attention.query_key_value
887
- model.vision.transformer.layers.21.attention.dense
888
- model.vision.transformer.layers.21.attention.output_dropout
889
- model.vision.transformer.layers.21.mlp
890
- model.vision.transformer.layers.21.mlp.activation_fn
891
- model.vision.transformer.layers.21.mlp.fc1
892
- model.vision.transformer.layers.21.mlp.fc2
893
- model.vision.transformer.layers.21.post_attention_layernorm
894
- model.vision.transformer.layers.22
895
- model.vision.transformer.layers.22.input_layernorm
896
- model.vision.transformer.layers.22.attention
897
- model.vision.transformer.layers.22.attention.query_key_value
898
- model.vision.transformer.layers.22.attention.dense
899
- model.vision.transformer.layers.22.attention.output_dropout
900
- model.vision.transformer.layers.22.mlp
901
- model.vision.transformer.layers.22.mlp.activation_fn
902
- model.vision.transformer.layers.22.mlp.fc1
903
- model.vision.transformer.layers.22.mlp.fc2
904
- model.vision.transformer.layers.22.post_attention_layernorm
905
- model.vision.transformer.layers.23
906
- model.vision.transformer.layers.23.input_layernorm
907
- model.vision.transformer.layers.23.attention
908
- model.vision.transformer.layers.23.attention.query_key_value
909
- model.vision.transformer.layers.23.attention.dense
910
- model.vision.transformer.layers.23.attention.output_dropout
911
- model.vision.transformer.layers.23.mlp
912
- model.vision.transformer.layers.23.mlp.activation_fn
913
- model.vision.transformer.layers.23.mlp.fc1
914
- model.vision.transformer.layers.23.mlp.fc2
915
- model.vision.transformer.layers.23.post_attention_layernorm
916
- model.vision.transformer.layers.24
917
- model.vision.transformer.layers.24.input_layernorm
918
- model.vision.transformer.layers.24.attention
919
- model.vision.transformer.layers.24.attention.query_key_value
920
- model.vision.transformer.layers.24.attention.dense
921
- model.vision.transformer.layers.24.attention.output_dropout
922
- model.vision.transformer.layers.24.mlp
923
- model.vision.transformer.layers.24.mlp.activation_fn
924
- model.vision.transformer.layers.24.mlp.fc1
925
- model.vision.transformer.layers.24.mlp.fc2
926
- model.vision.transformer.layers.24.post_attention_layernorm
927
- model.vision.transformer.layers.25
928
- model.vision.transformer.layers.25.input_layernorm
929
- model.vision.transformer.layers.25.attention
930
- model.vision.transformer.layers.25.attention.query_key_value
931
- model.vision.transformer.layers.25.attention.dense
932
- model.vision.transformer.layers.25.attention.output_dropout
933
- model.vision.transformer.layers.25.mlp
934
- model.vision.transformer.layers.25.mlp.activation_fn
935
- model.vision.transformer.layers.25.mlp.fc1
936
- model.vision.transformer.layers.25.mlp.fc2
937
- model.vision.transformer.layers.25.post_attention_layernorm
938
- model.vision.transformer.layers.26
939
- model.vision.transformer.layers.26.input_layernorm
940
- model.vision.transformer.layers.26.attention
941
- model.vision.transformer.layers.26.attention.query_key_value
942
- model.vision.transformer.layers.26.attention.dense
943
- model.vision.transformer.layers.26.attention.output_dropout
944
- model.vision.transformer.layers.26.mlp
945
- model.vision.transformer.layers.26.mlp.activation_fn
946
- model.vision.transformer.layers.26.mlp.fc1
947
- model.vision.transformer.layers.26.mlp.fc2
948
- model.vision.transformer.layers.26.post_attention_layernorm
949
- model.vision.transformer.layers.27
950
- model.vision.transformer.layers.27.input_layernorm
951
- model.vision.transformer.layers.27.attention
952
- model.vision.transformer.layers.27.attention.query_key_value
953
- model.vision.transformer.layers.27.attention.dense
954
- model.vision.transformer.layers.27.attention.output_dropout
955
- model.vision.transformer.layers.27.mlp
956
- model.vision.transformer.layers.27.mlp.activation_fn
957
- model.vision.transformer.layers.27.mlp.fc1
958
- model.vision.transformer.layers.27.mlp.fc2
959
- model.vision.transformer.layers.27.post_attention_layernorm
960
- model.vision.transformer.layers.28
961
- model.vision.transformer.layers.28.input_layernorm
962
- model.vision.transformer.layers.28.attention
963
- model.vision.transformer.layers.28.attention.query_key_value
964
- model.vision.transformer.layers.28.attention.dense
965
- model.vision.transformer.layers.28.attention.output_dropout
966
- model.vision.transformer.layers.28.mlp
967
- model.vision.transformer.layers.28.mlp.activation_fn
968
- model.vision.transformer.layers.28.mlp.fc1
969
- model.vision.transformer.layers.28.mlp.fc2
970
- model.vision.transformer.layers.28.post_attention_layernorm
971
- model.vision.transformer.layers.29
972
- model.vision.transformer.layers.29.input_layernorm
973
- model.vision.transformer.layers.29.attention
974
- model.vision.transformer.layers.29.attention.query_key_value
975
- model.vision.transformer.layers.29.attention.dense
976
- model.vision.transformer.layers.29.attention.output_dropout
977
- model.vision.transformer.layers.29.mlp
978
- model.vision.transformer.layers.29.mlp.activation_fn
979
- model.vision.transformer.layers.29.mlp.fc1
980
- model.vision.transformer.layers.29.mlp.fc2
981
- model.vision.transformer.layers.29.post_attention_layernorm
982
- model.vision.transformer.layers.30
983
- model.vision.transformer.layers.30.input_layernorm
984
- model.vision.transformer.layers.30.attention
985
- model.vision.transformer.layers.30.attention.query_key_value
986
- model.vision.transformer.layers.30.attention.dense
987
- model.vision.transformer.layers.30.attention.output_dropout
988
- model.vision.transformer.layers.30.mlp
989
- model.vision.transformer.layers.30.mlp.activation_fn
990
- model.vision.transformer.layers.30.mlp.fc1
991
- model.vision.transformer.layers.30.mlp.fc2
992
- model.vision.transformer.layers.30.post_attention_layernorm
993
- model.vision.transformer.layers.31
994
- model.vision.transformer.layers.31.input_layernorm
995
- model.vision.transformer.layers.31.attention
996
- model.vision.transformer.layers.31.attention.query_key_value
997
- model.vision.transformer.layers.31.attention.dense
998
- model.vision.transformer.layers.31.attention.output_dropout
999
- model.vision.transformer.layers.31.mlp
1000
- model.vision.transformer.layers.31.mlp.activation_fn
1001
- model.vision.transformer.layers.31.mlp.fc1
1002
- model.vision.transformer.layers.31.mlp.fc2
1003
- model.vision.transformer.layers.31.post_attention_layernorm
1004
- model.vision.transformer.layers.32
1005
- model.vision.transformer.layers.32.input_layernorm
1006
- model.vision.transformer.layers.32.attention
1007
- model.vision.transformer.layers.32.attention.query_key_value
1008
- model.vision.transformer.layers.32.attention.dense
1009
- model.vision.transformer.layers.32.attention.output_dropout
1010
- model.vision.transformer.layers.32.mlp
1011
- model.vision.transformer.layers.32.mlp.activation_fn
1012
- model.vision.transformer.layers.32.mlp.fc1
1013
- model.vision.transformer.layers.32.mlp.fc2
1014
- model.vision.transformer.layers.32.post_attention_layernorm
1015
- model.vision.transformer.layers.33
1016
- model.vision.transformer.layers.33.input_layernorm
1017
- model.vision.transformer.layers.33.attention
1018
- model.vision.transformer.layers.33.attention.query_key_value
1019
- model.vision.transformer.layers.33.attention.dense
1020
- model.vision.transformer.layers.33.attention.output_dropout
1021
- model.vision.transformer.layers.33.mlp
1022
- model.vision.transformer.layers.33.mlp.activation_fn
1023
- model.vision.transformer.layers.33.mlp.fc1
1024
- model.vision.transformer.layers.33.mlp.fc2
1025
- model.vision.transformer.layers.33.post_attention_layernorm
1026
- model.vision.transformer.layers.34
1027
- model.vision.transformer.layers.34.input_layernorm
1028
- model.vision.transformer.layers.34.attention
1029
- model.vision.transformer.layers.34.attention.query_key_value
1030
- model.vision.transformer.layers.34.attention.dense
1031
- model.vision.transformer.layers.34.attention.output_dropout
1032
- model.vision.transformer.layers.34.mlp
1033
- model.vision.transformer.layers.34.mlp.activation_fn
1034
- model.vision.transformer.layers.34.mlp.fc1
1035
- model.vision.transformer.layers.34.mlp.fc2
1036
- model.vision.transformer.layers.34.post_attention_layernorm
1037
- model.vision.transformer.layers.35
1038
- model.vision.transformer.layers.35.input_layernorm
1039
- model.vision.transformer.layers.35.attention
1040
- model.vision.transformer.layers.35.attention.query_key_value
1041
- model.vision.transformer.layers.35.attention.dense
1042
- model.vision.transformer.layers.35.attention.output_dropout
1043
- model.vision.transformer.layers.35.mlp
1044
- model.vision.transformer.layers.35.mlp.activation_fn
1045
- model.vision.transformer.layers.35.mlp.fc1
1046
- model.vision.transformer.layers.35.mlp.fc2
1047
- model.vision.transformer.layers.35.post_attention_layernorm
1048
- model.vision.transformer.layers.36
1049
- model.vision.transformer.layers.36.input_layernorm
1050
- model.vision.transformer.layers.36.attention
1051
- model.vision.transformer.layers.36.attention.query_key_value
1052
- model.vision.transformer.layers.36.attention.dense
1053
- model.vision.transformer.layers.36.attention.output_dropout
1054
- model.vision.transformer.layers.36.mlp
1055
- model.vision.transformer.layers.36.mlp.activation_fn
1056
- model.vision.transformer.layers.36.mlp.fc1
1057
- model.vision.transformer.layers.36.mlp.fc2
1058
- model.vision.transformer.layers.36.post_attention_layernorm
1059
- model.vision.transformer.layers.37
1060
- model.vision.transformer.layers.37.input_layernorm
1061
- model.vision.transformer.layers.37.attention
1062
- model.vision.transformer.layers.37.attention.query_key_value
1063
- model.vision.transformer.layers.37.attention.dense
1064
- model.vision.transformer.layers.37.attention.output_dropout
1065
- model.vision.transformer.layers.37.mlp
1066
- model.vision.transformer.layers.37.mlp.activation_fn
1067
- model.vision.transformer.layers.37.mlp.fc1
1068
- model.vision.transformer.layers.37.mlp.fc2
1069
- model.vision.transformer.layers.37.post_attention_layernorm
1070
- model.vision.transformer.layers.38
1071
- model.vision.transformer.layers.38.input_layernorm
1072
- model.vision.transformer.layers.38.attention
1073
- model.vision.transformer.layers.38.attention.query_key_value
1074
- model.vision.transformer.layers.38.attention.dense
1075
- model.vision.transformer.layers.38.attention.output_dropout
1076
- model.vision.transformer.layers.38.mlp
1077
- model.vision.transformer.layers.38.mlp.activation_fn
1078
- model.vision.transformer.layers.38.mlp.fc1
1079
- model.vision.transformer.layers.38.mlp.fc2
1080
- model.vision.transformer.layers.38.post_attention_layernorm
1081
- model.vision.transformer.layers.39
1082
- model.vision.transformer.layers.39.input_layernorm
1083
- model.vision.transformer.layers.39.attention
1084
- model.vision.transformer.layers.39.attention.query_key_value
1085
- model.vision.transformer.layers.39.attention.dense
1086
- model.vision.transformer.layers.39.attention.output_dropout
1087
- model.vision.transformer.layers.39.mlp
1088
- model.vision.transformer.layers.39.mlp.activation_fn
1089
- model.vision.transformer.layers.39.mlp.fc1
1090
- model.vision.transformer.layers.39.mlp.fc2
1091
- model.vision.transformer.layers.39.post_attention_layernorm
1092
- model.vision.transformer.layers.40
1093
- model.vision.transformer.layers.40.input_layernorm
1094
- model.vision.transformer.layers.40.attention
1095
- model.vision.transformer.layers.40.attention.query_key_value
1096
- model.vision.transformer.layers.40.attention.dense
1097
- model.vision.transformer.layers.40.attention.output_dropout
1098
- model.vision.transformer.layers.40.mlp
1099
- model.vision.transformer.layers.40.mlp.activation_fn
1100
- model.vision.transformer.layers.40.mlp.fc1
1101
- model.vision.transformer.layers.40.mlp.fc2
1102
- model.vision.transformer.layers.40.post_attention_layernorm
1103
- model.vision.transformer.layers.41
1104
- model.vision.transformer.layers.41.input_layernorm
1105
- model.vision.transformer.layers.41.attention
1106
- model.vision.transformer.layers.41.attention.query_key_value
1107
- model.vision.transformer.layers.41.attention.dense
1108
- model.vision.transformer.layers.41.attention.output_dropout
1109
- model.vision.transformer.layers.41.mlp
1110
- model.vision.transformer.layers.41.mlp.activation_fn
1111
- model.vision.transformer.layers.41.mlp.fc1
1112
- model.vision.transformer.layers.41.mlp.fc2
1113
- model.vision.transformer.layers.41.post_attention_layernorm
1114
- model.vision.transformer.layers.42
1115
- model.vision.transformer.layers.42.input_layernorm
1116
- model.vision.transformer.layers.42.attention
1117
- model.vision.transformer.layers.42.attention.query_key_value
1118
- model.vision.transformer.layers.42.attention.dense
1119
- model.vision.transformer.layers.42.attention.output_dropout
1120
- model.vision.transformer.layers.42.mlp
1121
- model.vision.transformer.layers.42.mlp.activation_fn
1122
- model.vision.transformer.layers.42.mlp.fc1
1123
- model.vision.transformer.layers.42.mlp.fc2
1124
- model.vision.transformer.layers.42.post_attention_layernorm
1125
- model.vision.transformer.layers.43
1126
- model.vision.transformer.layers.43.input_layernorm
1127
- model.vision.transformer.layers.43.attention
1128
- model.vision.transformer.layers.43.attention.query_key_value
1129
- model.vision.transformer.layers.43.attention.dense
1130
- model.vision.transformer.layers.43.attention.output_dropout
1131
- model.vision.transformer.layers.43.mlp
1132
- model.vision.transformer.layers.43.mlp.activation_fn
1133
- model.vision.transformer.layers.43.mlp.fc1
1134
- model.vision.transformer.layers.43.mlp.fc2
1135
- model.vision.transformer.layers.43.post_attention_layernorm
1136
- model.vision.transformer.layers.44
1137
- model.vision.transformer.layers.44.input_layernorm
1138
- model.vision.transformer.layers.44.attention
1139
- model.vision.transformer.layers.44.attention.query_key_value
1140
- model.vision.transformer.layers.44.attention.dense
1141
- model.vision.transformer.layers.44.attention.output_dropout
1142
- model.vision.transformer.layers.44.mlp
1143
- model.vision.transformer.layers.44.mlp.activation_fn
1144
- model.vision.transformer.layers.44.mlp.fc1
1145
- model.vision.transformer.layers.44.mlp.fc2
1146
- model.vision.transformer.layers.44.post_attention_layernorm
1147
- model.vision.transformer.layers.45
1148
- model.vision.transformer.layers.45.input_layernorm
1149
- model.vision.transformer.layers.45.attention
1150
- model.vision.transformer.layers.45.attention.query_key_value
1151
- model.vision.transformer.layers.45.attention.dense
1152
- model.vision.transformer.layers.45.attention.output_dropout
1153
- model.vision.transformer.layers.45.mlp
1154
- model.vision.transformer.layers.45.mlp.activation_fn
1155
- model.vision.transformer.layers.45.mlp.fc1
1156
- model.vision.transformer.layers.45.mlp.fc2
1157
- model.vision.transformer.layers.45.post_attention_layernorm
1158
- model.vision.transformer.layers.46
1159
- model.vision.transformer.layers.46.input_layernorm
1160
- model.vision.transformer.layers.46.attention
1161
- model.vision.transformer.layers.46.attention.query_key_value
1162
- model.vision.transformer.layers.46.attention.dense
1163
- model.vision.transformer.layers.46.attention.output_dropout
1164
- model.vision.transformer.layers.46.mlp
1165
- model.vision.transformer.layers.46.mlp.activation_fn
1166
- model.vision.transformer.layers.46.mlp.fc1
1167
- model.vision.transformer.layers.46.mlp.fc2
1168
- model.vision.transformer.layers.46.post_attention_layernorm
1169
- model.vision.transformer.layers.47
1170
- model.vision.transformer.layers.47.input_layernorm
1171
- model.vision.transformer.layers.47.attention
1172
- model.vision.transformer.layers.47.attention.query_key_value
1173
- model.vision.transformer.layers.47.attention.dense
1174
- model.vision.transformer.layers.47.attention.output_dropout
1175
- model.vision.transformer.layers.47.mlp
1176
- model.vision.transformer.layers.47.mlp.activation_fn
1177
- model.vision.transformer.layers.47.mlp.fc1
1178
- model.vision.transformer.layers.47.mlp.fc2
1179
- model.vision.transformer.layers.47.post_attention_layernorm
1180
- model.vision.transformer.layers.48
1181
- model.vision.transformer.layers.48.input_layernorm
1182
- model.vision.transformer.layers.48.attention
1183
- model.vision.transformer.layers.48.attention.query_key_value
1184
- model.vision.transformer.layers.48.attention.dense
1185
- model.vision.transformer.layers.48.attention.output_dropout
1186
- model.vision.transformer.layers.48.mlp
1187
- model.vision.transformer.layers.48.mlp.activation_fn
1188
- model.vision.transformer.layers.48.mlp.fc1
1189
- model.vision.transformer.layers.48.mlp.fc2
1190
- model.vision.transformer.layers.48.post_attention_layernorm
1191
- model.vision.transformer.layers.49
1192
- model.vision.transformer.layers.49.input_layernorm
1193
- model.vision.transformer.layers.49.attention
1194
- model.vision.transformer.layers.49.attention.query_key_value
1195
- model.vision.transformer.layers.49.attention.dense
1196
- model.vision.transformer.layers.49.attention.output_dropout
1197
- model.vision.transformer.layers.49.mlp
1198
- model.vision.transformer.layers.49.mlp.activation_fn
1199
- model.vision.transformer.layers.49.mlp.fc1
1200
- model.vision.transformer.layers.49.mlp.fc2
1201
- model.vision.transformer.layers.49.post_attention_layernorm
1202
- model.vision.transformer.layers.50
1203
- model.vision.transformer.layers.50.input_layernorm
1204
- model.vision.transformer.layers.50.attention
1205
- model.vision.transformer.layers.50.attention.query_key_value
1206
- model.vision.transformer.layers.50.attention.dense
1207
- model.vision.transformer.layers.50.attention.output_dropout
1208
- model.vision.transformer.layers.50.mlp
1209
- model.vision.transformer.layers.50.mlp.activation_fn
1210
- model.vision.transformer.layers.50.mlp.fc1
1211
- model.vision.transformer.layers.50.mlp.fc2
1212
- model.vision.transformer.layers.50.post_attention_layernorm
1213
- model.vision.transformer.layers.51
1214
- model.vision.transformer.layers.51.input_layernorm
1215
- model.vision.transformer.layers.51.attention
1216
- model.vision.transformer.layers.51.attention.query_key_value
1217
- model.vision.transformer.layers.51.attention.dense
1218
- model.vision.transformer.layers.51.attention.output_dropout
1219
- model.vision.transformer.layers.51.mlp
1220
- model.vision.transformer.layers.51.mlp.activation_fn
1221
- model.vision.transformer.layers.51.mlp.fc1
1222
- model.vision.transformer.layers.51.mlp.fc2
1223
- model.vision.transformer.layers.51.post_attention_layernorm
1224
- model.vision.transformer.layers.52
1225
- model.vision.transformer.layers.52.input_layernorm
1226
- model.vision.transformer.layers.52.attention
1227
- model.vision.transformer.layers.52.attention.query_key_value
1228
- model.vision.transformer.layers.52.attention.dense
1229
- model.vision.transformer.layers.52.attention.output_dropout
1230
- model.vision.transformer.layers.52.mlp
1231
- model.vision.transformer.layers.52.mlp.activation_fn
1232
- model.vision.transformer.layers.52.mlp.fc1
1233
- model.vision.transformer.layers.52.mlp.fc2
1234
- model.vision.transformer.layers.52.post_attention_layernorm
1235
- model.vision.transformer.layers.53
1236
- model.vision.transformer.layers.53.input_layernorm
1237
- model.vision.transformer.layers.53.attention
1238
- model.vision.transformer.layers.53.attention.query_key_value
1239
- model.vision.transformer.layers.53.attention.dense
1240
- model.vision.transformer.layers.53.attention.output_dropout
1241
- model.vision.transformer.layers.53.mlp
1242
- model.vision.transformer.layers.53.mlp.activation_fn
1243
- model.vision.transformer.layers.53.mlp.fc1
1244
- model.vision.transformer.layers.53.mlp.fc2
1245
- model.vision.transformer.layers.53.post_attention_layernorm
1246
- model.vision.transformer.layers.54
1247
- model.vision.transformer.layers.54.input_layernorm
1248
- model.vision.transformer.layers.54.attention
1249
- model.vision.transformer.layers.54.attention.query_key_value
1250
- model.vision.transformer.layers.54.attention.dense
1251
- model.vision.transformer.layers.54.attention.output_dropout
1252
- model.vision.transformer.layers.54.mlp
1253
- model.vision.transformer.layers.54.mlp.activation_fn
1254
- model.vision.transformer.layers.54.mlp.fc1
1255
- model.vision.transformer.layers.54.mlp.fc2
1256
- model.vision.transformer.layers.54.post_attention_layernorm
1257
- model.vision.transformer.layers.55
1258
- model.vision.transformer.layers.55.input_layernorm
1259
- model.vision.transformer.layers.55.attention
1260
- model.vision.transformer.layers.55.attention.query_key_value
1261
- model.vision.transformer.layers.55.attention.dense
1262
- model.vision.transformer.layers.55.attention.output_dropout
1263
- model.vision.transformer.layers.55.mlp
1264
- model.vision.transformer.layers.55.mlp.activation_fn
1265
- model.vision.transformer.layers.55.mlp.fc1
1266
- model.vision.transformer.layers.55.mlp.fc2
1267
- model.vision.transformer.layers.55.post_attention_layernorm
1268
- model.vision.transformer.layers.56
1269
- model.vision.transformer.layers.56.input_layernorm
1270
- model.vision.transformer.layers.56.attention
1271
- model.vision.transformer.layers.56.attention.query_key_value
1272
- model.vision.transformer.layers.56.attention.dense
1273
- model.vision.transformer.layers.56.attention.output_dropout
1274
- model.vision.transformer.layers.56.mlp
1275
- model.vision.transformer.layers.56.mlp.activation_fn
1276
- model.vision.transformer.layers.56.mlp.fc1
1277
- model.vision.transformer.layers.56.mlp.fc2
1278
- model.vision.transformer.layers.56.post_attention_layernorm
1279
- model.vision.transformer.layers.57
1280
- model.vision.transformer.layers.57.input_layernorm
1281
- model.vision.transformer.layers.57.attention
1282
- model.vision.transformer.layers.57.attention.query_key_value
1283
- model.vision.transformer.layers.57.attention.dense
1284
- model.vision.transformer.layers.57.attention.output_dropout
1285
- model.vision.transformer.layers.57.mlp
1286
- model.vision.transformer.layers.57.mlp.activation_fn
1287
- model.vision.transformer.layers.57.mlp.fc1
1288
- model.vision.transformer.layers.57.mlp.fc2
1289
- model.vision.transformer.layers.57.post_attention_layernorm
1290
- model.vision.transformer.layers.58
1291
- model.vision.transformer.layers.58.input_layernorm
1292
- model.vision.transformer.layers.58.attention
1293
- model.vision.transformer.layers.58.attention.query_key_value
1294
- model.vision.transformer.layers.58.attention.dense
1295
- model.vision.transformer.layers.58.attention.output_dropout
1296
- model.vision.transformer.layers.58.mlp
1297
- model.vision.transformer.layers.58.mlp.activation_fn
1298
- model.vision.transformer.layers.58.mlp.fc1
1299
- model.vision.transformer.layers.58.mlp.fc2
1300
- model.vision.transformer.layers.58.post_attention_layernorm
1301
- model.vision.transformer.layers.59
1302
- model.vision.transformer.layers.59.input_layernorm
1303
- model.vision.transformer.layers.59.attention
1304
- model.vision.transformer.layers.59.attention.query_key_value
1305
- model.vision.transformer.layers.59.attention.dense
1306
- model.vision.transformer.layers.59.attention.output_dropout
1307
- model.vision.transformer.layers.59.mlp
1308
- model.vision.transformer.layers.59.mlp.activation_fn
1309
- model.vision.transformer.layers.59.mlp.fc1
1310
- model.vision.transformer.layers.59.mlp.fc2
1311
- model.vision.transformer.layers.59.post_attention_layernorm
1312
- model.vision.transformer.layers.60
1313
- model.vision.transformer.layers.60.input_layernorm
1314
- model.vision.transformer.layers.60.attention
1315
- model.vision.transformer.layers.60.attention.query_key_value
1316
- model.vision.transformer.layers.60.attention.dense
1317
- model.vision.transformer.layers.60.attention.output_dropout
1318
- model.vision.transformer.layers.60.mlp
1319
- model.vision.transformer.layers.60.mlp.activation_fn
1320
- model.vision.transformer.layers.60.mlp.fc1
1321
- model.vision.transformer.layers.60.mlp.fc2
1322
- model.vision.transformer.layers.60.post_attention_layernorm
1323
- model.vision.transformer.layers.61
1324
- model.vision.transformer.layers.61.input_layernorm
1325
- model.vision.transformer.layers.61.attention
1326
- model.vision.transformer.layers.61.attention.query_key_value
1327
- model.vision.transformer.layers.61.attention.dense
1328
- model.vision.transformer.layers.61.attention.output_dropout
1329
- model.vision.transformer.layers.61.mlp
1330
- model.vision.transformer.layers.61.mlp.activation_fn
1331
- model.vision.transformer.layers.61.mlp.fc1
1332
- model.vision.transformer.layers.61.mlp.fc2
1333
- model.vision.transformer.layers.61.post_attention_layernorm
1334
- model.vision.transformer.layers.62
1335
- model.vision.transformer.layers.62.input_layernorm
1336
- model.vision.transformer.layers.62.attention
1337
- model.vision.transformer.layers.62.attention.query_key_value
1338
- model.vision.transformer.layers.62.attention.dense
1339
- model.vision.transformer.layers.62.attention.output_dropout
1340
- model.vision.transformer.layers.62.mlp
1341
- model.vision.transformer.layers.62.mlp.activation_fn
1342
- model.vision.transformer.layers.62.mlp.fc1
1343
- model.vision.transformer.layers.62.mlp.fc2
1344
- model.vision.transformer.layers.62.post_attention_layernorm
1345
- model.vision.linear_proj
1346
- model.vision.linear_proj.linear_proj
1347
- model.vision.linear_proj.norm1
1348
- model.vision.linear_proj.act1
1349
- model.vision.linear_proj.dense_h_to_4h
1350
- model.vision.linear_proj.gate_proj
1351
- model.vision.linear_proj.dense_4h_to_h
1352
- lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/allenai/Molmo-7B-D-0924.txt DELETED
@@ -1,606 +0,0 @@
1
-
2
- model
3
- model.transformer
4
- model.transformer.wte
5
- model.transformer.emb_drop
6
- model.transformer.ln_f
7
- model.transformer.blocks
8
- model.transformer.blocks.0
9
- model.transformer.blocks.0.dropout
10
- model.transformer.blocks.0.act
11
- model.transformer.blocks.0.attn_out
12
- model.transformer.blocks.0.ff_out
13
- model.transformer.blocks.0.rotary_emb
14
- model.transformer.blocks.0.attn_norm
15
- model.transformer.blocks.0.ff_norm
16
- model.transformer.blocks.0.att_proj
17
- model.transformer.blocks.0.ff_proj
18
- model.transformer.blocks.1
19
- model.transformer.blocks.1.dropout
20
- model.transformer.blocks.1.act
21
- model.transformer.blocks.1.attn_out
22
- model.transformer.blocks.1.ff_out
23
- model.transformer.blocks.1.rotary_emb
24
- model.transformer.blocks.1.attn_norm
25
- model.transformer.blocks.1.ff_norm
26
- model.transformer.blocks.1.att_proj
27
- model.transformer.blocks.1.ff_proj
28
- model.transformer.blocks.2
29
- model.transformer.blocks.2.dropout
30
- model.transformer.blocks.2.act
31
- model.transformer.blocks.2.attn_out
32
- model.transformer.blocks.2.ff_out
33
- model.transformer.blocks.2.rotary_emb
34
- model.transformer.blocks.2.attn_norm
35
- model.transformer.blocks.2.ff_norm
36
- model.transformer.blocks.2.att_proj
37
- model.transformer.blocks.2.ff_proj
38
- model.transformer.blocks.3
39
- model.transformer.blocks.3.dropout
40
- model.transformer.blocks.3.act
41
- model.transformer.blocks.3.attn_out
42
- model.transformer.blocks.3.ff_out
43
- model.transformer.blocks.3.rotary_emb
44
- model.transformer.blocks.3.attn_norm
45
- model.transformer.blocks.3.ff_norm
46
- model.transformer.blocks.3.att_proj
47
- model.transformer.blocks.3.ff_proj
48
- model.transformer.blocks.4
49
- model.transformer.blocks.4.dropout
50
- model.transformer.blocks.4.act
51
- model.transformer.blocks.4.attn_out
52
- model.transformer.blocks.4.ff_out
53
- model.transformer.blocks.4.rotary_emb
54
- model.transformer.blocks.4.attn_norm
55
- model.transformer.blocks.4.ff_norm
56
- model.transformer.blocks.4.att_proj
57
- model.transformer.blocks.4.ff_proj
58
- model.transformer.blocks.5
59
- model.transformer.blocks.5.dropout
60
- model.transformer.blocks.5.act
61
- model.transformer.blocks.5.attn_out
62
- model.transformer.blocks.5.ff_out
63
- model.transformer.blocks.5.rotary_emb
64
- model.transformer.blocks.5.attn_norm
65
- model.transformer.blocks.5.ff_norm
66
- model.transformer.blocks.5.att_proj
67
- model.transformer.blocks.5.ff_proj
68
- model.transformer.blocks.6
69
- model.transformer.blocks.6.dropout
70
- model.transformer.blocks.6.act
71
- model.transformer.blocks.6.attn_out
72
- model.transformer.blocks.6.ff_out
73
- model.transformer.blocks.6.rotary_emb
74
- model.transformer.blocks.6.attn_norm
75
- model.transformer.blocks.6.ff_norm
76
- model.transformer.blocks.6.att_proj
77
- model.transformer.blocks.6.ff_proj
78
- model.transformer.blocks.7
79
- model.transformer.blocks.7.dropout
80
- model.transformer.blocks.7.act
81
- model.transformer.blocks.7.attn_out
82
- model.transformer.blocks.7.ff_out
83
- model.transformer.blocks.7.rotary_emb
84
- model.transformer.blocks.7.attn_norm
85
- model.transformer.blocks.7.ff_norm
86
- model.transformer.blocks.7.att_proj
87
- model.transformer.blocks.7.ff_proj
88
- model.transformer.blocks.8
89
- model.transformer.blocks.8.dropout
90
- model.transformer.blocks.8.act
91
- model.transformer.blocks.8.attn_out
92
- model.transformer.blocks.8.ff_out
93
- model.transformer.blocks.8.rotary_emb
94
- model.transformer.blocks.8.attn_norm
95
- model.transformer.blocks.8.ff_norm
96
- model.transformer.blocks.8.att_proj
97
- model.transformer.blocks.8.ff_proj
98
- model.transformer.blocks.9
99
- model.transformer.blocks.9.dropout
100
- model.transformer.blocks.9.act
101
- model.transformer.blocks.9.attn_out
102
- model.transformer.blocks.9.ff_out
103
- model.transformer.blocks.9.rotary_emb
104
- model.transformer.blocks.9.attn_norm
105
- model.transformer.blocks.9.ff_norm
106
- model.transformer.blocks.9.att_proj
107
- model.transformer.blocks.9.ff_proj
108
- model.transformer.blocks.10
109
- model.transformer.blocks.10.dropout
110
- model.transformer.blocks.10.act
111
- model.transformer.blocks.10.attn_out
112
- model.transformer.blocks.10.ff_out
113
- model.transformer.blocks.10.rotary_emb
114
- model.transformer.blocks.10.attn_norm
115
- model.transformer.blocks.10.ff_norm
116
- model.transformer.blocks.10.att_proj
117
- model.transformer.blocks.10.ff_proj
118
- model.transformer.blocks.11
119
- model.transformer.blocks.11.dropout
120
- model.transformer.blocks.11.act
121
- model.transformer.blocks.11.attn_out
122
- model.transformer.blocks.11.ff_out
123
- model.transformer.blocks.11.rotary_emb
124
- model.transformer.blocks.11.attn_norm
125
- model.transformer.blocks.11.ff_norm
126
- model.transformer.blocks.11.att_proj
127
- model.transformer.blocks.11.ff_proj
128
- model.transformer.blocks.12
129
- model.transformer.blocks.12.dropout
130
- model.transformer.blocks.12.act
131
- model.transformer.blocks.12.attn_out
132
- model.transformer.blocks.12.ff_out
133
- model.transformer.blocks.12.rotary_emb
134
- model.transformer.blocks.12.attn_norm
135
- model.transformer.blocks.12.ff_norm
136
- model.transformer.blocks.12.att_proj
137
- model.transformer.blocks.12.ff_proj
138
- model.transformer.blocks.13
139
- model.transformer.blocks.13.dropout
140
- model.transformer.blocks.13.act
141
- model.transformer.blocks.13.attn_out
142
- model.transformer.blocks.13.ff_out
143
- model.transformer.blocks.13.rotary_emb
144
- model.transformer.blocks.13.attn_norm
145
- model.transformer.blocks.13.ff_norm
146
- model.transformer.blocks.13.att_proj
147
- model.transformer.blocks.13.ff_proj
148
- model.transformer.blocks.14
149
- model.transformer.blocks.14.dropout
150
- model.transformer.blocks.14.act
151
- model.transformer.blocks.14.attn_out
152
- model.transformer.blocks.14.ff_out
153
- model.transformer.blocks.14.rotary_emb
154
- model.transformer.blocks.14.attn_norm
155
- model.transformer.blocks.14.ff_norm
156
- model.transformer.blocks.14.att_proj
157
- model.transformer.blocks.14.ff_proj
158
- model.transformer.blocks.15
159
- model.transformer.blocks.15.dropout
160
- model.transformer.blocks.15.act
161
- model.transformer.blocks.15.attn_out
162
- model.transformer.blocks.15.ff_out
163
- model.transformer.blocks.15.rotary_emb
164
- model.transformer.blocks.15.attn_norm
165
- model.transformer.blocks.15.ff_norm
166
- model.transformer.blocks.15.att_proj
167
- model.transformer.blocks.15.ff_proj
168
- model.transformer.blocks.16
169
- model.transformer.blocks.16.dropout
170
- model.transformer.blocks.16.act
171
- model.transformer.blocks.16.attn_out
172
- model.transformer.blocks.16.ff_out
173
- model.transformer.blocks.16.rotary_emb
174
- model.transformer.blocks.16.attn_norm
175
- model.transformer.blocks.16.ff_norm
176
- model.transformer.blocks.16.att_proj
177
- model.transformer.blocks.16.ff_proj
178
- model.transformer.blocks.17
179
- model.transformer.blocks.17.dropout
180
- model.transformer.blocks.17.act
181
- model.transformer.blocks.17.attn_out
182
- model.transformer.blocks.17.ff_out
183
- model.transformer.blocks.17.rotary_emb
184
- model.transformer.blocks.17.attn_norm
185
- model.transformer.blocks.17.ff_norm
186
- model.transformer.blocks.17.att_proj
187
- model.transformer.blocks.17.ff_proj
188
- model.transformer.blocks.18
189
- model.transformer.blocks.18.dropout
190
- model.transformer.blocks.18.act
191
- model.transformer.blocks.18.attn_out
192
- model.transformer.blocks.18.ff_out
193
- model.transformer.blocks.18.rotary_emb
194
- model.transformer.blocks.18.attn_norm
195
- model.transformer.blocks.18.ff_norm
196
- model.transformer.blocks.18.att_proj
197
- model.transformer.blocks.18.ff_proj
198
- model.transformer.blocks.19
199
- model.transformer.blocks.19.dropout
200
- model.transformer.blocks.19.act
201
- model.transformer.blocks.19.attn_out
202
- model.transformer.blocks.19.ff_out
203
- model.transformer.blocks.19.rotary_emb
204
- model.transformer.blocks.19.attn_norm
205
- model.transformer.blocks.19.ff_norm
206
- model.transformer.blocks.19.att_proj
207
- model.transformer.blocks.19.ff_proj
208
- model.transformer.blocks.20
209
- model.transformer.blocks.20.dropout
210
- model.transformer.blocks.20.act
211
- model.transformer.blocks.20.attn_out
212
- model.transformer.blocks.20.ff_out
213
- model.transformer.blocks.20.rotary_emb
214
- model.transformer.blocks.20.attn_norm
215
- model.transformer.blocks.20.ff_norm
216
- model.transformer.blocks.20.att_proj
217
- model.transformer.blocks.20.ff_proj
218
- model.transformer.blocks.21
219
- model.transformer.blocks.21.dropout
220
- model.transformer.blocks.21.act
221
- model.transformer.blocks.21.attn_out
222
- model.transformer.blocks.21.ff_out
223
- model.transformer.blocks.21.rotary_emb
224
- model.transformer.blocks.21.attn_norm
225
- model.transformer.blocks.21.ff_norm
226
- model.transformer.blocks.21.att_proj
227
- model.transformer.blocks.21.ff_proj
228
- model.transformer.blocks.22
229
- model.transformer.blocks.22.dropout
230
- model.transformer.blocks.22.act
231
- model.transformer.blocks.22.attn_out
232
- model.transformer.blocks.22.ff_out
233
- model.transformer.blocks.22.rotary_emb
234
- model.transformer.blocks.22.attn_norm
235
- model.transformer.blocks.22.ff_norm
236
- model.transformer.blocks.22.att_proj
237
- model.transformer.blocks.22.ff_proj
238
- model.transformer.blocks.23
239
- model.transformer.blocks.23.dropout
240
- model.transformer.blocks.23.act
241
- model.transformer.blocks.23.attn_out
242
- model.transformer.blocks.23.ff_out
243
- model.transformer.blocks.23.rotary_emb
244
- model.transformer.blocks.23.attn_norm
245
- model.transformer.blocks.23.ff_norm
246
- model.transformer.blocks.23.att_proj
247
- model.transformer.blocks.23.ff_proj
248
- model.transformer.blocks.24
249
- model.transformer.blocks.24.dropout
250
- model.transformer.blocks.24.act
251
- model.transformer.blocks.24.attn_out
252
- model.transformer.blocks.24.ff_out
253
- model.transformer.blocks.24.rotary_emb
254
- model.transformer.blocks.24.attn_norm
255
- model.transformer.blocks.24.ff_norm
256
- model.transformer.blocks.24.att_proj
257
- model.transformer.blocks.24.ff_proj
258
- model.transformer.blocks.25
259
- model.transformer.blocks.25.dropout
260
- model.transformer.blocks.25.act
261
- model.transformer.blocks.25.attn_out
262
- model.transformer.blocks.25.ff_out
263
- model.transformer.blocks.25.rotary_emb
264
- model.transformer.blocks.25.attn_norm
265
- model.transformer.blocks.25.ff_norm
266
- model.transformer.blocks.25.att_proj
267
- model.transformer.blocks.25.ff_proj
268
- model.transformer.blocks.26
269
- model.transformer.blocks.26.dropout
270
- model.transformer.blocks.26.act
271
- model.transformer.blocks.26.attn_out
272
- model.transformer.blocks.26.ff_out
273
- model.transformer.blocks.26.rotary_emb
274
- model.transformer.blocks.26.attn_norm
275
- model.transformer.blocks.26.ff_norm
276
- model.transformer.blocks.26.att_proj
277
- model.transformer.blocks.26.ff_proj
278
- model.transformer.blocks.27
279
- model.transformer.blocks.27.dropout
280
- model.transformer.blocks.27.act
281
- model.transformer.blocks.27.attn_out
282
- model.transformer.blocks.27.ff_out
283
- model.transformer.blocks.27.rotary_emb
284
- model.transformer.blocks.27.attn_norm
285
- model.transformer.blocks.27.ff_norm
286
- model.transformer.blocks.27.att_proj
287
- model.transformer.blocks.27.ff_proj
288
- model.transformer.ff_out
289
- model.vision_backbone
290
- model.vision_backbone.image_vit
291
- model.vision_backbone.image_vit.patch_embedding
292
- model.vision_backbone.image_vit.pre_ln
293
- model.vision_backbone.image_vit.transformer
294
- model.vision_backbone.image_vit.transformer.resblocks
295
- model.vision_backbone.image_vit.transformer.resblocks.0
296
- model.vision_backbone.image_vit.transformer.resblocks.0.attention
297
- model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq
298
- model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk
299
- model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv
300
- model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo
301
- model.vision_backbone.image_vit.transformer.resblocks.0.attention.residual_dropout
302
- model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward
303
- model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1
304
- model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.act
305
- model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2
306
- model.vision_backbone.image_vit.transformer.resblocks.0.attention_norm
307
- model.vision_backbone.image_vit.transformer.resblocks.0.ffn_norm
308
- model.vision_backbone.image_vit.transformer.resblocks.1
309
- model.vision_backbone.image_vit.transformer.resblocks.1.attention
310
- model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq
311
- model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk
312
- model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv
313
- model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo
314
- model.vision_backbone.image_vit.transformer.resblocks.1.attention.residual_dropout
315
- model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward
316
- model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1
317
- model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.act
318
- model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2
319
- model.vision_backbone.image_vit.transformer.resblocks.1.attention_norm
320
- model.vision_backbone.image_vit.transformer.resblocks.1.ffn_norm
321
- model.vision_backbone.image_vit.transformer.resblocks.2
322
- model.vision_backbone.image_vit.transformer.resblocks.2.attention
323
- model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq
324
- model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk
325
- model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv
326
- model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo
327
- model.vision_backbone.image_vit.transformer.resblocks.2.attention.residual_dropout
328
- model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward
329
- model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1
330
- model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.act
331
- model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2
332
- model.vision_backbone.image_vit.transformer.resblocks.2.attention_norm
333
- model.vision_backbone.image_vit.transformer.resblocks.2.ffn_norm
334
- model.vision_backbone.image_vit.transformer.resblocks.3
335
- model.vision_backbone.image_vit.transformer.resblocks.3.attention
336
- model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq
337
- model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk
338
- model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv
339
- model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo
340
- model.vision_backbone.image_vit.transformer.resblocks.3.attention.residual_dropout
341
- model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward
342
- model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1
343
- model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.act
344
- model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2
345
- model.vision_backbone.image_vit.transformer.resblocks.3.attention_norm
346
- model.vision_backbone.image_vit.transformer.resblocks.3.ffn_norm
347
- model.vision_backbone.image_vit.transformer.resblocks.4
348
- model.vision_backbone.image_vit.transformer.resblocks.4.attention
349
- model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq
350
- model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk
351
- model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv
352
- model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo
353
- model.vision_backbone.image_vit.transformer.resblocks.4.attention.residual_dropout
354
- model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward
355
- model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1
356
- model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.act
357
- model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2
358
- model.vision_backbone.image_vit.transformer.resblocks.4.attention_norm
359
- model.vision_backbone.image_vit.transformer.resblocks.4.ffn_norm
360
- model.vision_backbone.image_vit.transformer.resblocks.5
361
- model.vision_backbone.image_vit.transformer.resblocks.5.attention
362
- model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq
363
- model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk
364
- model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv
365
- model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo
366
- model.vision_backbone.image_vit.transformer.resblocks.5.attention.residual_dropout
367
- model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward
368
- model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1
369
- model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.act
370
- model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2
371
- model.vision_backbone.image_vit.transformer.resblocks.5.attention_norm
372
- model.vision_backbone.image_vit.transformer.resblocks.5.ffn_norm
373
- model.vision_backbone.image_vit.transformer.resblocks.6
374
- model.vision_backbone.image_vit.transformer.resblocks.6.attention
375
- model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq
376
- model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk
377
- model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv
378
- model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo
379
- model.vision_backbone.image_vit.transformer.resblocks.6.attention.residual_dropout
380
- model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward
381
- model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1
382
- model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.act
383
- model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2
384
- model.vision_backbone.image_vit.transformer.resblocks.6.attention_norm
385
- model.vision_backbone.image_vit.transformer.resblocks.6.ffn_norm
386
- model.vision_backbone.image_vit.transformer.resblocks.7
387
- model.vision_backbone.image_vit.transformer.resblocks.7.attention
388
- model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq
389
- model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk
390
- model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv
391
- model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo
392
- model.vision_backbone.image_vit.transformer.resblocks.7.attention.residual_dropout
393
- model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward
394
- model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1
395
- model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.act
396
- model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2
397
- model.vision_backbone.image_vit.transformer.resblocks.7.attention_norm
398
- model.vision_backbone.image_vit.transformer.resblocks.7.ffn_norm
399
- model.vision_backbone.image_vit.transformer.resblocks.8
400
- model.vision_backbone.image_vit.transformer.resblocks.8.attention
401
- model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq
402
- model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk
403
- model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv
404
- model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo
405
- model.vision_backbone.image_vit.transformer.resblocks.8.attention.residual_dropout
406
- model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward
407
- model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1
408
- model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.act
409
- model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2
410
- model.vision_backbone.image_vit.transformer.resblocks.8.attention_norm
411
- model.vision_backbone.image_vit.transformer.resblocks.8.ffn_norm
412
- model.vision_backbone.image_vit.transformer.resblocks.9
413
- model.vision_backbone.image_vit.transformer.resblocks.9.attention
414
- model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq
415
- model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk
416
- model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv
417
- model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo
418
- model.vision_backbone.image_vit.transformer.resblocks.9.attention.residual_dropout
419
- model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward
420
- model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1
421
- model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.act
422
- model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2
423
- model.vision_backbone.image_vit.transformer.resblocks.9.attention_norm
424
- model.vision_backbone.image_vit.transformer.resblocks.9.ffn_norm
425
- model.vision_backbone.image_vit.transformer.resblocks.10
426
- model.vision_backbone.image_vit.transformer.resblocks.10.attention
427
- model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq
428
- model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk
429
- model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv
430
- model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo
431
- model.vision_backbone.image_vit.transformer.resblocks.10.attention.residual_dropout
432
- model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward
433
- model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1
434
- model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.act
435
- model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2
436
- model.vision_backbone.image_vit.transformer.resblocks.10.attention_norm
437
- model.vision_backbone.image_vit.transformer.resblocks.10.ffn_norm
438
- model.vision_backbone.image_vit.transformer.resblocks.11
439
- model.vision_backbone.image_vit.transformer.resblocks.11.attention
440
- model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq
441
- model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk
442
- model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv
443
- model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo
444
- model.vision_backbone.image_vit.transformer.resblocks.11.attention.residual_dropout
445
- model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward
446
- model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1
447
- model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.act
448
- model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2
449
- model.vision_backbone.image_vit.transformer.resblocks.11.attention_norm
450
- model.vision_backbone.image_vit.transformer.resblocks.11.ffn_norm
451
- model.vision_backbone.image_vit.transformer.resblocks.12
452
- model.vision_backbone.image_vit.transformer.resblocks.12.attention
453
- model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq
454
- model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk
455
- model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv
456
- model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo
457
- model.vision_backbone.image_vit.transformer.resblocks.12.attention.residual_dropout
458
- model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward
459
- model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1
460
- model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.act
461
- model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2
462
- model.vision_backbone.image_vit.transformer.resblocks.12.attention_norm
463
- model.vision_backbone.image_vit.transformer.resblocks.12.ffn_norm
464
- model.vision_backbone.image_vit.transformer.resblocks.13
465
- model.vision_backbone.image_vit.transformer.resblocks.13.attention
466
- model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq
467
- model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk
468
- model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv
469
- model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo
470
- model.vision_backbone.image_vit.transformer.resblocks.13.attention.residual_dropout
471
- model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward
472
- model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1
473
- model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.act
474
- model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2
475
- model.vision_backbone.image_vit.transformer.resblocks.13.attention_norm
476
- model.vision_backbone.image_vit.transformer.resblocks.13.ffn_norm
477
- model.vision_backbone.image_vit.transformer.resblocks.14
478
- model.vision_backbone.image_vit.transformer.resblocks.14.attention
479
- model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq
480
- model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk
481
- model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv
482
- model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo
483
- model.vision_backbone.image_vit.transformer.resblocks.14.attention.residual_dropout
484
- model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward
485
- model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1
486
- model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.act
487
- model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2
488
- model.vision_backbone.image_vit.transformer.resblocks.14.attention_norm
489
- model.vision_backbone.image_vit.transformer.resblocks.14.ffn_norm
490
- model.vision_backbone.image_vit.transformer.resblocks.15
491
- model.vision_backbone.image_vit.transformer.resblocks.15.attention
492
- model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq
493
- model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk
494
- model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv
495
- model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo
496
- model.vision_backbone.image_vit.transformer.resblocks.15.attention.residual_dropout
497
- model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward
498
- model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1
499
- model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.act
500
- model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2
501
- model.vision_backbone.image_vit.transformer.resblocks.15.attention_norm
502
- model.vision_backbone.image_vit.transformer.resblocks.15.ffn_norm
503
- model.vision_backbone.image_vit.transformer.resblocks.16
504
- model.vision_backbone.image_vit.transformer.resblocks.16.attention
505
- model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq
506
- model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk
507
- model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv
508
- model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo
509
- model.vision_backbone.image_vit.transformer.resblocks.16.attention.residual_dropout
510
- model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward
511
- model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1
512
- model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.act
513
- model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2
514
- model.vision_backbone.image_vit.transformer.resblocks.16.attention_norm
515
- model.vision_backbone.image_vit.transformer.resblocks.16.ffn_norm
516
- model.vision_backbone.image_vit.transformer.resblocks.17
517
- model.vision_backbone.image_vit.transformer.resblocks.17.attention
518
- model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq
519
- model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk
520
- model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv
521
- model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo
522
- model.vision_backbone.image_vit.transformer.resblocks.17.attention.residual_dropout
523
- model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward
524
- model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1
525
- model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.act
526
- model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2
527
- model.vision_backbone.image_vit.transformer.resblocks.17.attention_norm
528
- model.vision_backbone.image_vit.transformer.resblocks.17.ffn_norm
529
- model.vision_backbone.image_vit.transformer.resblocks.18
530
- model.vision_backbone.image_vit.transformer.resblocks.18.attention
531
- model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq
532
- model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk
533
- model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv
534
- model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo
535
- model.vision_backbone.image_vit.transformer.resblocks.18.attention.residual_dropout
536
- model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward
537
- model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1
538
- model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.act
539
- model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2
540
- model.vision_backbone.image_vit.transformer.resblocks.18.attention_norm
541
- model.vision_backbone.image_vit.transformer.resblocks.18.ffn_norm
542
- model.vision_backbone.image_vit.transformer.resblocks.19
543
- model.vision_backbone.image_vit.transformer.resblocks.19.attention
544
- model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq
545
- model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk
546
- model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv
547
- model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo
548
- model.vision_backbone.image_vit.transformer.resblocks.19.attention.residual_dropout
549
- model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward
550
- model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1
551
- model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.act
552
- model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2
553
- model.vision_backbone.image_vit.transformer.resblocks.19.attention_norm
554
- model.vision_backbone.image_vit.transformer.resblocks.19.ffn_norm
555
- model.vision_backbone.image_vit.transformer.resblocks.20
556
- model.vision_backbone.image_vit.transformer.resblocks.20.attention
557
- model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq
558
- model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk
559
- model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv
560
- model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo
561
- model.vision_backbone.image_vit.transformer.resblocks.20.attention.residual_dropout
562
- model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward
563
- model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1
564
- model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.act
565
- model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2
566
- model.vision_backbone.image_vit.transformer.resblocks.20.attention_norm
567
- model.vision_backbone.image_vit.transformer.resblocks.20.ffn_norm
568
- model.vision_backbone.image_vit.transformer.resblocks.21
569
- model.vision_backbone.image_vit.transformer.resblocks.21.attention
570
- model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq
571
- model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk
572
- model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv
573
- model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo
574
- model.vision_backbone.image_vit.transformer.resblocks.21.attention.residual_dropout
575
- model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward
576
- model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1
577
- model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.act
578
- model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2
579
- model.vision_backbone.image_vit.transformer.resblocks.21.attention_norm
580
- model.vision_backbone.image_vit.transformer.resblocks.21.ffn_norm
581
- model.vision_backbone.image_vit.transformer.resblocks.22
582
- model.vision_backbone.image_vit.transformer.resblocks.22.attention
583
- model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq
584
- model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk
585
- model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv
586
- model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo
587
- model.vision_backbone.image_vit.transformer.resblocks.22.attention.residual_dropout
588
- model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward
589
- model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1
590
- model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.act
591
- model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2
592
- model.vision_backbone.image_vit.transformer.resblocks.22.attention_norm
593
- model.vision_backbone.image_vit.transformer.resblocks.22.ffn_norm
594
- model.vision_backbone.image_pooling_2d
595
- model.vision_backbone.image_pooling_2d.wq
596
- model.vision_backbone.image_pooling_2d.wk
597
- model.vision_backbone.image_pooling_2d.wv
598
- model.vision_backbone.image_pooling_2d.wo
599
- model.vision_backbone.image_pooling_2d.residual_dropout
600
- model.vision_backbone.image_projector
601
- model.vision_backbone.image_projector.w1
602
- model.vision_backbone.image_projector.w2
603
- model.vision_backbone.image_projector.w3
604
- model.vision_backbone.image_projector.act
605
- model.vision_backbone.image_projector.dropout
606
- model.vision_backbone.image_feature_dropout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/deepseek-community/Janus-Pro-1B.txt DELETED
@@ -1,1033 +0,0 @@
1
-
2
- model
3
- model.vision_model
4
- model.vision_model.embeddings
5
- model.vision_model.embeddings.patch_embedding
6
- model.vision_model.embeddings.position_embedding
7
- model.vision_model.encoder
8
- model.vision_model.encoder.layers
9
- model.vision_model.encoder.layers.0
10
- model.vision_model.encoder.layers.0.layer_norm1
11
- model.vision_model.encoder.layers.0.self_attn
12
- model.vision_model.encoder.layers.0.self_attn.q_proj
13
- model.vision_model.encoder.layers.0.self_attn.k_proj
14
- model.vision_model.encoder.layers.0.self_attn.v_proj
15
- model.vision_model.encoder.layers.0.self_attn.projection_layer
16
- model.vision_model.encoder.layers.0.self_attn.projection_dropout
17
- model.vision_model.encoder.layers.0.self_attn.q_norm
18
- model.vision_model.encoder.layers.0.self_attn.k_norm
19
- model.vision_model.encoder.layers.0.layer_norm2
20
- model.vision_model.encoder.layers.0.mlp
21
- model.vision_model.encoder.layers.0.mlp.activation_fn
22
- model.vision_model.encoder.layers.0.mlp.fc1
23
- model.vision_model.encoder.layers.0.mlp.fc2
24
- model.vision_model.encoder.layers.0.mlp.dropout1
25
- model.vision_model.encoder.layers.0.mlp.dropout2
26
- model.vision_model.encoder.layers.1
27
- model.vision_model.encoder.layers.1.layer_norm1
28
- model.vision_model.encoder.layers.1.self_attn
29
- model.vision_model.encoder.layers.1.self_attn.q_proj
30
- model.vision_model.encoder.layers.1.self_attn.k_proj
31
- model.vision_model.encoder.layers.1.self_attn.v_proj
32
- model.vision_model.encoder.layers.1.self_attn.projection_layer
33
- model.vision_model.encoder.layers.1.self_attn.projection_dropout
34
- model.vision_model.encoder.layers.1.self_attn.q_norm
35
- model.vision_model.encoder.layers.1.self_attn.k_norm
36
- model.vision_model.encoder.layers.1.layer_norm2
37
- model.vision_model.encoder.layers.1.mlp
38
- model.vision_model.encoder.layers.1.mlp.activation_fn
39
- model.vision_model.encoder.layers.1.mlp.fc1
40
- model.vision_model.encoder.layers.1.mlp.fc2
41
- model.vision_model.encoder.layers.1.mlp.dropout1
42
- model.vision_model.encoder.layers.1.mlp.dropout2
43
- model.vision_model.encoder.layers.2
44
- model.vision_model.encoder.layers.2.layer_norm1
45
- model.vision_model.encoder.layers.2.self_attn
46
- model.vision_model.encoder.layers.2.self_attn.q_proj
47
- model.vision_model.encoder.layers.2.self_attn.k_proj
48
- model.vision_model.encoder.layers.2.self_attn.v_proj
49
- model.vision_model.encoder.layers.2.self_attn.projection_layer
50
- model.vision_model.encoder.layers.2.self_attn.projection_dropout
51
- model.vision_model.encoder.layers.2.self_attn.q_norm
52
- model.vision_model.encoder.layers.2.self_attn.k_norm
53
- model.vision_model.encoder.layers.2.layer_norm2
54
- model.vision_model.encoder.layers.2.mlp
55
- model.vision_model.encoder.layers.2.mlp.activation_fn
56
- model.vision_model.encoder.layers.2.mlp.fc1
57
- model.vision_model.encoder.layers.2.mlp.fc2
58
- model.vision_model.encoder.layers.2.mlp.dropout1
59
- model.vision_model.encoder.layers.2.mlp.dropout2
60
- model.vision_model.encoder.layers.3
61
- model.vision_model.encoder.layers.3.layer_norm1
62
- model.vision_model.encoder.layers.3.self_attn
63
- model.vision_model.encoder.layers.3.self_attn.q_proj
64
- model.vision_model.encoder.layers.3.self_attn.k_proj
65
- model.vision_model.encoder.layers.3.self_attn.v_proj
66
- model.vision_model.encoder.layers.3.self_attn.projection_layer
67
- model.vision_model.encoder.layers.3.self_attn.projection_dropout
68
- model.vision_model.encoder.layers.3.self_attn.q_norm
69
- model.vision_model.encoder.layers.3.self_attn.k_norm
70
- model.vision_model.encoder.layers.3.layer_norm2
71
- model.vision_model.encoder.layers.3.mlp
72
- model.vision_model.encoder.layers.3.mlp.activation_fn
73
- model.vision_model.encoder.layers.3.mlp.fc1
74
- model.vision_model.encoder.layers.3.mlp.fc2
75
- model.vision_model.encoder.layers.3.mlp.dropout1
76
- model.vision_model.encoder.layers.3.mlp.dropout2
77
- model.vision_model.encoder.layers.4
78
- model.vision_model.encoder.layers.4.layer_norm1
79
- model.vision_model.encoder.layers.4.self_attn
80
- model.vision_model.encoder.layers.4.self_attn.q_proj
81
- model.vision_model.encoder.layers.4.self_attn.k_proj
82
- model.vision_model.encoder.layers.4.self_attn.v_proj
83
- model.vision_model.encoder.layers.4.self_attn.projection_layer
84
- model.vision_model.encoder.layers.4.self_attn.projection_dropout
85
- model.vision_model.encoder.layers.4.self_attn.q_norm
86
- model.vision_model.encoder.layers.4.self_attn.k_norm
87
- model.vision_model.encoder.layers.4.layer_norm2
88
- model.vision_model.encoder.layers.4.mlp
89
- model.vision_model.encoder.layers.4.mlp.activation_fn
90
- model.vision_model.encoder.layers.4.mlp.fc1
91
- model.vision_model.encoder.layers.4.mlp.fc2
92
- model.vision_model.encoder.layers.4.mlp.dropout1
93
- model.vision_model.encoder.layers.4.mlp.dropout2
94
- model.vision_model.encoder.layers.5
95
- model.vision_model.encoder.layers.5.layer_norm1
96
- model.vision_model.encoder.layers.5.self_attn
97
- model.vision_model.encoder.layers.5.self_attn.q_proj
98
- model.vision_model.encoder.layers.5.self_attn.k_proj
99
- model.vision_model.encoder.layers.5.self_attn.v_proj
100
- model.vision_model.encoder.layers.5.self_attn.projection_layer
101
- model.vision_model.encoder.layers.5.self_attn.projection_dropout
102
- model.vision_model.encoder.layers.5.self_attn.q_norm
103
- model.vision_model.encoder.layers.5.self_attn.k_norm
104
- model.vision_model.encoder.layers.5.layer_norm2
105
- model.vision_model.encoder.layers.5.mlp
106
- model.vision_model.encoder.layers.5.mlp.activation_fn
107
- model.vision_model.encoder.layers.5.mlp.fc1
108
- model.vision_model.encoder.layers.5.mlp.fc2
109
- model.vision_model.encoder.layers.5.mlp.dropout1
110
- model.vision_model.encoder.layers.5.mlp.dropout2
111
- model.vision_model.encoder.layers.6
112
- model.vision_model.encoder.layers.6.layer_norm1
113
- model.vision_model.encoder.layers.6.self_attn
114
- model.vision_model.encoder.layers.6.self_attn.q_proj
115
- model.vision_model.encoder.layers.6.self_attn.k_proj
116
- model.vision_model.encoder.layers.6.self_attn.v_proj
117
- model.vision_model.encoder.layers.6.self_attn.projection_layer
118
- model.vision_model.encoder.layers.6.self_attn.projection_dropout
119
- model.vision_model.encoder.layers.6.self_attn.q_norm
120
- model.vision_model.encoder.layers.6.self_attn.k_norm
121
- model.vision_model.encoder.layers.6.layer_norm2
122
- model.vision_model.encoder.layers.6.mlp
123
- model.vision_model.encoder.layers.6.mlp.activation_fn
124
- model.vision_model.encoder.layers.6.mlp.fc1
125
- model.vision_model.encoder.layers.6.mlp.fc2
126
- model.vision_model.encoder.layers.6.mlp.dropout1
127
- model.vision_model.encoder.layers.6.mlp.dropout2
128
- model.vision_model.encoder.layers.7
129
- model.vision_model.encoder.layers.7.layer_norm1
130
- model.vision_model.encoder.layers.7.self_attn
131
- model.vision_model.encoder.layers.7.self_attn.q_proj
132
- model.vision_model.encoder.layers.7.self_attn.k_proj
133
- model.vision_model.encoder.layers.7.self_attn.v_proj
134
- model.vision_model.encoder.layers.7.self_attn.projection_layer
135
- model.vision_model.encoder.layers.7.self_attn.projection_dropout
136
- model.vision_model.encoder.layers.7.self_attn.q_norm
137
- model.vision_model.encoder.layers.7.self_attn.k_norm
138
- model.vision_model.encoder.layers.7.layer_norm2
139
- model.vision_model.encoder.layers.7.mlp
140
- model.vision_model.encoder.layers.7.mlp.activation_fn
141
- model.vision_model.encoder.layers.7.mlp.fc1
142
- model.vision_model.encoder.layers.7.mlp.fc2
143
- model.vision_model.encoder.layers.7.mlp.dropout1
144
- model.vision_model.encoder.layers.7.mlp.dropout2
145
- model.vision_model.encoder.layers.8
146
- model.vision_model.encoder.layers.8.layer_norm1
147
- model.vision_model.encoder.layers.8.self_attn
148
- model.vision_model.encoder.layers.8.self_attn.q_proj
149
- model.vision_model.encoder.layers.8.self_attn.k_proj
150
- model.vision_model.encoder.layers.8.self_attn.v_proj
151
- model.vision_model.encoder.layers.8.self_attn.projection_layer
152
- model.vision_model.encoder.layers.8.self_attn.projection_dropout
153
- model.vision_model.encoder.layers.8.self_attn.q_norm
154
- model.vision_model.encoder.layers.8.self_attn.k_norm
155
- model.vision_model.encoder.layers.8.layer_norm2
156
- model.vision_model.encoder.layers.8.mlp
157
- model.vision_model.encoder.layers.8.mlp.activation_fn
158
- model.vision_model.encoder.layers.8.mlp.fc1
159
- model.vision_model.encoder.layers.8.mlp.fc2
160
- model.vision_model.encoder.layers.8.mlp.dropout1
161
- model.vision_model.encoder.layers.8.mlp.dropout2
162
- model.vision_model.encoder.layers.9
163
- model.vision_model.encoder.layers.9.layer_norm1
164
- model.vision_model.encoder.layers.9.self_attn
165
- model.vision_model.encoder.layers.9.self_attn.q_proj
166
- model.vision_model.encoder.layers.9.self_attn.k_proj
167
- model.vision_model.encoder.layers.9.self_attn.v_proj
168
- model.vision_model.encoder.layers.9.self_attn.projection_layer
169
- model.vision_model.encoder.layers.9.self_attn.projection_dropout
170
- model.vision_model.encoder.layers.9.self_attn.q_norm
171
- model.vision_model.encoder.layers.9.self_attn.k_norm
172
- model.vision_model.encoder.layers.9.layer_norm2
173
- model.vision_model.encoder.layers.9.mlp
174
- model.vision_model.encoder.layers.9.mlp.activation_fn
175
- model.vision_model.encoder.layers.9.mlp.fc1
176
- model.vision_model.encoder.layers.9.mlp.fc2
177
- model.vision_model.encoder.layers.9.mlp.dropout1
178
- model.vision_model.encoder.layers.9.mlp.dropout2
179
- model.vision_model.encoder.layers.10
180
- model.vision_model.encoder.layers.10.layer_norm1
181
- model.vision_model.encoder.layers.10.self_attn
182
- model.vision_model.encoder.layers.10.self_attn.q_proj
183
- model.vision_model.encoder.layers.10.self_attn.k_proj
184
- model.vision_model.encoder.layers.10.self_attn.v_proj
185
- model.vision_model.encoder.layers.10.self_attn.projection_layer
186
- model.vision_model.encoder.layers.10.self_attn.projection_dropout
187
- model.vision_model.encoder.layers.10.self_attn.q_norm
188
- model.vision_model.encoder.layers.10.self_attn.k_norm
189
- model.vision_model.encoder.layers.10.layer_norm2
190
- model.vision_model.encoder.layers.10.mlp
191
- model.vision_model.encoder.layers.10.mlp.activation_fn
192
- model.vision_model.encoder.layers.10.mlp.fc1
193
- model.vision_model.encoder.layers.10.mlp.fc2
194
- model.vision_model.encoder.layers.10.mlp.dropout1
195
- model.vision_model.encoder.layers.10.mlp.dropout2
196
- model.vision_model.encoder.layers.11
197
- model.vision_model.encoder.layers.11.layer_norm1
198
- model.vision_model.encoder.layers.11.self_attn
199
- model.vision_model.encoder.layers.11.self_attn.q_proj
200
- model.vision_model.encoder.layers.11.self_attn.k_proj
201
- model.vision_model.encoder.layers.11.self_attn.v_proj
202
- model.vision_model.encoder.layers.11.self_attn.projection_layer
203
- model.vision_model.encoder.layers.11.self_attn.projection_dropout
204
- model.vision_model.encoder.layers.11.self_attn.q_norm
205
- model.vision_model.encoder.layers.11.self_attn.k_norm
206
- model.vision_model.encoder.layers.11.layer_norm2
207
- model.vision_model.encoder.layers.11.mlp
208
- model.vision_model.encoder.layers.11.mlp.activation_fn
209
- model.vision_model.encoder.layers.11.mlp.fc1
210
- model.vision_model.encoder.layers.11.mlp.fc2
211
- model.vision_model.encoder.layers.11.mlp.dropout1
212
- model.vision_model.encoder.layers.11.mlp.dropout2
213
- model.vision_model.encoder.layers.12
214
- model.vision_model.encoder.layers.12.layer_norm1
215
- model.vision_model.encoder.layers.12.self_attn
216
- model.vision_model.encoder.layers.12.self_attn.q_proj
217
- model.vision_model.encoder.layers.12.self_attn.k_proj
218
- model.vision_model.encoder.layers.12.self_attn.v_proj
219
- model.vision_model.encoder.layers.12.self_attn.projection_layer
220
- model.vision_model.encoder.layers.12.self_attn.projection_dropout
221
- model.vision_model.encoder.layers.12.self_attn.q_norm
222
- model.vision_model.encoder.layers.12.self_attn.k_norm
223
- model.vision_model.encoder.layers.12.layer_norm2
224
- model.vision_model.encoder.layers.12.mlp
225
- model.vision_model.encoder.layers.12.mlp.activation_fn
226
- model.vision_model.encoder.layers.12.mlp.fc1
227
- model.vision_model.encoder.layers.12.mlp.fc2
228
- model.vision_model.encoder.layers.12.mlp.dropout1
229
- model.vision_model.encoder.layers.12.mlp.dropout2
230
- model.vision_model.encoder.layers.13
231
- model.vision_model.encoder.layers.13.layer_norm1
232
- model.vision_model.encoder.layers.13.self_attn
233
- model.vision_model.encoder.layers.13.self_attn.q_proj
234
- model.vision_model.encoder.layers.13.self_attn.k_proj
235
- model.vision_model.encoder.layers.13.self_attn.v_proj
236
- model.vision_model.encoder.layers.13.self_attn.projection_layer
237
- model.vision_model.encoder.layers.13.self_attn.projection_dropout
238
- model.vision_model.encoder.layers.13.self_attn.q_norm
239
- model.vision_model.encoder.layers.13.self_attn.k_norm
240
- model.vision_model.encoder.layers.13.layer_norm2
241
- model.vision_model.encoder.layers.13.mlp
242
- model.vision_model.encoder.layers.13.mlp.activation_fn
243
- model.vision_model.encoder.layers.13.mlp.fc1
244
- model.vision_model.encoder.layers.13.mlp.fc2
245
- model.vision_model.encoder.layers.13.mlp.dropout1
246
- model.vision_model.encoder.layers.13.mlp.dropout2
247
- model.vision_model.encoder.layers.14
248
- model.vision_model.encoder.layers.14.layer_norm1
249
- model.vision_model.encoder.layers.14.self_attn
250
- model.vision_model.encoder.layers.14.self_attn.q_proj
251
- model.vision_model.encoder.layers.14.self_attn.k_proj
252
- model.vision_model.encoder.layers.14.self_attn.v_proj
253
- model.vision_model.encoder.layers.14.self_attn.projection_layer
254
- model.vision_model.encoder.layers.14.self_attn.projection_dropout
255
- model.vision_model.encoder.layers.14.self_attn.q_norm
256
- model.vision_model.encoder.layers.14.self_attn.k_norm
257
- model.vision_model.encoder.layers.14.layer_norm2
258
- model.vision_model.encoder.layers.14.mlp
259
- model.vision_model.encoder.layers.14.mlp.activation_fn
260
- model.vision_model.encoder.layers.14.mlp.fc1
261
- model.vision_model.encoder.layers.14.mlp.fc2
262
- model.vision_model.encoder.layers.14.mlp.dropout1
263
- model.vision_model.encoder.layers.14.mlp.dropout2
264
- model.vision_model.encoder.layers.15
265
- model.vision_model.encoder.layers.15.layer_norm1
266
- model.vision_model.encoder.layers.15.self_attn
267
- model.vision_model.encoder.layers.15.self_attn.q_proj
268
- model.vision_model.encoder.layers.15.self_attn.k_proj
269
- model.vision_model.encoder.layers.15.self_attn.v_proj
270
- model.vision_model.encoder.layers.15.self_attn.projection_layer
271
- model.vision_model.encoder.layers.15.self_attn.projection_dropout
272
- model.vision_model.encoder.layers.15.self_attn.q_norm
273
- model.vision_model.encoder.layers.15.self_attn.k_norm
274
- model.vision_model.encoder.layers.15.layer_norm2
275
- model.vision_model.encoder.layers.15.mlp
276
- model.vision_model.encoder.layers.15.mlp.activation_fn
277
- model.vision_model.encoder.layers.15.mlp.fc1
278
- model.vision_model.encoder.layers.15.mlp.fc2
279
- model.vision_model.encoder.layers.15.mlp.dropout1
280
- model.vision_model.encoder.layers.15.mlp.dropout2
281
- model.vision_model.encoder.layers.16
282
- model.vision_model.encoder.layers.16.layer_norm1
283
- model.vision_model.encoder.layers.16.self_attn
284
- model.vision_model.encoder.layers.16.self_attn.q_proj
285
- model.vision_model.encoder.layers.16.self_attn.k_proj
286
- model.vision_model.encoder.layers.16.self_attn.v_proj
287
- model.vision_model.encoder.layers.16.self_attn.projection_layer
288
- model.vision_model.encoder.layers.16.self_attn.projection_dropout
289
- model.vision_model.encoder.layers.16.self_attn.q_norm
290
- model.vision_model.encoder.layers.16.self_attn.k_norm
291
- model.vision_model.encoder.layers.16.layer_norm2
292
- model.vision_model.encoder.layers.16.mlp
293
- model.vision_model.encoder.layers.16.mlp.activation_fn
294
- model.vision_model.encoder.layers.16.mlp.fc1
295
- model.vision_model.encoder.layers.16.mlp.fc2
296
- model.vision_model.encoder.layers.16.mlp.dropout1
297
- model.vision_model.encoder.layers.16.mlp.dropout2
298
- model.vision_model.encoder.layers.17
299
- model.vision_model.encoder.layers.17.layer_norm1
300
- model.vision_model.encoder.layers.17.self_attn
301
- model.vision_model.encoder.layers.17.self_attn.q_proj
302
- model.vision_model.encoder.layers.17.self_attn.k_proj
303
- model.vision_model.encoder.layers.17.self_attn.v_proj
304
- model.vision_model.encoder.layers.17.self_attn.projection_layer
305
- model.vision_model.encoder.layers.17.self_attn.projection_dropout
306
- model.vision_model.encoder.layers.17.self_attn.q_norm
307
- model.vision_model.encoder.layers.17.self_attn.k_norm
308
- model.vision_model.encoder.layers.17.layer_norm2
309
- model.vision_model.encoder.layers.17.mlp
310
- model.vision_model.encoder.layers.17.mlp.activation_fn
311
- model.vision_model.encoder.layers.17.mlp.fc1
312
- model.vision_model.encoder.layers.17.mlp.fc2
313
- model.vision_model.encoder.layers.17.mlp.dropout1
314
- model.vision_model.encoder.layers.17.mlp.dropout2
315
- model.vision_model.encoder.layers.18
316
- model.vision_model.encoder.layers.18.layer_norm1
317
- model.vision_model.encoder.layers.18.self_attn
318
- model.vision_model.encoder.layers.18.self_attn.q_proj
319
- model.vision_model.encoder.layers.18.self_attn.k_proj
320
- model.vision_model.encoder.layers.18.self_attn.v_proj
321
- model.vision_model.encoder.layers.18.self_attn.projection_layer
322
- model.vision_model.encoder.layers.18.self_attn.projection_dropout
323
- model.vision_model.encoder.layers.18.self_attn.q_norm
324
- model.vision_model.encoder.layers.18.self_attn.k_norm
325
- model.vision_model.encoder.layers.18.layer_norm2
326
- model.vision_model.encoder.layers.18.mlp
327
- model.vision_model.encoder.layers.18.mlp.activation_fn
328
- model.vision_model.encoder.layers.18.mlp.fc1
329
- model.vision_model.encoder.layers.18.mlp.fc2
330
- model.vision_model.encoder.layers.18.mlp.dropout1
331
- model.vision_model.encoder.layers.18.mlp.dropout2
332
- model.vision_model.encoder.layers.19
333
- model.vision_model.encoder.layers.19.layer_norm1
334
- model.vision_model.encoder.layers.19.self_attn
335
- model.vision_model.encoder.layers.19.self_attn.q_proj
336
- model.vision_model.encoder.layers.19.self_attn.k_proj
337
- model.vision_model.encoder.layers.19.self_attn.v_proj
338
- model.vision_model.encoder.layers.19.self_attn.projection_layer
339
- model.vision_model.encoder.layers.19.self_attn.projection_dropout
340
- model.vision_model.encoder.layers.19.self_attn.q_norm
341
- model.vision_model.encoder.layers.19.self_attn.k_norm
342
- model.vision_model.encoder.layers.19.layer_norm2
343
- model.vision_model.encoder.layers.19.mlp
344
- model.vision_model.encoder.layers.19.mlp.activation_fn
345
- model.vision_model.encoder.layers.19.mlp.fc1
346
- model.vision_model.encoder.layers.19.mlp.fc2
347
- model.vision_model.encoder.layers.19.mlp.dropout1
348
- model.vision_model.encoder.layers.19.mlp.dropout2
349
- model.vision_model.encoder.layers.20
350
- model.vision_model.encoder.layers.20.layer_norm1
351
- model.vision_model.encoder.layers.20.self_attn
352
- model.vision_model.encoder.layers.20.self_attn.q_proj
353
- model.vision_model.encoder.layers.20.self_attn.k_proj
354
- model.vision_model.encoder.layers.20.self_attn.v_proj
355
- model.vision_model.encoder.layers.20.self_attn.projection_layer
356
- model.vision_model.encoder.layers.20.self_attn.projection_dropout
357
- model.vision_model.encoder.layers.20.self_attn.q_norm
358
- model.vision_model.encoder.layers.20.self_attn.k_norm
359
- model.vision_model.encoder.layers.20.layer_norm2
360
- model.vision_model.encoder.layers.20.mlp
361
- model.vision_model.encoder.layers.20.mlp.activation_fn
362
- model.vision_model.encoder.layers.20.mlp.fc1
363
- model.vision_model.encoder.layers.20.mlp.fc2
364
- model.vision_model.encoder.layers.20.mlp.dropout1
365
- model.vision_model.encoder.layers.20.mlp.dropout2
366
- model.vision_model.encoder.layers.21
367
- model.vision_model.encoder.layers.21.layer_norm1
368
- model.vision_model.encoder.layers.21.self_attn
369
- model.vision_model.encoder.layers.21.self_attn.q_proj
370
- model.vision_model.encoder.layers.21.self_attn.k_proj
371
- model.vision_model.encoder.layers.21.self_attn.v_proj
372
- model.vision_model.encoder.layers.21.self_attn.projection_layer
373
- model.vision_model.encoder.layers.21.self_attn.projection_dropout
374
- model.vision_model.encoder.layers.21.self_attn.q_norm
375
- model.vision_model.encoder.layers.21.self_attn.k_norm
376
- model.vision_model.encoder.layers.21.layer_norm2
377
- model.vision_model.encoder.layers.21.mlp
378
- model.vision_model.encoder.layers.21.mlp.activation_fn
379
- model.vision_model.encoder.layers.21.mlp.fc1
380
- model.vision_model.encoder.layers.21.mlp.fc2
381
- model.vision_model.encoder.layers.21.mlp.dropout1
382
- model.vision_model.encoder.layers.21.mlp.dropout2
383
- model.vision_model.encoder.layers.22
384
- model.vision_model.encoder.layers.22.layer_norm1
385
- model.vision_model.encoder.layers.22.self_attn
386
- model.vision_model.encoder.layers.22.self_attn.q_proj
387
- model.vision_model.encoder.layers.22.self_attn.k_proj
388
- model.vision_model.encoder.layers.22.self_attn.v_proj
389
- model.vision_model.encoder.layers.22.self_attn.projection_layer
390
- model.vision_model.encoder.layers.22.self_attn.projection_dropout
391
- model.vision_model.encoder.layers.22.self_attn.q_norm
392
- model.vision_model.encoder.layers.22.self_attn.k_norm
393
- model.vision_model.encoder.layers.22.layer_norm2
394
- model.vision_model.encoder.layers.22.mlp
395
- model.vision_model.encoder.layers.22.mlp.activation_fn
396
- model.vision_model.encoder.layers.22.mlp.fc1
397
- model.vision_model.encoder.layers.22.mlp.fc2
398
- model.vision_model.encoder.layers.22.mlp.dropout1
399
- model.vision_model.encoder.layers.22.mlp.dropout2
400
- model.vision_model.encoder.layers.23
401
- model.vision_model.encoder.layers.23.layer_norm1
402
- model.vision_model.encoder.layers.23.self_attn
403
- model.vision_model.encoder.layers.23.self_attn.q_proj
404
- model.vision_model.encoder.layers.23.self_attn.k_proj
405
- model.vision_model.encoder.layers.23.self_attn.v_proj
406
- model.vision_model.encoder.layers.23.self_attn.projection_layer
407
- model.vision_model.encoder.layers.23.self_attn.projection_dropout
408
- model.vision_model.encoder.layers.23.self_attn.q_norm
409
- model.vision_model.encoder.layers.23.self_attn.k_norm
410
- model.vision_model.encoder.layers.23.layer_norm2
411
- model.vision_model.encoder.layers.23.mlp
412
- model.vision_model.encoder.layers.23.mlp.activation_fn
413
- model.vision_model.encoder.layers.23.mlp.fc1
414
- model.vision_model.encoder.layers.23.mlp.fc2
415
- model.vision_model.encoder.layers.23.mlp.dropout1
416
- model.vision_model.encoder.layers.23.mlp.dropout2
417
- model.vision_model.post_layernorm
418
- model.aligner
419
- model.aligner.fc1
420
- model.aligner.hidden_layers
421
- model.aligner.hidden_layers.0
422
- model.aligner.activation_fn
423
- model.vqmodel
424
- model.vqmodel.encoder
425
- model.vqmodel.encoder.conv_in
426
- model.vqmodel.encoder.down
427
- model.vqmodel.encoder.down.0
428
- model.vqmodel.encoder.down.0.block
429
- model.vqmodel.encoder.down.0.block.0
430
- model.vqmodel.encoder.down.0.block.0.norm1
431
- model.vqmodel.encoder.down.0.block.0.conv1
432
- model.vqmodel.encoder.down.0.block.0.norm2
433
- model.vqmodel.encoder.down.0.block.0.dropout
434
- model.vqmodel.encoder.down.0.block.0.conv2
435
- model.vqmodel.encoder.down.0.block.1
436
- model.vqmodel.encoder.down.0.block.1.norm1
437
- model.vqmodel.encoder.down.0.block.1.conv1
438
- model.vqmodel.encoder.down.0.block.1.norm2
439
- model.vqmodel.encoder.down.0.block.1.dropout
440
- model.vqmodel.encoder.down.0.block.1.conv2
441
- model.vqmodel.encoder.down.0.attn
442
- model.vqmodel.encoder.down.0.downsample
443
- model.vqmodel.encoder.down.0.downsample.conv
444
- model.vqmodel.encoder.down.1
445
- model.vqmodel.encoder.down.1.block
446
- model.vqmodel.encoder.down.1.block.0
447
- model.vqmodel.encoder.down.1.block.0.norm1
448
- model.vqmodel.encoder.down.1.block.0.conv1
449
- model.vqmodel.encoder.down.1.block.0.norm2
450
- model.vqmodel.encoder.down.1.block.0.dropout
451
- model.vqmodel.encoder.down.1.block.0.conv2
452
- model.vqmodel.encoder.down.1.block.1
453
- model.vqmodel.encoder.down.1.block.1.norm1
454
- model.vqmodel.encoder.down.1.block.1.conv1
455
- model.vqmodel.encoder.down.1.block.1.norm2
456
- model.vqmodel.encoder.down.1.block.1.dropout
457
- model.vqmodel.encoder.down.1.block.1.conv2
458
- model.vqmodel.encoder.down.1.attn
459
- model.vqmodel.encoder.down.1.downsample
460
- model.vqmodel.encoder.down.1.downsample.conv
461
- model.vqmodel.encoder.down.2
462
- model.vqmodel.encoder.down.2.block
463
- model.vqmodel.encoder.down.2.block.0
464
- model.vqmodel.encoder.down.2.block.0.norm1
465
- model.vqmodel.encoder.down.2.block.0.conv1
466
- model.vqmodel.encoder.down.2.block.0.norm2
467
- model.vqmodel.encoder.down.2.block.0.dropout
468
- model.vqmodel.encoder.down.2.block.0.conv2
469
- model.vqmodel.encoder.down.2.block.0.nin_shortcut
470
- model.vqmodel.encoder.down.2.block.1
471
- model.vqmodel.encoder.down.2.block.1.norm1
472
- model.vqmodel.encoder.down.2.block.1.conv1
473
- model.vqmodel.encoder.down.2.block.1.norm2
474
- model.vqmodel.encoder.down.2.block.1.dropout
475
- model.vqmodel.encoder.down.2.block.1.conv2
476
- model.vqmodel.encoder.down.2.attn
477
- model.vqmodel.encoder.down.2.downsample
478
- model.vqmodel.encoder.down.2.downsample.conv
479
- model.vqmodel.encoder.down.3
480
- model.vqmodel.encoder.down.3.block
481
- model.vqmodel.encoder.down.3.block.0
482
- model.vqmodel.encoder.down.3.block.0.norm1
483
- model.vqmodel.encoder.down.3.block.0.conv1
484
- model.vqmodel.encoder.down.3.block.0.norm2
485
- model.vqmodel.encoder.down.3.block.0.dropout
486
- model.vqmodel.encoder.down.3.block.0.conv2
487
- model.vqmodel.encoder.down.3.block.1
488
- model.vqmodel.encoder.down.3.block.1.norm1
489
- model.vqmodel.encoder.down.3.block.1.conv1
490
- model.vqmodel.encoder.down.3.block.1.norm2
491
- model.vqmodel.encoder.down.3.block.1.dropout
492
- model.vqmodel.encoder.down.3.block.1.conv2
493
- model.vqmodel.encoder.down.3.attn
494
- model.vqmodel.encoder.down.3.downsample
495
- model.vqmodel.encoder.down.3.downsample.conv
496
- model.vqmodel.encoder.down.4
497
- model.vqmodel.encoder.down.4.block
498
- model.vqmodel.encoder.down.4.block.0
499
- model.vqmodel.encoder.down.4.block.0.norm1
500
- model.vqmodel.encoder.down.4.block.0.conv1
501
- model.vqmodel.encoder.down.4.block.0.norm2
502
- model.vqmodel.encoder.down.4.block.0.dropout
503
- model.vqmodel.encoder.down.4.block.0.conv2
504
- model.vqmodel.encoder.down.4.block.0.nin_shortcut
505
- model.vqmodel.encoder.down.4.block.1
506
- model.vqmodel.encoder.down.4.block.1.norm1
507
- model.vqmodel.encoder.down.4.block.1.conv1
508
- model.vqmodel.encoder.down.4.block.1.norm2
509
- model.vqmodel.encoder.down.4.block.1.dropout
510
- model.vqmodel.encoder.down.4.block.1.conv2
511
- model.vqmodel.encoder.down.4.attn
512
- model.vqmodel.encoder.down.4.attn.0
513
- model.vqmodel.encoder.down.4.attn.0.norm
514
- model.vqmodel.encoder.down.4.attn.0.q
515
- model.vqmodel.encoder.down.4.attn.0.k
516
- model.vqmodel.encoder.down.4.attn.0.v
517
- model.vqmodel.encoder.down.4.attn.0.proj_out
518
- model.vqmodel.encoder.down.4.attn.1
519
- model.vqmodel.encoder.down.4.attn.1.norm
520
- model.vqmodel.encoder.down.4.attn.1.q
521
- model.vqmodel.encoder.down.4.attn.1.k
522
- model.vqmodel.encoder.down.4.attn.1.v
523
- model.vqmodel.encoder.down.4.attn.1.proj_out
524
- model.vqmodel.encoder.mid
525
- model.vqmodel.encoder.mid.block_1
526
- model.vqmodel.encoder.mid.block_1.norm1
527
- model.vqmodel.encoder.mid.block_1.conv1
528
- model.vqmodel.encoder.mid.block_1.norm2
529
- model.vqmodel.encoder.mid.block_1.dropout
530
- model.vqmodel.encoder.mid.block_1.conv2
531
- model.vqmodel.encoder.mid.attn_1
532
- model.vqmodel.encoder.mid.attn_1.norm
533
- model.vqmodel.encoder.mid.attn_1.q
534
- model.vqmodel.encoder.mid.attn_1.k
535
- model.vqmodel.encoder.mid.attn_1.v
536
- model.vqmodel.encoder.mid.attn_1.proj_out
537
- model.vqmodel.encoder.mid.block_2
538
- model.vqmodel.encoder.mid.block_2.norm1
539
- model.vqmodel.encoder.mid.block_2.conv1
540
- model.vqmodel.encoder.mid.block_2.norm2
541
- model.vqmodel.encoder.mid.block_2.dropout
542
- model.vqmodel.encoder.mid.block_2.conv2
543
- model.vqmodel.encoder.norm_out
544
- model.vqmodel.encoder.conv_out
545
- model.vqmodel.quantize
546
- model.vqmodel.quantize.embedding
547
- model.vqmodel.quant_conv
548
- model.vqmodel.post_quant_conv
549
- model.vqmodel.decoder
550
- model.vqmodel.decoder.conv_in
551
- model.vqmodel.decoder.mid
552
- model.vqmodel.decoder.mid.block_1
553
- model.vqmodel.decoder.mid.block_1.norm1
554
- model.vqmodel.decoder.mid.block_1.conv1
555
- model.vqmodel.decoder.mid.block_1.norm2
556
- model.vqmodel.decoder.mid.block_1.dropout
557
- model.vqmodel.decoder.mid.block_1.conv2
558
- model.vqmodel.decoder.mid.attn_1
559
- model.vqmodel.decoder.mid.attn_1.norm
560
- model.vqmodel.decoder.mid.attn_1.q
561
- model.vqmodel.decoder.mid.attn_1.k
562
- model.vqmodel.decoder.mid.attn_1.v
563
- model.vqmodel.decoder.mid.attn_1.proj_out
564
- model.vqmodel.decoder.mid.block_2
565
- model.vqmodel.decoder.mid.block_2.norm1
566
- model.vqmodel.decoder.mid.block_2.conv1
567
- model.vqmodel.decoder.mid.block_2.norm2
568
- model.vqmodel.decoder.mid.block_2.dropout
569
- model.vqmodel.decoder.mid.block_2.conv2
570
- model.vqmodel.decoder.up
571
- model.vqmodel.decoder.up.0
572
- model.vqmodel.decoder.up.0.block
573
- model.vqmodel.decoder.up.0.block.0
574
- model.vqmodel.decoder.up.0.block.0.norm1
575
- model.vqmodel.decoder.up.0.block.0.conv1
576
- model.vqmodel.decoder.up.0.block.0.norm2
577
- model.vqmodel.decoder.up.0.block.0.dropout
578
- model.vqmodel.decoder.up.0.block.0.conv2
579
- model.vqmodel.decoder.up.0.block.1
580
- model.vqmodel.decoder.up.0.block.1.norm1
581
- model.vqmodel.decoder.up.0.block.1.conv1
582
- model.vqmodel.decoder.up.0.block.1.norm2
583
- model.vqmodel.decoder.up.0.block.1.dropout
584
- model.vqmodel.decoder.up.0.block.1.conv2
585
- model.vqmodel.decoder.up.0.block.2
586
- model.vqmodel.decoder.up.0.block.2.norm1
587
- model.vqmodel.decoder.up.0.block.2.conv1
588
- model.vqmodel.decoder.up.0.block.2.norm2
589
- model.vqmodel.decoder.up.0.block.2.dropout
590
- model.vqmodel.decoder.up.0.block.2.conv2
591
- model.vqmodel.decoder.up.0.attn
592
- model.vqmodel.decoder.up.0.attn.0
593
- model.vqmodel.decoder.up.0.attn.0.norm
594
- model.vqmodel.decoder.up.0.attn.0.q
595
- model.vqmodel.decoder.up.0.attn.0.k
596
- model.vqmodel.decoder.up.0.attn.0.v
597
- model.vqmodel.decoder.up.0.attn.0.proj_out
598
- model.vqmodel.decoder.up.0.attn.1
599
- model.vqmodel.decoder.up.0.attn.1.norm
600
- model.vqmodel.decoder.up.0.attn.1.q
601
- model.vqmodel.decoder.up.0.attn.1.k
602
- model.vqmodel.decoder.up.0.attn.1.v
603
- model.vqmodel.decoder.up.0.attn.1.proj_out
604
- model.vqmodel.decoder.up.0.attn.2
605
- model.vqmodel.decoder.up.0.attn.2.norm
606
- model.vqmodel.decoder.up.0.attn.2.q
607
- model.vqmodel.decoder.up.0.attn.2.k
608
- model.vqmodel.decoder.up.0.attn.2.v
609
- model.vqmodel.decoder.up.0.attn.2.proj_out
610
- model.vqmodel.decoder.up.0.upsample
611
- model.vqmodel.decoder.up.0.upsample.conv
612
- model.vqmodel.decoder.up.1
613
- model.vqmodel.decoder.up.1.block
614
- model.vqmodel.decoder.up.1.block.0
615
- model.vqmodel.decoder.up.1.block.0.norm1
616
- model.vqmodel.decoder.up.1.block.0.conv1
617
- model.vqmodel.decoder.up.1.block.0.norm2
618
- model.vqmodel.decoder.up.1.block.0.dropout
619
- model.vqmodel.decoder.up.1.block.0.conv2
620
- model.vqmodel.decoder.up.1.block.0.nin_shortcut
621
- model.vqmodel.decoder.up.1.block.1
622
- model.vqmodel.decoder.up.1.block.1.norm1
623
- model.vqmodel.decoder.up.1.block.1.conv1
624
- model.vqmodel.decoder.up.1.block.1.norm2
625
- model.vqmodel.decoder.up.1.block.1.dropout
626
- model.vqmodel.decoder.up.1.block.1.conv2
627
- model.vqmodel.decoder.up.1.block.2
628
- model.vqmodel.decoder.up.1.block.2.norm1
629
- model.vqmodel.decoder.up.1.block.2.conv1
630
- model.vqmodel.decoder.up.1.block.2.norm2
631
- model.vqmodel.decoder.up.1.block.2.dropout
632
- model.vqmodel.decoder.up.1.block.2.conv2
633
- model.vqmodel.decoder.up.1.attn
634
- model.vqmodel.decoder.up.1.upsample
635
- model.vqmodel.decoder.up.1.upsample.conv
636
- model.vqmodel.decoder.up.2
637
- model.vqmodel.decoder.up.2.block
638
- model.vqmodel.decoder.up.2.block.0
639
- model.vqmodel.decoder.up.2.block.0.norm1
640
- model.vqmodel.decoder.up.2.block.0.conv1
641
- model.vqmodel.decoder.up.2.block.0.norm2
642
- model.vqmodel.decoder.up.2.block.0.dropout
643
- model.vqmodel.decoder.up.2.block.0.conv2
644
- model.vqmodel.decoder.up.2.block.1
645
- model.vqmodel.decoder.up.2.block.1.norm1
646
- model.vqmodel.decoder.up.2.block.1.conv1
647
- model.vqmodel.decoder.up.2.block.1.norm2
648
- model.vqmodel.decoder.up.2.block.1.dropout
649
- model.vqmodel.decoder.up.2.block.1.conv2
650
- model.vqmodel.decoder.up.2.block.2
651
- model.vqmodel.decoder.up.2.block.2.norm1
652
- model.vqmodel.decoder.up.2.block.2.conv1
653
- model.vqmodel.decoder.up.2.block.2.norm2
654
- model.vqmodel.decoder.up.2.block.2.dropout
655
- model.vqmodel.decoder.up.2.block.2.conv2
656
- model.vqmodel.decoder.up.2.attn
657
- model.vqmodel.decoder.up.2.upsample
658
- model.vqmodel.decoder.up.2.upsample.conv
659
- model.vqmodel.decoder.up.3
660
- model.vqmodel.decoder.up.3.block
661
- model.vqmodel.decoder.up.3.block.0
662
- model.vqmodel.decoder.up.3.block.0.norm1
663
- model.vqmodel.decoder.up.3.block.0.conv1
664
- model.vqmodel.decoder.up.3.block.0.norm2
665
- model.vqmodel.decoder.up.3.block.0.dropout
666
- model.vqmodel.decoder.up.3.block.0.conv2
667
- model.vqmodel.decoder.up.3.block.0.nin_shortcut
668
- model.vqmodel.decoder.up.3.block.1
669
- model.vqmodel.decoder.up.3.block.1.norm1
670
- model.vqmodel.decoder.up.3.block.1.conv1
671
- model.vqmodel.decoder.up.3.block.1.norm2
672
- model.vqmodel.decoder.up.3.block.1.dropout
673
- model.vqmodel.decoder.up.3.block.1.conv2
674
- model.vqmodel.decoder.up.3.block.2
675
- model.vqmodel.decoder.up.3.block.2.norm1
676
- model.vqmodel.decoder.up.3.block.2.conv1
677
- model.vqmodel.decoder.up.3.block.2.norm2
678
- model.vqmodel.decoder.up.3.block.2.dropout
679
- model.vqmodel.decoder.up.3.block.2.conv2
680
- model.vqmodel.decoder.up.3.attn
681
- model.vqmodel.decoder.up.3.upsample
682
- model.vqmodel.decoder.up.3.upsample.conv
683
- model.vqmodel.decoder.up.4
684
- model.vqmodel.decoder.up.4.block
685
- model.vqmodel.decoder.up.4.block.0
686
- model.vqmodel.decoder.up.4.block.0.norm1
687
- model.vqmodel.decoder.up.4.block.0.conv1
688
- model.vqmodel.decoder.up.4.block.0.norm2
689
- model.vqmodel.decoder.up.4.block.0.dropout
690
- model.vqmodel.decoder.up.4.block.0.conv2
691
- model.vqmodel.decoder.up.4.block.1
692
- model.vqmodel.decoder.up.4.block.1.norm1
693
- model.vqmodel.decoder.up.4.block.1.conv1
694
- model.vqmodel.decoder.up.4.block.1.norm2
695
- model.vqmodel.decoder.up.4.block.1.dropout
696
- model.vqmodel.decoder.up.4.block.1.conv2
697
- model.vqmodel.decoder.up.4.block.2
698
- model.vqmodel.decoder.up.4.block.2.norm1
699
- model.vqmodel.decoder.up.4.block.2.conv1
700
- model.vqmodel.decoder.up.4.block.2.norm2
701
- model.vqmodel.decoder.up.4.block.2.dropout
702
- model.vqmodel.decoder.up.4.block.2.conv2
703
- model.vqmodel.decoder.up.4.attn
704
- model.vqmodel.decoder.norm_out
705
- model.vqmodel.decoder.conv_out
706
- model.generation_embeddings
707
- model.generation_aligner
708
- model.generation_aligner.fc1
709
- model.generation_aligner.hidden_layers
710
- model.generation_aligner.hidden_layers.0
711
- model.generation_aligner.activation_fn
712
- model.generation_head
713
- model.generation_head.proj_out
714
- model.generation_head.activation_fn
715
- model.generation_head.vision_head
716
- model.language_model
717
- model.language_model.embed_tokens
718
- model.language_model.layers
719
- model.language_model.layers.0
720
- model.language_model.layers.0.self_attn
721
- model.language_model.layers.0.self_attn.q_proj
722
- model.language_model.layers.0.self_attn.k_proj
723
- model.language_model.layers.0.self_attn.v_proj
724
- model.language_model.layers.0.self_attn.o_proj
725
- model.language_model.layers.0.mlp
726
- model.language_model.layers.0.mlp.gate_proj
727
- model.language_model.layers.0.mlp.up_proj
728
- model.language_model.layers.0.mlp.down_proj
729
- model.language_model.layers.0.mlp.act_fn
730
- model.language_model.layers.0.input_layernorm
731
- model.language_model.layers.0.post_attention_layernorm
732
- model.language_model.layers.1
733
- model.language_model.layers.1.self_attn
734
- model.language_model.layers.1.self_attn.q_proj
735
- model.language_model.layers.1.self_attn.k_proj
736
- model.language_model.layers.1.self_attn.v_proj
737
- model.language_model.layers.1.self_attn.o_proj
738
- model.language_model.layers.1.mlp
739
- model.language_model.layers.1.mlp.gate_proj
740
- model.language_model.layers.1.mlp.up_proj
741
- model.language_model.layers.1.mlp.down_proj
742
- model.language_model.layers.1.mlp.act_fn
743
- model.language_model.layers.1.input_layernorm
744
- model.language_model.layers.1.post_attention_layernorm
745
- model.language_model.layers.2
746
- model.language_model.layers.2.self_attn
747
- model.language_model.layers.2.self_attn.q_proj
748
- model.language_model.layers.2.self_attn.k_proj
749
- model.language_model.layers.2.self_attn.v_proj
750
- model.language_model.layers.2.self_attn.o_proj
751
- model.language_model.layers.2.mlp
752
- model.language_model.layers.2.mlp.gate_proj
753
- model.language_model.layers.2.mlp.up_proj
754
- model.language_model.layers.2.mlp.down_proj
755
- model.language_model.layers.2.mlp.act_fn
756
- model.language_model.layers.2.input_layernorm
757
- model.language_model.layers.2.post_attention_layernorm
758
- model.language_model.layers.3
759
- model.language_model.layers.3.self_attn
760
- model.language_model.layers.3.self_attn.q_proj
761
- model.language_model.layers.3.self_attn.k_proj
762
- model.language_model.layers.3.self_attn.v_proj
763
- model.language_model.layers.3.self_attn.o_proj
764
- model.language_model.layers.3.mlp
765
- model.language_model.layers.3.mlp.gate_proj
766
- model.language_model.layers.3.mlp.up_proj
767
- model.language_model.layers.3.mlp.down_proj
768
- model.language_model.layers.3.mlp.act_fn
769
- model.language_model.layers.3.input_layernorm
770
- model.language_model.layers.3.post_attention_layernorm
771
- model.language_model.layers.4
772
- model.language_model.layers.4.self_attn
773
- model.language_model.layers.4.self_attn.q_proj
774
- model.language_model.layers.4.self_attn.k_proj
775
- model.language_model.layers.4.self_attn.v_proj
776
- model.language_model.layers.4.self_attn.o_proj
777
- model.language_model.layers.4.mlp
778
- model.language_model.layers.4.mlp.gate_proj
779
- model.language_model.layers.4.mlp.up_proj
780
- model.language_model.layers.4.mlp.down_proj
781
- model.language_model.layers.4.mlp.act_fn
782
- model.language_model.layers.4.input_layernorm
783
- model.language_model.layers.4.post_attention_layernorm
784
- model.language_model.layers.5
785
- model.language_model.layers.5.self_attn
786
- model.language_model.layers.5.self_attn.q_proj
787
- model.language_model.layers.5.self_attn.k_proj
788
- model.language_model.layers.5.self_attn.v_proj
789
- model.language_model.layers.5.self_attn.o_proj
790
- model.language_model.layers.5.mlp
791
- model.language_model.layers.5.mlp.gate_proj
792
- model.language_model.layers.5.mlp.up_proj
793
- model.language_model.layers.5.mlp.down_proj
794
- model.language_model.layers.5.mlp.act_fn
795
- model.language_model.layers.5.input_layernorm
796
- model.language_model.layers.5.post_attention_layernorm
797
- model.language_model.layers.6
798
- model.language_model.layers.6.self_attn
799
- model.language_model.layers.6.self_attn.q_proj
800
- model.language_model.layers.6.self_attn.k_proj
801
- model.language_model.layers.6.self_attn.v_proj
802
- model.language_model.layers.6.self_attn.o_proj
803
- model.language_model.layers.6.mlp
804
- model.language_model.layers.6.mlp.gate_proj
805
- model.language_model.layers.6.mlp.up_proj
806
- model.language_model.layers.6.mlp.down_proj
807
- model.language_model.layers.6.mlp.act_fn
808
- model.language_model.layers.6.input_layernorm
809
- model.language_model.layers.6.post_attention_layernorm
810
- model.language_model.layers.7
811
- model.language_model.layers.7.self_attn
812
- model.language_model.layers.7.self_attn.q_proj
813
- model.language_model.layers.7.self_attn.k_proj
814
- model.language_model.layers.7.self_attn.v_proj
815
- model.language_model.layers.7.self_attn.o_proj
816
- model.language_model.layers.7.mlp
817
- model.language_model.layers.7.mlp.gate_proj
818
- model.language_model.layers.7.mlp.up_proj
819
- model.language_model.layers.7.mlp.down_proj
820
- model.language_model.layers.7.mlp.act_fn
821
- model.language_model.layers.7.input_layernorm
822
- model.language_model.layers.7.post_attention_layernorm
823
- model.language_model.layers.8
824
- model.language_model.layers.8.self_attn
825
- model.language_model.layers.8.self_attn.q_proj
826
- model.language_model.layers.8.self_attn.k_proj
827
- model.language_model.layers.8.self_attn.v_proj
828
- model.language_model.layers.8.self_attn.o_proj
829
- model.language_model.layers.8.mlp
830
- model.language_model.layers.8.mlp.gate_proj
831
- model.language_model.layers.8.mlp.up_proj
832
- model.language_model.layers.8.mlp.down_proj
833
- model.language_model.layers.8.mlp.act_fn
834
- model.language_model.layers.8.input_layernorm
835
- model.language_model.layers.8.post_attention_layernorm
836
- model.language_model.layers.9
837
- model.language_model.layers.9.self_attn
838
- model.language_model.layers.9.self_attn.q_proj
839
- model.language_model.layers.9.self_attn.k_proj
840
- model.language_model.layers.9.self_attn.v_proj
841
- model.language_model.layers.9.self_attn.o_proj
842
- model.language_model.layers.9.mlp
843
- model.language_model.layers.9.mlp.gate_proj
844
- model.language_model.layers.9.mlp.up_proj
845
- model.language_model.layers.9.mlp.down_proj
846
- model.language_model.layers.9.mlp.act_fn
847
- model.language_model.layers.9.input_layernorm
848
- model.language_model.layers.9.post_attention_layernorm
849
- model.language_model.layers.10
850
- model.language_model.layers.10.self_attn
851
- model.language_model.layers.10.self_attn.q_proj
852
- model.language_model.layers.10.self_attn.k_proj
853
- model.language_model.layers.10.self_attn.v_proj
854
- model.language_model.layers.10.self_attn.o_proj
855
- model.language_model.layers.10.mlp
856
- model.language_model.layers.10.mlp.gate_proj
857
- model.language_model.layers.10.mlp.up_proj
858
- model.language_model.layers.10.mlp.down_proj
859
- model.language_model.layers.10.mlp.act_fn
860
- model.language_model.layers.10.input_layernorm
861
- model.language_model.layers.10.post_attention_layernorm
862
- model.language_model.layers.11
863
- model.language_model.layers.11.self_attn
864
- model.language_model.layers.11.self_attn.q_proj
865
- model.language_model.layers.11.self_attn.k_proj
866
- model.language_model.layers.11.self_attn.v_proj
867
- model.language_model.layers.11.self_attn.o_proj
868
- model.language_model.layers.11.mlp
869
- model.language_model.layers.11.mlp.gate_proj
870
- model.language_model.layers.11.mlp.up_proj
871
- model.language_model.layers.11.mlp.down_proj
872
- model.language_model.layers.11.mlp.act_fn
873
- model.language_model.layers.11.input_layernorm
874
- model.language_model.layers.11.post_attention_layernorm
875
- model.language_model.layers.12
876
- model.language_model.layers.12.self_attn
877
- model.language_model.layers.12.self_attn.q_proj
878
- model.language_model.layers.12.self_attn.k_proj
879
- model.language_model.layers.12.self_attn.v_proj
880
- model.language_model.layers.12.self_attn.o_proj
881
- model.language_model.layers.12.mlp
882
- model.language_model.layers.12.mlp.gate_proj
883
- model.language_model.layers.12.mlp.up_proj
884
- model.language_model.layers.12.mlp.down_proj
885
- model.language_model.layers.12.mlp.act_fn
886
- model.language_model.layers.12.input_layernorm
887
- model.language_model.layers.12.post_attention_layernorm
888
- model.language_model.layers.13
889
- model.language_model.layers.13.self_attn
890
- model.language_model.layers.13.self_attn.q_proj
891
- model.language_model.layers.13.self_attn.k_proj
892
- model.language_model.layers.13.self_attn.v_proj
893
- model.language_model.layers.13.self_attn.o_proj
894
- model.language_model.layers.13.mlp
895
- model.language_model.layers.13.mlp.gate_proj
896
- model.language_model.layers.13.mlp.up_proj
897
- model.language_model.layers.13.mlp.down_proj
898
- model.language_model.layers.13.mlp.act_fn
899
- model.language_model.layers.13.input_layernorm
900
- model.language_model.layers.13.post_attention_layernorm
901
- model.language_model.layers.14
902
- model.language_model.layers.14.self_attn
903
- model.language_model.layers.14.self_attn.q_proj
904
- model.language_model.layers.14.self_attn.k_proj
905
- model.language_model.layers.14.self_attn.v_proj
906
- model.language_model.layers.14.self_attn.o_proj
907
- model.language_model.layers.14.mlp
908
- model.language_model.layers.14.mlp.gate_proj
909
- model.language_model.layers.14.mlp.up_proj
910
- model.language_model.layers.14.mlp.down_proj
911
- model.language_model.layers.14.mlp.act_fn
912
- model.language_model.layers.14.input_layernorm
913
- model.language_model.layers.14.post_attention_layernorm
914
- model.language_model.layers.15
915
- model.language_model.layers.15.self_attn
916
- model.language_model.layers.15.self_attn.q_proj
917
- model.language_model.layers.15.self_attn.k_proj
918
- model.language_model.layers.15.self_attn.v_proj
919
- model.language_model.layers.15.self_attn.o_proj
920
- model.language_model.layers.15.mlp
921
- model.language_model.layers.15.mlp.gate_proj
922
- model.language_model.layers.15.mlp.up_proj
923
- model.language_model.layers.15.mlp.down_proj
924
- model.language_model.layers.15.mlp.act_fn
925
- model.language_model.layers.15.input_layernorm
926
- model.language_model.layers.15.post_attention_layernorm
927
- model.language_model.layers.16
928
- model.language_model.layers.16.self_attn
929
- model.language_model.layers.16.self_attn.q_proj
930
- model.language_model.layers.16.self_attn.k_proj
931
- model.language_model.layers.16.self_attn.v_proj
932
- model.language_model.layers.16.self_attn.o_proj
933
- model.language_model.layers.16.mlp
934
- model.language_model.layers.16.mlp.gate_proj
935
- model.language_model.layers.16.mlp.up_proj
936
- model.language_model.layers.16.mlp.down_proj
937
- model.language_model.layers.16.mlp.act_fn
938
- model.language_model.layers.16.input_layernorm
939
- model.language_model.layers.16.post_attention_layernorm
940
- model.language_model.layers.17
941
- model.language_model.layers.17.self_attn
942
- model.language_model.layers.17.self_attn.q_proj
943
- model.language_model.layers.17.self_attn.k_proj
944
- model.language_model.layers.17.self_attn.v_proj
945
- model.language_model.layers.17.self_attn.o_proj
946
- model.language_model.layers.17.mlp
947
- model.language_model.layers.17.mlp.gate_proj
948
- model.language_model.layers.17.mlp.up_proj
949
- model.language_model.layers.17.mlp.down_proj
950
- model.language_model.layers.17.mlp.act_fn
951
- model.language_model.layers.17.input_layernorm
952
- model.language_model.layers.17.post_attention_layernorm
953
- model.language_model.layers.18
954
- model.language_model.layers.18.self_attn
955
- model.language_model.layers.18.self_attn.q_proj
956
- model.language_model.layers.18.self_attn.k_proj
957
- model.language_model.layers.18.self_attn.v_proj
958
- model.language_model.layers.18.self_attn.o_proj
959
- model.language_model.layers.18.mlp
960
- model.language_model.layers.18.mlp.gate_proj
961
- model.language_model.layers.18.mlp.up_proj
962
- model.language_model.layers.18.mlp.down_proj
963
- model.language_model.layers.18.mlp.act_fn
964
- model.language_model.layers.18.input_layernorm
965
- model.language_model.layers.18.post_attention_layernorm
966
- model.language_model.layers.19
967
- model.language_model.layers.19.self_attn
968
- model.language_model.layers.19.self_attn.q_proj
969
- model.language_model.layers.19.self_attn.k_proj
970
- model.language_model.layers.19.self_attn.v_proj
971
- model.language_model.layers.19.self_attn.o_proj
972
- model.language_model.layers.19.mlp
973
- model.language_model.layers.19.mlp.gate_proj
974
- model.language_model.layers.19.mlp.up_proj
975
- model.language_model.layers.19.mlp.down_proj
976
- model.language_model.layers.19.mlp.act_fn
977
- model.language_model.layers.19.input_layernorm
978
- model.language_model.layers.19.post_attention_layernorm
979
- model.language_model.layers.20
980
- model.language_model.layers.20.self_attn
981
- model.language_model.layers.20.self_attn.q_proj
982
- model.language_model.layers.20.self_attn.k_proj
983
- model.language_model.layers.20.self_attn.v_proj
984
- model.language_model.layers.20.self_attn.o_proj
985
- model.language_model.layers.20.mlp
986
- model.language_model.layers.20.mlp.gate_proj
987
- model.language_model.layers.20.mlp.up_proj
988
- model.language_model.layers.20.mlp.down_proj
989
- model.language_model.layers.20.mlp.act_fn
990
- model.language_model.layers.20.input_layernorm
991
- model.language_model.layers.20.post_attention_layernorm
992
- model.language_model.layers.21
993
- model.language_model.layers.21.self_attn
994
- model.language_model.layers.21.self_attn.q_proj
995
- model.language_model.layers.21.self_attn.k_proj
996
- model.language_model.layers.21.self_attn.v_proj
997
- model.language_model.layers.21.self_attn.o_proj
998
- model.language_model.layers.21.mlp
999
- model.language_model.layers.21.mlp.gate_proj
1000
- model.language_model.layers.21.mlp.up_proj
1001
- model.language_model.layers.21.mlp.down_proj
1002
- model.language_model.layers.21.mlp.act_fn
1003
- model.language_model.layers.21.input_layernorm
1004
- model.language_model.layers.21.post_attention_layernorm
1005
- model.language_model.layers.22
1006
- model.language_model.layers.22.self_attn
1007
- model.language_model.layers.22.self_attn.q_proj
1008
- model.language_model.layers.22.self_attn.k_proj
1009
- model.language_model.layers.22.self_attn.v_proj
1010
- model.language_model.layers.22.self_attn.o_proj
1011
- model.language_model.layers.22.mlp
1012
- model.language_model.layers.22.mlp.gate_proj
1013
- model.language_model.layers.22.mlp.up_proj
1014
- model.language_model.layers.22.mlp.down_proj
1015
- model.language_model.layers.22.mlp.act_fn
1016
- model.language_model.layers.22.input_layernorm
1017
- model.language_model.layers.22.post_attention_layernorm
1018
- model.language_model.layers.23
1019
- model.language_model.layers.23.self_attn
1020
- model.language_model.layers.23.self_attn.q_proj
1021
- model.language_model.layers.23.self_attn.k_proj
1022
- model.language_model.layers.23.self_attn.v_proj
1023
- model.language_model.layers.23.self_attn.o_proj
1024
- model.language_model.layers.23.mlp
1025
- model.language_model.layers.23.mlp.gate_proj
1026
- model.language_model.layers.23.mlp.up_proj
1027
- model.language_model.layers.23.mlp.down_proj
1028
- model.language_model.layers.23.mlp.act_fn
1029
- model.language_model.layers.23.input_layernorm
1030
- model.language_model.layers.23.post_attention_layernorm
1031
- model.language_model.norm
1032
- model.language_model.rotary_emb
1033
- lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/facebook/Perception-LM-1B.txt DELETED
@@ -1,694 +0,0 @@
1
-
2
- model
3
- model.vision_tower
4
- model.vision_tower.timm_model
5
- model.vision_tower.timm_model.patch_embed
6
- model.vision_tower.timm_model.patch_embed.proj
7
- model.vision_tower.timm_model.patch_embed.norm
8
- model.vision_tower.timm_model.pos_drop
9
- model.vision_tower.timm_model.rope
10
- model.vision_tower.timm_model.norm_pre
11
- model.vision_tower.timm_model.blocks
12
- model.vision_tower.timm_model.blocks.0
13
- model.vision_tower.timm_model.blocks.0.norm1
14
- model.vision_tower.timm_model.blocks.0.attn
15
- model.vision_tower.timm_model.blocks.0.attn.qkv
16
- model.vision_tower.timm_model.blocks.0.attn.q_norm
17
- model.vision_tower.timm_model.blocks.0.attn.k_norm
18
- model.vision_tower.timm_model.blocks.0.attn.attn_drop
19
- model.vision_tower.timm_model.blocks.0.attn.norm
20
- model.vision_tower.timm_model.blocks.0.attn.proj
21
- model.vision_tower.timm_model.blocks.0.attn.proj_drop
22
- model.vision_tower.timm_model.blocks.0.drop_path1
23
- model.vision_tower.timm_model.blocks.0.norm2
24
- model.vision_tower.timm_model.blocks.0.mlp
25
- model.vision_tower.timm_model.blocks.0.mlp.fc1
26
- model.vision_tower.timm_model.blocks.0.mlp.act
27
- model.vision_tower.timm_model.blocks.0.mlp.drop1
28
- model.vision_tower.timm_model.blocks.0.mlp.norm
29
- model.vision_tower.timm_model.blocks.0.mlp.fc2
30
- model.vision_tower.timm_model.blocks.0.mlp.drop2
31
- model.vision_tower.timm_model.blocks.0.drop_path2
32
- model.vision_tower.timm_model.blocks.1
33
- model.vision_tower.timm_model.blocks.1.norm1
34
- model.vision_tower.timm_model.blocks.1.attn
35
- model.vision_tower.timm_model.blocks.1.attn.qkv
36
- model.vision_tower.timm_model.blocks.1.attn.q_norm
37
- model.vision_tower.timm_model.blocks.1.attn.k_norm
38
- model.vision_tower.timm_model.blocks.1.attn.attn_drop
39
- model.vision_tower.timm_model.blocks.1.attn.norm
40
- model.vision_tower.timm_model.blocks.1.attn.proj
41
- model.vision_tower.timm_model.blocks.1.attn.proj_drop
42
- model.vision_tower.timm_model.blocks.1.drop_path1
43
- model.vision_tower.timm_model.blocks.1.norm2
44
- model.vision_tower.timm_model.blocks.1.mlp
45
- model.vision_tower.timm_model.blocks.1.mlp.fc1
46
- model.vision_tower.timm_model.blocks.1.mlp.act
47
- model.vision_tower.timm_model.blocks.1.mlp.drop1
48
- model.vision_tower.timm_model.blocks.1.mlp.norm
49
- model.vision_tower.timm_model.blocks.1.mlp.fc2
50
- model.vision_tower.timm_model.blocks.1.mlp.drop2
51
- model.vision_tower.timm_model.blocks.1.drop_path2
52
- model.vision_tower.timm_model.blocks.2
53
- model.vision_tower.timm_model.blocks.2.norm1
54
- model.vision_tower.timm_model.blocks.2.attn
55
- model.vision_tower.timm_model.blocks.2.attn.qkv
56
- model.vision_tower.timm_model.blocks.2.attn.q_norm
57
- model.vision_tower.timm_model.blocks.2.attn.k_norm
58
- model.vision_tower.timm_model.blocks.2.attn.attn_drop
59
- model.vision_tower.timm_model.blocks.2.attn.norm
60
- model.vision_tower.timm_model.blocks.2.attn.proj
61
- model.vision_tower.timm_model.blocks.2.attn.proj_drop
62
- model.vision_tower.timm_model.blocks.2.drop_path1
63
- model.vision_tower.timm_model.blocks.2.norm2
64
- model.vision_tower.timm_model.blocks.2.mlp
65
- model.vision_tower.timm_model.blocks.2.mlp.fc1
66
- model.vision_tower.timm_model.blocks.2.mlp.act
67
- model.vision_tower.timm_model.blocks.2.mlp.drop1
68
- model.vision_tower.timm_model.blocks.2.mlp.norm
69
- model.vision_tower.timm_model.blocks.2.mlp.fc2
70
- model.vision_tower.timm_model.blocks.2.mlp.drop2
71
- model.vision_tower.timm_model.blocks.2.drop_path2
72
- model.vision_tower.timm_model.blocks.3
73
- model.vision_tower.timm_model.blocks.3.norm1
74
- model.vision_tower.timm_model.blocks.3.attn
75
- model.vision_tower.timm_model.blocks.3.attn.qkv
76
- model.vision_tower.timm_model.blocks.3.attn.q_norm
77
- model.vision_tower.timm_model.blocks.3.attn.k_norm
78
- model.vision_tower.timm_model.blocks.3.attn.attn_drop
79
- model.vision_tower.timm_model.blocks.3.attn.norm
80
- model.vision_tower.timm_model.blocks.3.attn.proj
81
- model.vision_tower.timm_model.blocks.3.attn.proj_drop
82
- model.vision_tower.timm_model.blocks.3.drop_path1
83
- model.vision_tower.timm_model.blocks.3.norm2
84
- model.vision_tower.timm_model.blocks.3.mlp
85
- model.vision_tower.timm_model.blocks.3.mlp.fc1
86
- model.vision_tower.timm_model.blocks.3.mlp.act
87
- model.vision_tower.timm_model.blocks.3.mlp.drop1
88
- model.vision_tower.timm_model.blocks.3.mlp.norm
89
- model.vision_tower.timm_model.blocks.3.mlp.fc2
90
- model.vision_tower.timm_model.blocks.3.mlp.drop2
91
- model.vision_tower.timm_model.blocks.3.drop_path2
92
- model.vision_tower.timm_model.blocks.4
93
- model.vision_tower.timm_model.blocks.4.norm1
94
- model.vision_tower.timm_model.blocks.4.attn
95
- model.vision_tower.timm_model.blocks.4.attn.qkv
96
- model.vision_tower.timm_model.blocks.4.attn.q_norm
97
- model.vision_tower.timm_model.blocks.4.attn.k_norm
98
- model.vision_tower.timm_model.blocks.4.attn.attn_drop
99
- model.vision_tower.timm_model.blocks.4.attn.norm
100
- model.vision_tower.timm_model.blocks.4.attn.proj
101
- model.vision_tower.timm_model.blocks.4.attn.proj_drop
102
- model.vision_tower.timm_model.blocks.4.drop_path1
103
- model.vision_tower.timm_model.blocks.4.norm2
104
- model.vision_tower.timm_model.blocks.4.mlp
105
- model.vision_tower.timm_model.blocks.4.mlp.fc1
106
- model.vision_tower.timm_model.blocks.4.mlp.act
107
- model.vision_tower.timm_model.blocks.4.mlp.drop1
108
- model.vision_tower.timm_model.blocks.4.mlp.norm
109
- model.vision_tower.timm_model.blocks.4.mlp.fc2
110
- model.vision_tower.timm_model.blocks.4.mlp.drop2
111
- model.vision_tower.timm_model.blocks.4.drop_path2
112
- model.vision_tower.timm_model.blocks.5
113
- model.vision_tower.timm_model.blocks.5.norm1
114
- model.vision_tower.timm_model.blocks.5.attn
115
- model.vision_tower.timm_model.blocks.5.attn.qkv
116
- model.vision_tower.timm_model.blocks.5.attn.q_norm
117
- model.vision_tower.timm_model.blocks.5.attn.k_norm
118
- model.vision_tower.timm_model.blocks.5.attn.attn_drop
119
- model.vision_tower.timm_model.blocks.5.attn.norm
120
- model.vision_tower.timm_model.blocks.5.attn.proj
121
- model.vision_tower.timm_model.blocks.5.attn.proj_drop
122
- model.vision_tower.timm_model.blocks.5.drop_path1
123
- model.vision_tower.timm_model.blocks.5.norm2
124
- model.vision_tower.timm_model.blocks.5.mlp
125
- model.vision_tower.timm_model.blocks.5.mlp.fc1
126
- model.vision_tower.timm_model.blocks.5.mlp.act
127
- model.vision_tower.timm_model.blocks.5.mlp.drop1
128
- model.vision_tower.timm_model.blocks.5.mlp.norm
129
- model.vision_tower.timm_model.blocks.5.mlp.fc2
130
- model.vision_tower.timm_model.blocks.5.mlp.drop2
131
- model.vision_tower.timm_model.blocks.5.drop_path2
132
- model.vision_tower.timm_model.blocks.6
133
- model.vision_tower.timm_model.blocks.6.norm1
134
- model.vision_tower.timm_model.blocks.6.attn
135
- model.vision_tower.timm_model.blocks.6.attn.qkv
136
- model.vision_tower.timm_model.blocks.6.attn.q_norm
137
- model.vision_tower.timm_model.blocks.6.attn.k_norm
138
- model.vision_tower.timm_model.blocks.6.attn.attn_drop
139
- model.vision_tower.timm_model.blocks.6.attn.norm
140
- model.vision_tower.timm_model.blocks.6.attn.proj
141
- model.vision_tower.timm_model.blocks.6.attn.proj_drop
142
- model.vision_tower.timm_model.blocks.6.drop_path1
143
- model.vision_tower.timm_model.blocks.6.norm2
144
- model.vision_tower.timm_model.blocks.6.mlp
145
- model.vision_tower.timm_model.blocks.6.mlp.fc1
146
- model.vision_tower.timm_model.blocks.6.mlp.act
147
- model.vision_tower.timm_model.blocks.6.mlp.drop1
148
- model.vision_tower.timm_model.blocks.6.mlp.norm
149
- model.vision_tower.timm_model.blocks.6.mlp.fc2
150
- model.vision_tower.timm_model.blocks.6.mlp.drop2
151
- model.vision_tower.timm_model.blocks.6.drop_path2
152
- model.vision_tower.timm_model.blocks.7
153
- model.vision_tower.timm_model.blocks.7.norm1
154
- model.vision_tower.timm_model.blocks.7.attn
155
- model.vision_tower.timm_model.blocks.7.attn.qkv
156
- model.vision_tower.timm_model.blocks.7.attn.q_norm
157
- model.vision_tower.timm_model.blocks.7.attn.k_norm
158
- model.vision_tower.timm_model.blocks.7.attn.attn_drop
159
- model.vision_tower.timm_model.blocks.7.attn.norm
160
- model.vision_tower.timm_model.blocks.7.attn.proj
161
- model.vision_tower.timm_model.blocks.7.attn.proj_drop
162
- model.vision_tower.timm_model.blocks.7.drop_path1
163
- model.vision_tower.timm_model.blocks.7.norm2
164
- model.vision_tower.timm_model.blocks.7.mlp
165
- model.vision_tower.timm_model.blocks.7.mlp.fc1
166
- model.vision_tower.timm_model.blocks.7.mlp.act
167
- model.vision_tower.timm_model.blocks.7.mlp.drop1
168
- model.vision_tower.timm_model.blocks.7.mlp.norm
169
- model.vision_tower.timm_model.blocks.7.mlp.fc2
170
- model.vision_tower.timm_model.blocks.7.mlp.drop2
171
- model.vision_tower.timm_model.blocks.7.drop_path2
172
- model.vision_tower.timm_model.blocks.8
173
- model.vision_tower.timm_model.blocks.8.norm1
174
- model.vision_tower.timm_model.blocks.8.attn
175
- model.vision_tower.timm_model.blocks.8.attn.qkv
176
- model.vision_tower.timm_model.blocks.8.attn.q_norm
177
- model.vision_tower.timm_model.blocks.8.attn.k_norm
178
- model.vision_tower.timm_model.blocks.8.attn.attn_drop
179
- model.vision_tower.timm_model.blocks.8.attn.norm
180
- model.vision_tower.timm_model.blocks.8.attn.proj
181
- model.vision_tower.timm_model.blocks.8.attn.proj_drop
182
- model.vision_tower.timm_model.blocks.8.drop_path1
183
- model.vision_tower.timm_model.blocks.8.norm2
184
- model.vision_tower.timm_model.blocks.8.mlp
185
- model.vision_tower.timm_model.blocks.8.mlp.fc1
186
- model.vision_tower.timm_model.blocks.8.mlp.act
187
- model.vision_tower.timm_model.blocks.8.mlp.drop1
188
- model.vision_tower.timm_model.blocks.8.mlp.norm
189
- model.vision_tower.timm_model.blocks.8.mlp.fc2
190
- model.vision_tower.timm_model.blocks.8.mlp.drop2
191
- model.vision_tower.timm_model.blocks.8.drop_path2
192
- model.vision_tower.timm_model.blocks.9
193
- model.vision_tower.timm_model.blocks.9.norm1
194
- model.vision_tower.timm_model.blocks.9.attn
195
- model.vision_tower.timm_model.blocks.9.attn.qkv
196
- model.vision_tower.timm_model.blocks.9.attn.q_norm
197
- model.vision_tower.timm_model.blocks.9.attn.k_norm
198
- model.vision_tower.timm_model.blocks.9.attn.attn_drop
199
- model.vision_tower.timm_model.blocks.9.attn.norm
200
- model.vision_tower.timm_model.blocks.9.attn.proj
201
- model.vision_tower.timm_model.blocks.9.attn.proj_drop
202
- model.vision_tower.timm_model.blocks.9.drop_path1
203
- model.vision_tower.timm_model.blocks.9.norm2
204
- model.vision_tower.timm_model.blocks.9.mlp
205
- model.vision_tower.timm_model.blocks.9.mlp.fc1
206
- model.vision_tower.timm_model.blocks.9.mlp.act
207
- model.vision_tower.timm_model.blocks.9.mlp.drop1
208
- model.vision_tower.timm_model.blocks.9.mlp.norm
209
- model.vision_tower.timm_model.blocks.9.mlp.fc2
210
- model.vision_tower.timm_model.blocks.9.mlp.drop2
211
- model.vision_tower.timm_model.blocks.9.drop_path2
212
- model.vision_tower.timm_model.blocks.10
213
- model.vision_tower.timm_model.blocks.10.norm1
214
- model.vision_tower.timm_model.blocks.10.attn
215
- model.vision_tower.timm_model.blocks.10.attn.qkv
216
- model.vision_tower.timm_model.blocks.10.attn.q_norm
217
- model.vision_tower.timm_model.blocks.10.attn.k_norm
218
- model.vision_tower.timm_model.blocks.10.attn.attn_drop
219
- model.vision_tower.timm_model.blocks.10.attn.norm
220
- model.vision_tower.timm_model.blocks.10.attn.proj
221
- model.vision_tower.timm_model.blocks.10.attn.proj_drop
222
- model.vision_tower.timm_model.blocks.10.drop_path1
223
- model.vision_tower.timm_model.blocks.10.norm2
224
- model.vision_tower.timm_model.blocks.10.mlp
225
- model.vision_tower.timm_model.blocks.10.mlp.fc1
226
- model.vision_tower.timm_model.blocks.10.mlp.act
227
- model.vision_tower.timm_model.blocks.10.mlp.drop1
228
- model.vision_tower.timm_model.blocks.10.mlp.norm
229
- model.vision_tower.timm_model.blocks.10.mlp.fc2
230
- model.vision_tower.timm_model.blocks.10.mlp.drop2
231
- model.vision_tower.timm_model.blocks.10.drop_path2
232
- model.vision_tower.timm_model.blocks.11
233
- model.vision_tower.timm_model.blocks.11.norm1
234
- model.vision_tower.timm_model.blocks.11.attn
235
- model.vision_tower.timm_model.blocks.11.attn.qkv
236
- model.vision_tower.timm_model.blocks.11.attn.q_norm
237
- model.vision_tower.timm_model.blocks.11.attn.k_norm
238
- model.vision_tower.timm_model.blocks.11.attn.attn_drop
239
- model.vision_tower.timm_model.blocks.11.attn.norm
240
- model.vision_tower.timm_model.blocks.11.attn.proj
241
- model.vision_tower.timm_model.blocks.11.attn.proj_drop
242
- model.vision_tower.timm_model.blocks.11.drop_path1
243
- model.vision_tower.timm_model.blocks.11.norm2
244
- model.vision_tower.timm_model.blocks.11.mlp
245
- model.vision_tower.timm_model.blocks.11.mlp.fc1
246
- model.vision_tower.timm_model.blocks.11.mlp.act
247
- model.vision_tower.timm_model.blocks.11.mlp.drop1
248
- model.vision_tower.timm_model.blocks.11.mlp.norm
249
- model.vision_tower.timm_model.blocks.11.mlp.fc2
250
- model.vision_tower.timm_model.blocks.11.mlp.drop2
251
- model.vision_tower.timm_model.blocks.11.drop_path2
252
- model.vision_tower.timm_model.blocks.12
253
- model.vision_tower.timm_model.blocks.12.norm1
254
- model.vision_tower.timm_model.blocks.12.attn
255
- model.vision_tower.timm_model.blocks.12.attn.qkv
256
- model.vision_tower.timm_model.blocks.12.attn.q_norm
257
- model.vision_tower.timm_model.blocks.12.attn.k_norm
258
- model.vision_tower.timm_model.blocks.12.attn.attn_drop
259
- model.vision_tower.timm_model.blocks.12.attn.norm
260
- model.vision_tower.timm_model.blocks.12.attn.proj
261
- model.vision_tower.timm_model.blocks.12.attn.proj_drop
262
- model.vision_tower.timm_model.blocks.12.drop_path1
263
- model.vision_tower.timm_model.blocks.12.norm2
264
- model.vision_tower.timm_model.blocks.12.mlp
265
- model.vision_tower.timm_model.blocks.12.mlp.fc1
266
- model.vision_tower.timm_model.blocks.12.mlp.act
267
- model.vision_tower.timm_model.blocks.12.mlp.drop1
268
- model.vision_tower.timm_model.blocks.12.mlp.norm
269
- model.vision_tower.timm_model.blocks.12.mlp.fc2
270
- model.vision_tower.timm_model.blocks.12.mlp.drop2
271
- model.vision_tower.timm_model.blocks.12.drop_path2
272
- model.vision_tower.timm_model.blocks.13
273
- model.vision_tower.timm_model.blocks.13.norm1
274
- model.vision_tower.timm_model.blocks.13.attn
275
- model.vision_tower.timm_model.blocks.13.attn.qkv
276
- model.vision_tower.timm_model.blocks.13.attn.q_norm
277
- model.vision_tower.timm_model.blocks.13.attn.k_norm
278
- model.vision_tower.timm_model.blocks.13.attn.attn_drop
279
- model.vision_tower.timm_model.blocks.13.attn.norm
280
- model.vision_tower.timm_model.blocks.13.attn.proj
281
- model.vision_tower.timm_model.blocks.13.attn.proj_drop
282
- model.vision_tower.timm_model.blocks.13.drop_path1
283
- model.vision_tower.timm_model.blocks.13.norm2
284
- model.vision_tower.timm_model.blocks.13.mlp
285
- model.vision_tower.timm_model.blocks.13.mlp.fc1
286
- model.vision_tower.timm_model.blocks.13.mlp.act
287
- model.vision_tower.timm_model.blocks.13.mlp.drop1
288
- model.vision_tower.timm_model.blocks.13.mlp.norm
289
- model.vision_tower.timm_model.blocks.13.mlp.fc2
290
- model.vision_tower.timm_model.blocks.13.mlp.drop2
291
- model.vision_tower.timm_model.blocks.13.drop_path2
292
- model.vision_tower.timm_model.blocks.14
293
- model.vision_tower.timm_model.blocks.14.norm1
294
- model.vision_tower.timm_model.blocks.14.attn
295
- model.vision_tower.timm_model.blocks.14.attn.qkv
296
- model.vision_tower.timm_model.blocks.14.attn.q_norm
297
- model.vision_tower.timm_model.blocks.14.attn.k_norm
298
- model.vision_tower.timm_model.blocks.14.attn.attn_drop
299
- model.vision_tower.timm_model.blocks.14.attn.norm
300
- model.vision_tower.timm_model.blocks.14.attn.proj
301
- model.vision_tower.timm_model.blocks.14.attn.proj_drop
302
- model.vision_tower.timm_model.blocks.14.drop_path1
303
- model.vision_tower.timm_model.blocks.14.norm2
304
- model.vision_tower.timm_model.blocks.14.mlp
305
- model.vision_tower.timm_model.blocks.14.mlp.fc1
306
- model.vision_tower.timm_model.blocks.14.mlp.act
307
- model.vision_tower.timm_model.blocks.14.mlp.drop1
308
- model.vision_tower.timm_model.blocks.14.mlp.norm
309
- model.vision_tower.timm_model.blocks.14.mlp.fc2
310
- model.vision_tower.timm_model.blocks.14.mlp.drop2
311
- model.vision_tower.timm_model.blocks.14.drop_path2
312
- model.vision_tower.timm_model.blocks.15
313
- model.vision_tower.timm_model.blocks.15.norm1
314
- model.vision_tower.timm_model.blocks.15.attn
315
- model.vision_tower.timm_model.blocks.15.attn.qkv
316
- model.vision_tower.timm_model.blocks.15.attn.q_norm
317
- model.vision_tower.timm_model.blocks.15.attn.k_norm
318
- model.vision_tower.timm_model.blocks.15.attn.attn_drop
319
- model.vision_tower.timm_model.blocks.15.attn.norm
320
- model.vision_tower.timm_model.blocks.15.attn.proj
321
- model.vision_tower.timm_model.blocks.15.attn.proj_drop
322
- model.vision_tower.timm_model.blocks.15.drop_path1
323
- model.vision_tower.timm_model.blocks.15.norm2
324
- model.vision_tower.timm_model.blocks.15.mlp
325
- model.vision_tower.timm_model.blocks.15.mlp.fc1
326
- model.vision_tower.timm_model.blocks.15.mlp.act
327
- model.vision_tower.timm_model.blocks.15.mlp.drop1
328
- model.vision_tower.timm_model.blocks.15.mlp.norm
329
- model.vision_tower.timm_model.blocks.15.mlp.fc2
330
- model.vision_tower.timm_model.blocks.15.mlp.drop2
331
- model.vision_tower.timm_model.blocks.15.drop_path2
332
- model.vision_tower.timm_model.blocks.16
333
- model.vision_tower.timm_model.blocks.16.norm1
334
- model.vision_tower.timm_model.blocks.16.attn
335
- model.vision_tower.timm_model.blocks.16.attn.qkv
336
- model.vision_tower.timm_model.blocks.16.attn.q_norm
337
- model.vision_tower.timm_model.blocks.16.attn.k_norm
338
- model.vision_tower.timm_model.blocks.16.attn.attn_drop
339
- model.vision_tower.timm_model.blocks.16.attn.norm
340
- model.vision_tower.timm_model.blocks.16.attn.proj
341
- model.vision_tower.timm_model.blocks.16.attn.proj_drop
342
- model.vision_tower.timm_model.blocks.16.drop_path1
343
- model.vision_tower.timm_model.blocks.16.norm2
344
- model.vision_tower.timm_model.blocks.16.mlp
345
- model.vision_tower.timm_model.blocks.16.mlp.fc1
346
- model.vision_tower.timm_model.blocks.16.mlp.act
347
- model.vision_tower.timm_model.blocks.16.mlp.drop1
348
- model.vision_tower.timm_model.blocks.16.mlp.norm
349
- model.vision_tower.timm_model.blocks.16.mlp.fc2
350
- model.vision_tower.timm_model.blocks.16.mlp.drop2
351
- model.vision_tower.timm_model.blocks.16.drop_path2
352
- model.vision_tower.timm_model.blocks.17
353
- model.vision_tower.timm_model.blocks.17.norm1
354
- model.vision_tower.timm_model.blocks.17.attn
355
- model.vision_tower.timm_model.blocks.17.attn.qkv
356
- model.vision_tower.timm_model.blocks.17.attn.q_norm
357
- model.vision_tower.timm_model.blocks.17.attn.k_norm
358
- model.vision_tower.timm_model.blocks.17.attn.attn_drop
359
- model.vision_tower.timm_model.blocks.17.attn.norm
360
- model.vision_tower.timm_model.blocks.17.attn.proj
361
- model.vision_tower.timm_model.blocks.17.attn.proj_drop
362
- model.vision_tower.timm_model.blocks.17.drop_path1
363
- model.vision_tower.timm_model.blocks.17.norm2
364
- model.vision_tower.timm_model.blocks.17.mlp
365
- model.vision_tower.timm_model.blocks.17.mlp.fc1
366
- model.vision_tower.timm_model.blocks.17.mlp.act
367
- model.vision_tower.timm_model.blocks.17.mlp.drop1
368
- model.vision_tower.timm_model.blocks.17.mlp.norm
369
- model.vision_tower.timm_model.blocks.17.mlp.fc2
370
- model.vision_tower.timm_model.blocks.17.mlp.drop2
371
- model.vision_tower.timm_model.blocks.17.drop_path2
372
- model.vision_tower.timm_model.blocks.18
373
- model.vision_tower.timm_model.blocks.18.norm1
374
- model.vision_tower.timm_model.blocks.18.attn
375
- model.vision_tower.timm_model.blocks.18.attn.qkv
376
- model.vision_tower.timm_model.blocks.18.attn.q_norm
377
- model.vision_tower.timm_model.blocks.18.attn.k_norm
378
- model.vision_tower.timm_model.blocks.18.attn.attn_drop
379
- model.vision_tower.timm_model.blocks.18.attn.norm
380
- model.vision_tower.timm_model.blocks.18.attn.proj
381
- model.vision_tower.timm_model.blocks.18.attn.proj_drop
382
- model.vision_tower.timm_model.blocks.18.drop_path1
383
- model.vision_tower.timm_model.blocks.18.norm2
384
- model.vision_tower.timm_model.blocks.18.mlp
385
- model.vision_tower.timm_model.blocks.18.mlp.fc1
386
- model.vision_tower.timm_model.blocks.18.mlp.act
387
- model.vision_tower.timm_model.blocks.18.mlp.drop1
388
- model.vision_tower.timm_model.blocks.18.mlp.norm
389
- model.vision_tower.timm_model.blocks.18.mlp.fc2
390
- model.vision_tower.timm_model.blocks.18.mlp.drop2
391
- model.vision_tower.timm_model.blocks.18.drop_path2
392
- model.vision_tower.timm_model.blocks.19
393
- model.vision_tower.timm_model.blocks.19.norm1
394
- model.vision_tower.timm_model.blocks.19.attn
395
- model.vision_tower.timm_model.blocks.19.attn.qkv
396
- model.vision_tower.timm_model.blocks.19.attn.q_norm
397
- model.vision_tower.timm_model.blocks.19.attn.k_norm
398
- model.vision_tower.timm_model.blocks.19.attn.attn_drop
399
- model.vision_tower.timm_model.blocks.19.attn.norm
400
- model.vision_tower.timm_model.blocks.19.attn.proj
401
- model.vision_tower.timm_model.blocks.19.attn.proj_drop
402
- model.vision_tower.timm_model.blocks.19.drop_path1
403
- model.vision_tower.timm_model.blocks.19.norm2
404
- model.vision_tower.timm_model.blocks.19.mlp
405
- model.vision_tower.timm_model.blocks.19.mlp.fc1
406
- model.vision_tower.timm_model.blocks.19.mlp.act
407
- model.vision_tower.timm_model.blocks.19.mlp.drop1
408
- model.vision_tower.timm_model.blocks.19.mlp.norm
409
- model.vision_tower.timm_model.blocks.19.mlp.fc2
410
- model.vision_tower.timm_model.blocks.19.mlp.drop2
411
- model.vision_tower.timm_model.blocks.19.drop_path2
412
- model.vision_tower.timm_model.blocks.20
413
- model.vision_tower.timm_model.blocks.20.norm1
414
- model.vision_tower.timm_model.blocks.20.attn
415
- model.vision_tower.timm_model.blocks.20.attn.qkv
416
- model.vision_tower.timm_model.blocks.20.attn.q_norm
417
- model.vision_tower.timm_model.blocks.20.attn.k_norm
418
- model.vision_tower.timm_model.blocks.20.attn.attn_drop
419
- model.vision_tower.timm_model.blocks.20.attn.norm
420
- model.vision_tower.timm_model.blocks.20.attn.proj
421
- model.vision_tower.timm_model.blocks.20.attn.proj_drop
422
- model.vision_tower.timm_model.blocks.20.drop_path1
423
- model.vision_tower.timm_model.blocks.20.norm2
424
- model.vision_tower.timm_model.blocks.20.mlp
425
- model.vision_tower.timm_model.blocks.20.mlp.fc1
426
- model.vision_tower.timm_model.blocks.20.mlp.act
427
- model.vision_tower.timm_model.blocks.20.mlp.drop1
428
- model.vision_tower.timm_model.blocks.20.mlp.norm
429
- model.vision_tower.timm_model.blocks.20.mlp.fc2
430
- model.vision_tower.timm_model.blocks.20.mlp.drop2
431
- model.vision_tower.timm_model.blocks.20.drop_path2
432
- model.vision_tower.timm_model.blocks.21
433
- model.vision_tower.timm_model.blocks.21.norm1
434
- model.vision_tower.timm_model.blocks.21.attn
435
- model.vision_tower.timm_model.blocks.21.attn.qkv
436
- model.vision_tower.timm_model.blocks.21.attn.q_norm
437
- model.vision_tower.timm_model.blocks.21.attn.k_norm
438
- model.vision_tower.timm_model.blocks.21.attn.attn_drop
439
- model.vision_tower.timm_model.blocks.21.attn.norm
440
- model.vision_tower.timm_model.blocks.21.attn.proj
441
- model.vision_tower.timm_model.blocks.21.attn.proj_drop
442
- model.vision_tower.timm_model.blocks.21.drop_path1
443
- model.vision_tower.timm_model.blocks.21.norm2
444
- model.vision_tower.timm_model.blocks.21.mlp
445
- model.vision_tower.timm_model.blocks.21.mlp.fc1
446
- model.vision_tower.timm_model.blocks.21.mlp.act
447
- model.vision_tower.timm_model.blocks.21.mlp.drop1
448
- model.vision_tower.timm_model.blocks.21.mlp.norm
449
- model.vision_tower.timm_model.blocks.21.mlp.fc2
450
- model.vision_tower.timm_model.blocks.21.mlp.drop2
451
- model.vision_tower.timm_model.blocks.21.drop_path2
452
- model.vision_tower.timm_model.blocks.22
453
- model.vision_tower.timm_model.blocks.22.norm1
454
- model.vision_tower.timm_model.blocks.22.attn
455
- model.vision_tower.timm_model.blocks.22.attn.qkv
456
- model.vision_tower.timm_model.blocks.22.attn.q_norm
457
- model.vision_tower.timm_model.blocks.22.attn.k_norm
458
- model.vision_tower.timm_model.blocks.22.attn.attn_drop
459
- model.vision_tower.timm_model.blocks.22.attn.norm
460
- model.vision_tower.timm_model.blocks.22.attn.proj
461
- model.vision_tower.timm_model.blocks.22.attn.proj_drop
462
- model.vision_tower.timm_model.blocks.22.drop_path1
463
- model.vision_tower.timm_model.blocks.22.norm2
464
- model.vision_tower.timm_model.blocks.22.mlp
465
- model.vision_tower.timm_model.blocks.22.mlp.fc1
466
- model.vision_tower.timm_model.blocks.22.mlp.act
467
- model.vision_tower.timm_model.blocks.22.mlp.drop1
468
- model.vision_tower.timm_model.blocks.22.mlp.norm
469
- model.vision_tower.timm_model.blocks.22.mlp.fc2
470
- model.vision_tower.timm_model.blocks.22.mlp.drop2
471
- model.vision_tower.timm_model.blocks.22.drop_path2
472
- model.vision_tower.timm_model.norm
473
- model.vision_tower.timm_model.fc_norm
474
- model.vision_tower.timm_model.head_drop
475
- model.vision_tower.timm_model.head
476
- model.multi_modal_projector
477
- model.multi_modal_projector.linear_1
478
- model.multi_modal_projector.gelu
479
- model.multi_modal_projector.linear_2
480
- model.multi_modal_projector.pooling
481
- model.language_model
482
- model.language_model.embed_tokens
483
- model.language_model.layers
484
- model.language_model.layers.0
485
- model.language_model.layers.0.self_attn
486
- model.language_model.layers.0.self_attn.q_proj
487
- model.language_model.layers.0.self_attn.k_proj
488
- model.language_model.layers.0.self_attn.v_proj
489
- model.language_model.layers.0.self_attn.o_proj
490
- model.language_model.layers.0.mlp
491
- model.language_model.layers.0.mlp.gate_proj
492
- model.language_model.layers.0.mlp.up_proj
493
- model.language_model.layers.0.mlp.down_proj
494
- model.language_model.layers.0.mlp.act_fn
495
- model.language_model.layers.0.input_layernorm
496
- model.language_model.layers.0.post_attention_layernorm
497
- model.language_model.layers.1
498
- model.language_model.layers.1.self_attn
499
- model.language_model.layers.1.self_attn.q_proj
500
- model.language_model.layers.1.self_attn.k_proj
501
- model.language_model.layers.1.self_attn.v_proj
502
- model.language_model.layers.1.self_attn.o_proj
503
- model.language_model.layers.1.mlp
504
- model.language_model.layers.1.mlp.gate_proj
505
- model.language_model.layers.1.mlp.up_proj
506
- model.language_model.layers.1.mlp.down_proj
507
- model.language_model.layers.1.mlp.act_fn
508
- model.language_model.layers.1.input_layernorm
509
- model.language_model.layers.1.post_attention_layernorm
510
- model.language_model.layers.2
511
- model.language_model.layers.2.self_attn
512
- model.language_model.layers.2.self_attn.q_proj
513
- model.language_model.layers.2.self_attn.k_proj
514
- model.language_model.layers.2.self_attn.v_proj
515
- model.language_model.layers.2.self_attn.o_proj
516
- model.language_model.layers.2.mlp
517
- model.language_model.layers.2.mlp.gate_proj
518
- model.language_model.layers.2.mlp.up_proj
519
- model.language_model.layers.2.mlp.down_proj
520
- model.language_model.layers.2.mlp.act_fn
521
- model.language_model.layers.2.input_layernorm
522
- model.language_model.layers.2.post_attention_layernorm
523
- model.language_model.layers.3
524
- model.language_model.layers.3.self_attn
525
- model.language_model.layers.3.self_attn.q_proj
526
- model.language_model.layers.3.self_attn.k_proj
527
- model.language_model.layers.3.self_attn.v_proj
528
- model.language_model.layers.3.self_attn.o_proj
529
- model.language_model.layers.3.mlp
530
- model.language_model.layers.3.mlp.gate_proj
531
- model.language_model.layers.3.mlp.up_proj
532
- model.language_model.layers.3.mlp.down_proj
533
- model.language_model.layers.3.mlp.act_fn
534
- model.language_model.layers.3.input_layernorm
535
- model.language_model.layers.3.post_attention_layernorm
536
- model.language_model.layers.4
537
- model.language_model.layers.4.self_attn
538
- model.language_model.layers.4.self_attn.q_proj
539
- model.language_model.layers.4.self_attn.k_proj
540
- model.language_model.layers.4.self_attn.v_proj
541
- model.language_model.layers.4.self_attn.o_proj
542
- model.language_model.layers.4.mlp
543
- model.language_model.layers.4.mlp.gate_proj
544
- model.language_model.layers.4.mlp.up_proj
545
- model.language_model.layers.4.mlp.down_proj
546
- model.language_model.layers.4.mlp.act_fn
547
- model.language_model.layers.4.input_layernorm
548
- model.language_model.layers.4.post_attention_layernorm
549
- model.language_model.layers.5
550
- model.language_model.layers.5.self_attn
551
- model.language_model.layers.5.self_attn.q_proj
552
- model.language_model.layers.5.self_attn.k_proj
553
- model.language_model.layers.5.self_attn.v_proj
554
- model.language_model.layers.5.self_attn.o_proj
555
- model.language_model.layers.5.mlp
556
- model.language_model.layers.5.mlp.gate_proj
557
- model.language_model.layers.5.mlp.up_proj
558
- model.language_model.layers.5.mlp.down_proj
559
- model.language_model.layers.5.mlp.act_fn
560
- model.language_model.layers.5.input_layernorm
561
- model.language_model.layers.5.post_attention_layernorm
562
- model.language_model.layers.6
563
- model.language_model.layers.6.self_attn
564
- model.language_model.layers.6.self_attn.q_proj
565
- model.language_model.layers.6.self_attn.k_proj
566
- model.language_model.layers.6.self_attn.v_proj
567
- model.language_model.layers.6.self_attn.o_proj
568
- model.language_model.layers.6.mlp
569
- model.language_model.layers.6.mlp.gate_proj
570
- model.language_model.layers.6.mlp.up_proj
571
- model.language_model.layers.6.mlp.down_proj
572
- model.language_model.layers.6.mlp.act_fn
573
- model.language_model.layers.6.input_layernorm
574
- model.language_model.layers.6.post_attention_layernorm
575
- model.language_model.layers.7
576
- model.language_model.layers.7.self_attn
577
- model.language_model.layers.7.self_attn.q_proj
578
- model.language_model.layers.7.self_attn.k_proj
579
- model.language_model.layers.7.self_attn.v_proj
580
- model.language_model.layers.7.self_attn.o_proj
581
- model.language_model.layers.7.mlp
582
- model.language_model.layers.7.mlp.gate_proj
583
- model.language_model.layers.7.mlp.up_proj
584
- model.language_model.layers.7.mlp.down_proj
585
- model.language_model.layers.7.mlp.act_fn
586
- model.language_model.layers.7.input_layernorm
587
- model.language_model.layers.7.post_attention_layernorm
588
- model.language_model.layers.8
589
- model.language_model.layers.8.self_attn
590
- model.language_model.layers.8.self_attn.q_proj
591
- model.language_model.layers.8.self_attn.k_proj
592
- model.language_model.layers.8.self_attn.v_proj
593
- model.language_model.layers.8.self_attn.o_proj
594
- model.language_model.layers.8.mlp
595
- model.language_model.layers.8.mlp.gate_proj
596
- model.language_model.layers.8.mlp.up_proj
597
- model.language_model.layers.8.mlp.down_proj
598
- model.language_model.layers.8.mlp.act_fn
599
- model.language_model.layers.8.input_layernorm
600
- model.language_model.layers.8.post_attention_layernorm
601
- model.language_model.layers.9
602
- model.language_model.layers.9.self_attn
603
- model.language_model.layers.9.self_attn.q_proj
604
- model.language_model.layers.9.self_attn.k_proj
605
- model.language_model.layers.9.self_attn.v_proj
606
- model.language_model.layers.9.self_attn.o_proj
607
- model.language_model.layers.9.mlp
608
- model.language_model.layers.9.mlp.gate_proj
609
- model.language_model.layers.9.mlp.up_proj
610
- model.language_model.layers.9.mlp.down_proj
611
- model.language_model.layers.9.mlp.act_fn
612
- model.language_model.layers.9.input_layernorm
613
- model.language_model.layers.9.post_attention_layernorm
614
- model.language_model.layers.10
615
- model.language_model.layers.10.self_attn
616
- model.language_model.layers.10.self_attn.q_proj
617
- model.language_model.layers.10.self_attn.k_proj
618
- model.language_model.layers.10.self_attn.v_proj
619
- model.language_model.layers.10.self_attn.o_proj
620
- model.language_model.layers.10.mlp
621
- model.language_model.layers.10.mlp.gate_proj
622
- model.language_model.layers.10.mlp.up_proj
623
- model.language_model.layers.10.mlp.down_proj
624
- model.language_model.layers.10.mlp.act_fn
625
- model.language_model.layers.10.input_layernorm
626
- model.language_model.layers.10.post_attention_layernorm
627
- model.language_model.layers.11
628
- model.language_model.layers.11.self_attn
629
- model.language_model.layers.11.self_attn.q_proj
630
- model.language_model.layers.11.self_attn.k_proj
631
- model.language_model.layers.11.self_attn.v_proj
632
- model.language_model.layers.11.self_attn.o_proj
633
- model.language_model.layers.11.mlp
634
- model.language_model.layers.11.mlp.gate_proj
635
- model.language_model.layers.11.mlp.up_proj
636
- model.language_model.layers.11.mlp.down_proj
637
- model.language_model.layers.11.mlp.act_fn
638
- model.language_model.layers.11.input_layernorm
639
- model.language_model.layers.11.post_attention_layernorm
640
- model.language_model.layers.12
641
- model.language_model.layers.12.self_attn
642
- model.language_model.layers.12.self_attn.q_proj
643
- model.language_model.layers.12.self_attn.k_proj
644
- model.language_model.layers.12.self_attn.v_proj
645
- model.language_model.layers.12.self_attn.o_proj
646
- model.language_model.layers.12.mlp
647
- model.language_model.layers.12.mlp.gate_proj
648
- model.language_model.layers.12.mlp.up_proj
649
- model.language_model.layers.12.mlp.down_proj
650
- model.language_model.layers.12.mlp.act_fn
651
- model.language_model.layers.12.input_layernorm
652
- model.language_model.layers.12.post_attention_layernorm
653
- model.language_model.layers.13
654
- model.language_model.layers.13.self_attn
655
- model.language_model.layers.13.self_attn.q_proj
656
- model.language_model.layers.13.self_attn.k_proj
657
- model.language_model.layers.13.self_attn.v_proj
658
- model.language_model.layers.13.self_attn.o_proj
659
- model.language_model.layers.13.mlp
660
- model.language_model.layers.13.mlp.gate_proj
661
- model.language_model.layers.13.mlp.up_proj
662
- model.language_model.layers.13.mlp.down_proj
663
- model.language_model.layers.13.mlp.act_fn
664
- model.language_model.layers.13.input_layernorm
665
- model.language_model.layers.13.post_attention_layernorm
666
- model.language_model.layers.14
667
- model.language_model.layers.14.self_attn
668
- model.language_model.layers.14.self_attn.q_proj
669
- model.language_model.layers.14.self_attn.k_proj
670
- model.language_model.layers.14.self_attn.v_proj
671
- model.language_model.layers.14.self_attn.o_proj
672
- model.language_model.layers.14.mlp
673
- model.language_model.layers.14.mlp.gate_proj
674
- model.language_model.layers.14.mlp.up_proj
675
- model.language_model.layers.14.mlp.down_proj
676
- model.language_model.layers.14.mlp.act_fn
677
- model.language_model.layers.14.input_layernorm
678
- model.language_model.layers.14.post_attention_layernorm
679
- model.language_model.layers.15
680
- model.language_model.layers.15.self_attn
681
- model.language_model.layers.15.self_attn.q_proj
682
- model.language_model.layers.15.self_attn.k_proj
683
- model.language_model.layers.15.self_attn.v_proj
684
- model.language_model.layers.15.self_attn.o_proj
685
- model.language_model.layers.15.mlp
686
- model.language_model.layers.15.mlp.gate_proj
687
- model.language_model.layers.15.mlp.up_proj
688
- model.language_model.layers.15.mlp.down_proj
689
- model.language_model.layers.15.mlp.act_fn
690
- model.language_model.layers.15.input_layernorm
691
- model.language_model.layers.15.post_attention_layernorm
692
- model.language_model.norm
693
- model.language_model.rotary_emb
694
- lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/internlm/internlm-xcomposer2d5-7b.txt DELETED
@@ -1,2132 +0,0 @@
1
-
2
- model
3
- model.tok_embeddings
4
- model.layers
5
- model.layers.0
6
- model.layers.0.attention
7
- model.layers.0.attention.wqkv
8
- model.layers.0.attention.wqkv.lora_dropout
9
- model.layers.0.attention.wqkv.Plora_A
10
- model.layers.0.attention.wqkv.Plora_B
11
- model.layers.0.attention.wqkv.lora_sft_A
12
- model.layers.0.attention.wqkv.lora_sft_B
13
- model.layers.0.attention.wqkv.lora_dpo_A
14
- model.layers.0.attention.wqkv.lora_dpo_B
15
- model.layers.0.attention.wqkv.lora_web_A
16
- model.layers.0.attention.wqkv.lora_web_B
17
- model.layers.0.attention.wo
18
- model.layers.0.attention.wo.lora_dropout
19
- model.layers.0.attention.wo.Plora_A
20
- model.layers.0.attention.wo.Plora_B
21
- model.layers.0.attention.wo.lora_sft_A
22
- model.layers.0.attention.wo.lora_sft_B
23
- model.layers.0.attention.wo.lora_dpo_A
24
- model.layers.0.attention.wo.lora_dpo_B
25
- model.layers.0.attention.wo.lora_web_A
26
- model.layers.0.attention.wo.lora_web_B
27
- model.layers.0.attention.rotary_emb
28
- model.layers.0.feed_forward
29
- model.layers.0.feed_forward.w1
30
- model.layers.0.feed_forward.w1.lora_dropout
31
- model.layers.0.feed_forward.w1.Plora_A
32
- model.layers.0.feed_forward.w1.Plora_B
33
- model.layers.0.feed_forward.w1.lora_sft_A
34
- model.layers.0.feed_forward.w1.lora_sft_B
35
- model.layers.0.feed_forward.w1.lora_dpo_A
36
- model.layers.0.feed_forward.w1.lora_dpo_B
37
- model.layers.0.feed_forward.w1.lora_web_A
38
- model.layers.0.feed_forward.w1.lora_web_B
39
- model.layers.0.feed_forward.w3
40
- model.layers.0.feed_forward.w3.lora_dropout
41
- model.layers.0.feed_forward.w3.Plora_A
42
- model.layers.0.feed_forward.w3.Plora_B
43
- model.layers.0.feed_forward.w3.lora_sft_A
44
- model.layers.0.feed_forward.w3.lora_sft_B
45
- model.layers.0.feed_forward.w3.lora_dpo_A
46
- model.layers.0.feed_forward.w3.lora_dpo_B
47
- model.layers.0.feed_forward.w3.lora_web_A
48
- model.layers.0.feed_forward.w3.lora_web_B
49
- model.layers.0.feed_forward.w2
50
- model.layers.0.feed_forward.w2.lora_dropout
51
- model.layers.0.feed_forward.w2.Plora_A
52
- model.layers.0.feed_forward.w2.Plora_B
53
- model.layers.0.feed_forward.w2.lora_sft_A
54
- model.layers.0.feed_forward.w2.lora_sft_B
55
- model.layers.0.feed_forward.w2.lora_dpo_A
56
- model.layers.0.feed_forward.w2.lora_dpo_B
57
- model.layers.0.feed_forward.w2.lora_web_A
58
- model.layers.0.feed_forward.w2.lora_web_B
59
- model.layers.0.feed_forward.act_fn
60
- model.layers.0.attention_norm
61
- model.layers.0.ffn_norm
62
- model.layers.1
63
- model.layers.1.attention
64
- model.layers.1.attention.wqkv
65
- model.layers.1.attention.wqkv.lora_dropout
66
- model.layers.1.attention.wqkv.Plora_A
67
- model.layers.1.attention.wqkv.Plora_B
68
- model.layers.1.attention.wqkv.lora_sft_A
69
- model.layers.1.attention.wqkv.lora_sft_B
70
- model.layers.1.attention.wqkv.lora_dpo_A
71
- model.layers.1.attention.wqkv.lora_dpo_B
72
- model.layers.1.attention.wqkv.lora_web_A
73
- model.layers.1.attention.wqkv.lora_web_B
74
- model.layers.1.attention.wo
75
- model.layers.1.attention.wo.lora_dropout
76
- model.layers.1.attention.wo.Plora_A
77
- model.layers.1.attention.wo.Plora_B
78
- model.layers.1.attention.wo.lora_sft_A
79
- model.layers.1.attention.wo.lora_sft_B
80
- model.layers.1.attention.wo.lora_dpo_A
81
- model.layers.1.attention.wo.lora_dpo_B
82
- model.layers.1.attention.wo.lora_web_A
83
- model.layers.1.attention.wo.lora_web_B
84
- model.layers.1.attention.rotary_emb
85
- model.layers.1.feed_forward
86
- model.layers.1.feed_forward.w1
87
- model.layers.1.feed_forward.w1.lora_dropout
88
- model.layers.1.feed_forward.w1.Plora_A
89
- model.layers.1.feed_forward.w1.Plora_B
90
- model.layers.1.feed_forward.w1.lora_sft_A
91
- model.layers.1.feed_forward.w1.lora_sft_B
92
- model.layers.1.feed_forward.w1.lora_dpo_A
93
- model.layers.1.feed_forward.w1.lora_dpo_B
94
- model.layers.1.feed_forward.w1.lora_web_A
95
- model.layers.1.feed_forward.w1.lora_web_B
96
- model.layers.1.feed_forward.w3
97
- model.layers.1.feed_forward.w3.lora_dropout
98
- model.layers.1.feed_forward.w3.Plora_A
99
- model.layers.1.feed_forward.w3.Plora_B
100
- model.layers.1.feed_forward.w3.lora_sft_A
101
- model.layers.1.feed_forward.w3.lora_sft_B
102
- model.layers.1.feed_forward.w3.lora_dpo_A
103
- model.layers.1.feed_forward.w3.lora_dpo_B
104
- model.layers.1.feed_forward.w3.lora_web_A
105
- model.layers.1.feed_forward.w3.lora_web_B
106
- model.layers.1.feed_forward.w2
107
- model.layers.1.feed_forward.w2.lora_dropout
108
- model.layers.1.feed_forward.w2.Plora_A
109
- model.layers.1.feed_forward.w2.Plora_B
110
- model.layers.1.feed_forward.w2.lora_sft_A
111
- model.layers.1.feed_forward.w2.lora_sft_B
112
- model.layers.1.feed_forward.w2.lora_dpo_A
113
- model.layers.1.feed_forward.w2.lora_dpo_B
114
- model.layers.1.feed_forward.w2.lora_web_A
115
- model.layers.1.feed_forward.w2.lora_web_B
116
- model.layers.1.feed_forward.act_fn
117
- model.layers.1.attention_norm
118
- model.layers.1.ffn_norm
119
- model.layers.2
120
- model.layers.2.attention
121
- model.layers.2.attention.wqkv
122
- model.layers.2.attention.wqkv.lora_dropout
123
- model.layers.2.attention.wqkv.Plora_A
124
- model.layers.2.attention.wqkv.Plora_B
125
- model.layers.2.attention.wqkv.lora_sft_A
126
- model.layers.2.attention.wqkv.lora_sft_B
127
- model.layers.2.attention.wqkv.lora_dpo_A
128
- model.layers.2.attention.wqkv.lora_dpo_B
129
- model.layers.2.attention.wqkv.lora_web_A
130
- model.layers.2.attention.wqkv.lora_web_B
131
- model.layers.2.attention.wo
132
- model.layers.2.attention.wo.lora_dropout
133
- model.layers.2.attention.wo.Plora_A
134
- model.layers.2.attention.wo.Plora_B
135
- model.layers.2.attention.wo.lora_sft_A
136
- model.layers.2.attention.wo.lora_sft_B
137
- model.layers.2.attention.wo.lora_dpo_A
138
- model.layers.2.attention.wo.lora_dpo_B
139
- model.layers.2.attention.wo.lora_web_A
140
- model.layers.2.attention.wo.lora_web_B
141
- model.layers.2.attention.rotary_emb
142
- model.layers.2.feed_forward
143
- model.layers.2.feed_forward.w1
144
- model.layers.2.feed_forward.w1.lora_dropout
145
- model.layers.2.feed_forward.w1.Plora_A
146
- model.layers.2.feed_forward.w1.Plora_B
147
- model.layers.2.feed_forward.w1.lora_sft_A
148
- model.layers.2.feed_forward.w1.lora_sft_B
149
- model.layers.2.feed_forward.w1.lora_dpo_A
150
- model.layers.2.feed_forward.w1.lora_dpo_B
151
- model.layers.2.feed_forward.w1.lora_web_A
152
- model.layers.2.feed_forward.w1.lora_web_B
153
- model.layers.2.feed_forward.w3
154
- model.layers.2.feed_forward.w3.lora_dropout
155
- model.layers.2.feed_forward.w3.Plora_A
156
- model.layers.2.feed_forward.w3.Plora_B
157
- model.layers.2.feed_forward.w3.lora_sft_A
158
- model.layers.2.feed_forward.w3.lora_sft_B
159
- model.layers.2.feed_forward.w3.lora_dpo_A
160
- model.layers.2.feed_forward.w3.lora_dpo_B
161
- model.layers.2.feed_forward.w3.lora_web_A
162
- model.layers.2.feed_forward.w3.lora_web_B
163
- model.layers.2.feed_forward.w2
164
- model.layers.2.feed_forward.w2.lora_dropout
165
- model.layers.2.feed_forward.w2.Plora_A
166
- model.layers.2.feed_forward.w2.Plora_B
167
- model.layers.2.feed_forward.w2.lora_sft_A
168
- model.layers.2.feed_forward.w2.lora_sft_B
169
- model.layers.2.feed_forward.w2.lora_dpo_A
170
- model.layers.2.feed_forward.w2.lora_dpo_B
171
- model.layers.2.feed_forward.w2.lora_web_A
172
- model.layers.2.feed_forward.w2.lora_web_B
173
- model.layers.2.feed_forward.act_fn
174
- model.layers.2.attention_norm
175
- model.layers.2.ffn_norm
176
- model.layers.3
177
- model.layers.3.attention
178
- model.layers.3.attention.wqkv
179
- model.layers.3.attention.wqkv.lora_dropout
180
- model.layers.3.attention.wqkv.Plora_A
181
- model.layers.3.attention.wqkv.Plora_B
182
- model.layers.3.attention.wqkv.lora_sft_A
183
- model.layers.3.attention.wqkv.lora_sft_B
184
- model.layers.3.attention.wqkv.lora_dpo_A
185
- model.layers.3.attention.wqkv.lora_dpo_B
186
- model.layers.3.attention.wqkv.lora_web_A
187
- model.layers.3.attention.wqkv.lora_web_B
188
- model.layers.3.attention.wo
189
- model.layers.3.attention.wo.lora_dropout
190
- model.layers.3.attention.wo.Plora_A
191
- model.layers.3.attention.wo.Plora_B
192
- model.layers.3.attention.wo.lora_sft_A
193
- model.layers.3.attention.wo.lora_sft_B
194
- model.layers.3.attention.wo.lora_dpo_A
195
- model.layers.3.attention.wo.lora_dpo_B
196
- model.layers.3.attention.wo.lora_web_A
197
- model.layers.3.attention.wo.lora_web_B
198
- model.layers.3.attention.rotary_emb
199
- model.layers.3.feed_forward
200
- model.layers.3.feed_forward.w1
201
- model.layers.3.feed_forward.w1.lora_dropout
202
- model.layers.3.feed_forward.w1.Plora_A
203
- model.layers.3.feed_forward.w1.Plora_B
204
- model.layers.3.feed_forward.w1.lora_sft_A
205
- model.layers.3.feed_forward.w1.lora_sft_B
206
- model.layers.3.feed_forward.w1.lora_dpo_A
207
- model.layers.3.feed_forward.w1.lora_dpo_B
208
- model.layers.3.feed_forward.w1.lora_web_A
209
- model.layers.3.feed_forward.w1.lora_web_B
210
- model.layers.3.feed_forward.w3
211
- model.layers.3.feed_forward.w3.lora_dropout
212
- model.layers.3.feed_forward.w3.Plora_A
213
- model.layers.3.feed_forward.w3.Plora_B
214
- model.layers.3.feed_forward.w3.lora_sft_A
215
- model.layers.3.feed_forward.w3.lora_sft_B
216
- model.layers.3.feed_forward.w3.lora_dpo_A
217
- model.layers.3.feed_forward.w3.lora_dpo_B
218
- model.layers.3.feed_forward.w3.lora_web_A
219
- model.layers.3.feed_forward.w3.lora_web_B
220
- model.layers.3.feed_forward.w2
221
- model.layers.3.feed_forward.w2.lora_dropout
222
- model.layers.3.feed_forward.w2.Plora_A
223
- model.layers.3.feed_forward.w2.Plora_B
224
- model.layers.3.feed_forward.w2.lora_sft_A
225
- model.layers.3.feed_forward.w2.lora_sft_B
226
- model.layers.3.feed_forward.w2.lora_dpo_A
227
- model.layers.3.feed_forward.w2.lora_dpo_B
228
- model.layers.3.feed_forward.w2.lora_web_A
229
- model.layers.3.feed_forward.w2.lora_web_B
230
- model.layers.3.feed_forward.act_fn
231
- model.layers.3.attention_norm
232
- model.layers.3.ffn_norm
233
- model.layers.4
234
- model.layers.4.attention
235
- model.layers.4.attention.wqkv
236
- model.layers.4.attention.wqkv.lora_dropout
237
- model.layers.4.attention.wqkv.Plora_A
238
- model.layers.4.attention.wqkv.Plora_B
239
- model.layers.4.attention.wqkv.lora_sft_A
240
- model.layers.4.attention.wqkv.lora_sft_B
241
- model.layers.4.attention.wqkv.lora_dpo_A
242
- model.layers.4.attention.wqkv.lora_dpo_B
243
- model.layers.4.attention.wqkv.lora_web_A
244
- model.layers.4.attention.wqkv.lora_web_B
245
- model.layers.4.attention.wo
246
- model.layers.4.attention.wo.lora_dropout
247
- model.layers.4.attention.wo.Plora_A
248
- model.layers.4.attention.wo.Plora_B
249
- model.layers.4.attention.wo.lora_sft_A
250
- model.layers.4.attention.wo.lora_sft_B
251
- model.layers.4.attention.wo.lora_dpo_A
252
- model.layers.4.attention.wo.lora_dpo_B
253
- model.layers.4.attention.wo.lora_web_A
254
- model.layers.4.attention.wo.lora_web_B
255
- model.layers.4.attention.rotary_emb
256
- model.layers.4.feed_forward
257
- model.layers.4.feed_forward.w1
258
- model.layers.4.feed_forward.w1.lora_dropout
259
- model.layers.4.feed_forward.w1.Plora_A
260
- model.layers.4.feed_forward.w1.Plora_B
261
- model.layers.4.feed_forward.w1.lora_sft_A
262
- model.layers.4.feed_forward.w1.lora_sft_B
263
- model.layers.4.feed_forward.w1.lora_dpo_A
264
- model.layers.4.feed_forward.w1.lora_dpo_B
265
- model.layers.4.feed_forward.w1.lora_web_A
266
- model.layers.4.feed_forward.w1.lora_web_B
267
- model.layers.4.feed_forward.w3
268
- model.layers.4.feed_forward.w3.lora_dropout
269
- model.layers.4.feed_forward.w3.Plora_A
270
- model.layers.4.feed_forward.w3.Plora_B
271
- model.layers.4.feed_forward.w3.lora_sft_A
272
- model.layers.4.feed_forward.w3.lora_sft_B
273
- model.layers.4.feed_forward.w3.lora_dpo_A
274
- model.layers.4.feed_forward.w3.lora_dpo_B
275
- model.layers.4.feed_forward.w3.lora_web_A
276
- model.layers.4.feed_forward.w3.lora_web_B
277
- model.layers.4.feed_forward.w2
278
- model.layers.4.feed_forward.w2.lora_dropout
279
- model.layers.4.feed_forward.w2.Plora_A
280
- model.layers.4.feed_forward.w2.Plora_B
281
- model.layers.4.feed_forward.w2.lora_sft_A
282
- model.layers.4.feed_forward.w2.lora_sft_B
283
- model.layers.4.feed_forward.w2.lora_dpo_A
284
- model.layers.4.feed_forward.w2.lora_dpo_B
285
- model.layers.4.feed_forward.w2.lora_web_A
286
- model.layers.4.feed_forward.w2.lora_web_B
287
- model.layers.4.feed_forward.act_fn
288
- model.layers.4.attention_norm
289
- model.layers.4.ffn_norm
290
- model.layers.5
291
- model.layers.5.attention
292
- model.layers.5.attention.wqkv
293
- model.layers.5.attention.wqkv.lora_dropout
294
- model.layers.5.attention.wqkv.Plora_A
295
- model.layers.5.attention.wqkv.Plora_B
296
- model.layers.5.attention.wqkv.lora_sft_A
297
- model.layers.5.attention.wqkv.lora_sft_B
298
- model.layers.5.attention.wqkv.lora_dpo_A
299
- model.layers.5.attention.wqkv.lora_dpo_B
300
- model.layers.5.attention.wqkv.lora_web_A
301
- model.layers.5.attention.wqkv.lora_web_B
302
- model.layers.5.attention.wo
303
- model.layers.5.attention.wo.lora_dropout
304
- model.layers.5.attention.wo.Plora_A
305
- model.layers.5.attention.wo.Plora_B
306
- model.layers.5.attention.wo.lora_sft_A
307
- model.layers.5.attention.wo.lora_sft_B
308
- model.layers.5.attention.wo.lora_dpo_A
309
- model.layers.5.attention.wo.lora_dpo_B
310
- model.layers.5.attention.wo.lora_web_A
311
- model.layers.5.attention.wo.lora_web_B
312
- model.layers.5.attention.rotary_emb
313
- model.layers.5.feed_forward
314
- model.layers.5.feed_forward.w1
315
- model.layers.5.feed_forward.w1.lora_dropout
316
- model.layers.5.feed_forward.w1.Plora_A
317
- model.layers.5.feed_forward.w1.Plora_B
318
- model.layers.5.feed_forward.w1.lora_sft_A
319
- model.layers.5.feed_forward.w1.lora_sft_B
320
- model.layers.5.feed_forward.w1.lora_dpo_A
321
- model.layers.5.feed_forward.w1.lora_dpo_B
322
- model.layers.5.feed_forward.w1.lora_web_A
323
- model.layers.5.feed_forward.w1.lora_web_B
324
- model.layers.5.feed_forward.w3
325
- model.layers.5.feed_forward.w3.lora_dropout
326
- model.layers.5.feed_forward.w3.Plora_A
327
- model.layers.5.feed_forward.w3.Plora_B
328
- model.layers.5.feed_forward.w3.lora_sft_A
329
- model.layers.5.feed_forward.w3.lora_sft_B
330
- model.layers.5.feed_forward.w3.lora_dpo_A
331
- model.layers.5.feed_forward.w3.lora_dpo_B
332
- model.layers.5.feed_forward.w3.lora_web_A
333
- model.layers.5.feed_forward.w3.lora_web_B
334
- model.layers.5.feed_forward.w2
335
- model.layers.5.feed_forward.w2.lora_dropout
336
- model.layers.5.feed_forward.w2.Plora_A
337
- model.layers.5.feed_forward.w2.Plora_B
338
- model.layers.5.feed_forward.w2.lora_sft_A
339
- model.layers.5.feed_forward.w2.lora_sft_B
340
- model.layers.5.feed_forward.w2.lora_dpo_A
341
- model.layers.5.feed_forward.w2.lora_dpo_B
342
- model.layers.5.feed_forward.w2.lora_web_A
343
- model.layers.5.feed_forward.w2.lora_web_B
344
- model.layers.5.feed_forward.act_fn
345
- model.layers.5.attention_norm
346
- model.layers.5.ffn_norm
347
- model.layers.6
348
- model.layers.6.attention
349
- model.layers.6.attention.wqkv
350
- model.layers.6.attention.wqkv.lora_dropout
351
- model.layers.6.attention.wqkv.Plora_A
352
- model.layers.6.attention.wqkv.Plora_B
353
- model.layers.6.attention.wqkv.lora_sft_A
354
- model.layers.6.attention.wqkv.lora_sft_B
355
- model.layers.6.attention.wqkv.lora_dpo_A
356
- model.layers.6.attention.wqkv.lora_dpo_B
357
- model.layers.6.attention.wqkv.lora_web_A
358
- model.layers.6.attention.wqkv.lora_web_B
359
- model.layers.6.attention.wo
360
- model.layers.6.attention.wo.lora_dropout
361
- model.layers.6.attention.wo.Plora_A
362
- model.layers.6.attention.wo.Plora_B
363
- model.layers.6.attention.wo.lora_sft_A
364
- model.layers.6.attention.wo.lora_sft_B
365
- model.layers.6.attention.wo.lora_dpo_A
366
- model.layers.6.attention.wo.lora_dpo_B
367
- model.layers.6.attention.wo.lora_web_A
368
- model.layers.6.attention.wo.lora_web_B
369
- model.layers.6.attention.rotary_emb
370
- model.layers.6.feed_forward
371
- model.layers.6.feed_forward.w1
372
- model.layers.6.feed_forward.w1.lora_dropout
373
- model.layers.6.feed_forward.w1.Plora_A
374
- model.layers.6.feed_forward.w1.Plora_B
375
- model.layers.6.feed_forward.w1.lora_sft_A
376
- model.layers.6.feed_forward.w1.lora_sft_B
377
- model.layers.6.feed_forward.w1.lora_dpo_A
378
- model.layers.6.feed_forward.w1.lora_dpo_B
379
- model.layers.6.feed_forward.w1.lora_web_A
380
- model.layers.6.feed_forward.w1.lora_web_B
381
- model.layers.6.feed_forward.w3
382
- model.layers.6.feed_forward.w3.lora_dropout
383
- model.layers.6.feed_forward.w3.Plora_A
384
- model.layers.6.feed_forward.w3.Plora_B
385
- model.layers.6.feed_forward.w3.lora_sft_A
386
- model.layers.6.feed_forward.w3.lora_sft_B
387
- model.layers.6.feed_forward.w3.lora_dpo_A
388
- model.layers.6.feed_forward.w3.lora_dpo_B
389
- model.layers.6.feed_forward.w3.lora_web_A
390
- model.layers.6.feed_forward.w3.lora_web_B
391
- model.layers.6.feed_forward.w2
392
- model.layers.6.feed_forward.w2.lora_dropout
393
- model.layers.6.feed_forward.w2.Plora_A
394
- model.layers.6.feed_forward.w2.Plora_B
395
- model.layers.6.feed_forward.w2.lora_sft_A
396
- model.layers.6.feed_forward.w2.lora_sft_B
397
- model.layers.6.feed_forward.w2.lora_dpo_A
398
- model.layers.6.feed_forward.w2.lora_dpo_B
399
- model.layers.6.feed_forward.w2.lora_web_A
400
- model.layers.6.feed_forward.w2.lora_web_B
401
- model.layers.6.feed_forward.act_fn
402
- model.layers.6.attention_norm
403
- model.layers.6.ffn_norm
404
- model.layers.7
405
- model.layers.7.attention
406
- model.layers.7.attention.wqkv
407
- model.layers.7.attention.wqkv.lora_dropout
408
- model.layers.7.attention.wqkv.Plora_A
409
- model.layers.7.attention.wqkv.Plora_B
410
- model.layers.7.attention.wqkv.lora_sft_A
411
- model.layers.7.attention.wqkv.lora_sft_B
412
- model.layers.7.attention.wqkv.lora_dpo_A
413
- model.layers.7.attention.wqkv.lora_dpo_B
414
- model.layers.7.attention.wqkv.lora_web_A
415
- model.layers.7.attention.wqkv.lora_web_B
416
- model.layers.7.attention.wo
417
- model.layers.7.attention.wo.lora_dropout
418
- model.layers.7.attention.wo.Plora_A
419
- model.layers.7.attention.wo.Plora_B
420
- model.layers.7.attention.wo.lora_sft_A
421
- model.layers.7.attention.wo.lora_sft_B
422
- model.layers.7.attention.wo.lora_dpo_A
423
- model.layers.7.attention.wo.lora_dpo_B
424
- model.layers.7.attention.wo.lora_web_A
425
- model.layers.7.attention.wo.lora_web_B
426
- model.layers.7.attention.rotary_emb
427
- model.layers.7.feed_forward
428
- model.layers.7.feed_forward.w1
429
- model.layers.7.feed_forward.w1.lora_dropout
430
- model.layers.7.feed_forward.w1.Plora_A
431
- model.layers.7.feed_forward.w1.Plora_B
432
- model.layers.7.feed_forward.w1.lora_sft_A
433
- model.layers.7.feed_forward.w1.lora_sft_B
434
- model.layers.7.feed_forward.w1.lora_dpo_A
435
- model.layers.7.feed_forward.w1.lora_dpo_B
436
- model.layers.7.feed_forward.w1.lora_web_A
437
- model.layers.7.feed_forward.w1.lora_web_B
438
- model.layers.7.feed_forward.w3
439
- model.layers.7.feed_forward.w3.lora_dropout
440
- model.layers.7.feed_forward.w3.Plora_A
441
- model.layers.7.feed_forward.w3.Plora_B
442
- model.layers.7.feed_forward.w3.lora_sft_A
443
- model.layers.7.feed_forward.w3.lora_sft_B
444
- model.layers.7.feed_forward.w3.lora_dpo_A
445
- model.layers.7.feed_forward.w3.lora_dpo_B
446
- model.layers.7.feed_forward.w3.lora_web_A
447
- model.layers.7.feed_forward.w3.lora_web_B
448
- model.layers.7.feed_forward.w2
449
- model.layers.7.feed_forward.w2.lora_dropout
450
- model.layers.7.feed_forward.w2.Plora_A
451
- model.layers.7.feed_forward.w2.Plora_B
452
- model.layers.7.feed_forward.w2.lora_sft_A
453
- model.layers.7.feed_forward.w2.lora_sft_B
454
- model.layers.7.feed_forward.w2.lora_dpo_A
455
- model.layers.7.feed_forward.w2.lora_dpo_B
456
- model.layers.7.feed_forward.w2.lora_web_A
457
- model.layers.7.feed_forward.w2.lora_web_B
458
- model.layers.7.feed_forward.act_fn
459
- model.layers.7.attention_norm
460
- model.layers.7.ffn_norm
461
- model.layers.8
462
- model.layers.8.attention
463
- model.layers.8.attention.wqkv
464
- model.layers.8.attention.wqkv.lora_dropout
465
- model.layers.8.attention.wqkv.Plora_A
466
- model.layers.8.attention.wqkv.Plora_B
467
- model.layers.8.attention.wqkv.lora_sft_A
468
- model.layers.8.attention.wqkv.lora_sft_B
469
- model.layers.8.attention.wqkv.lora_dpo_A
470
- model.layers.8.attention.wqkv.lora_dpo_B
471
- model.layers.8.attention.wqkv.lora_web_A
472
- model.layers.8.attention.wqkv.lora_web_B
473
- model.layers.8.attention.wo
474
- model.layers.8.attention.wo.lora_dropout
475
- model.layers.8.attention.wo.Plora_A
476
- model.layers.8.attention.wo.Plora_B
477
- model.layers.8.attention.wo.lora_sft_A
478
- model.layers.8.attention.wo.lora_sft_B
479
- model.layers.8.attention.wo.lora_dpo_A
480
- model.layers.8.attention.wo.lora_dpo_B
481
- model.layers.8.attention.wo.lora_web_A
482
- model.layers.8.attention.wo.lora_web_B
483
- model.layers.8.attention.rotary_emb
484
- model.layers.8.feed_forward
485
- model.layers.8.feed_forward.w1
486
- model.layers.8.feed_forward.w1.lora_dropout
487
- model.layers.8.feed_forward.w1.Plora_A
488
- model.layers.8.feed_forward.w1.Plora_B
489
- model.layers.8.feed_forward.w1.lora_sft_A
490
- model.layers.8.feed_forward.w1.lora_sft_B
491
- model.layers.8.feed_forward.w1.lora_dpo_A
492
- model.layers.8.feed_forward.w1.lora_dpo_B
493
- model.layers.8.feed_forward.w1.lora_web_A
494
- model.layers.8.feed_forward.w1.lora_web_B
495
- model.layers.8.feed_forward.w3
496
- model.layers.8.feed_forward.w3.lora_dropout
497
- model.layers.8.feed_forward.w3.Plora_A
498
- model.layers.8.feed_forward.w3.Plora_B
499
- model.layers.8.feed_forward.w3.lora_sft_A
500
- model.layers.8.feed_forward.w3.lora_sft_B
501
- model.layers.8.feed_forward.w3.lora_dpo_A
502
- model.layers.8.feed_forward.w3.lora_dpo_B
503
- model.layers.8.feed_forward.w3.lora_web_A
504
- model.layers.8.feed_forward.w3.lora_web_B
505
- model.layers.8.feed_forward.w2
506
- model.layers.8.feed_forward.w2.lora_dropout
507
- model.layers.8.feed_forward.w2.Plora_A
508
- model.layers.8.feed_forward.w2.Plora_B
509
- model.layers.8.feed_forward.w2.lora_sft_A
510
- model.layers.8.feed_forward.w2.lora_sft_B
511
- model.layers.8.feed_forward.w2.lora_dpo_A
512
- model.layers.8.feed_forward.w2.lora_dpo_B
513
- model.layers.8.feed_forward.w2.lora_web_A
514
- model.layers.8.feed_forward.w2.lora_web_B
515
- model.layers.8.feed_forward.act_fn
516
- model.layers.8.attention_norm
517
- model.layers.8.ffn_norm
518
- model.layers.9
519
- model.layers.9.attention
520
- model.layers.9.attention.wqkv
521
- model.layers.9.attention.wqkv.lora_dropout
522
- model.layers.9.attention.wqkv.Plora_A
523
- model.layers.9.attention.wqkv.Plora_B
524
- model.layers.9.attention.wqkv.lora_sft_A
525
- model.layers.9.attention.wqkv.lora_sft_B
526
- model.layers.9.attention.wqkv.lora_dpo_A
527
- model.layers.9.attention.wqkv.lora_dpo_B
528
- model.layers.9.attention.wqkv.lora_web_A
529
- model.layers.9.attention.wqkv.lora_web_B
530
- model.layers.9.attention.wo
531
- model.layers.9.attention.wo.lora_dropout
532
- model.layers.9.attention.wo.Plora_A
533
- model.layers.9.attention.wo.Plora_B
534
- model.layers.9.attention.wo.lora_sft_A
535
- model.layers.9.attention.wo.lora_sft_B
536
- model.layers.9.attention.wo.lora_dpo_A
537
- model.layers.9.attention.wo.lora_dpo_B
538
- model.layers.9.attention.wo.lora_web_A
539
- model.layers.9.attention.wo.lora_web_B
540
- model.layers.9.attention.rotary_emb
541
- model.layers.9.feed_forward
542
- model.layers.9.feed_forward.w1
543
- model.layers.9.feed_forward.w1.lora_dropout
544
- model.layers.9.feed_forward.w1.Plora_A
545
- model.layers.9.feed_forward.w1.Plora_B
546
- model.layers.9.feed_forward.w1.lora_sft_A
547
- model.layers.9.feed_forward.w1.lora_sft_B
548
- model.layers.9.feed_forward.w1.lora_dpo_A
549
- model.layers.9.feed_forward.w1.lora_dpo_B
550
- model.layers.9.feed_forward.w1.lora_web_A
551
- model.layers.9.feed_forward.w1.lora_web_B
552
- model.layers.9.feed_forward.w3
553
- model.layers.9.feed_forward.w3.lora_dropout
554
- model.layers.9.feed_forward.w3.Plora_A
555
- model.layers.9.feed_forward.w3.Plora_B
556
- model.layers.9.feed_forward.w3.lora_sft_A
557
- model.layers.9.feed_forward.w3.lora_sft_B
558
- model.layers.9.feed_forward.w3.lora_dpo_A
559
- model.layers.9.feed_forward.w3.lora_dpo_B
560
- model.layers.9.feed_forward.w3.lora_web_A
561
- model.layers.9.feed_forward.w3.lora_web_B
562
- model.layers.9.feed_forward.w2
563
- model.layers.9.feed_forward.w2.lora_dropout
564
- model.layers.9.feed_forward.w2.Plora_A
565
- model.layers.9.feed_forward.w2.Plora_B
566
- model.layers.9.feed_forward.w2.lora_sft_A
567
- model.layers.9.feed_forward.w2.lora_sft_B
568
- model.layers.9.feed_forward.w2.lora_dpo_A
569
- model.layers.9.feed_forward.w2.lora_dpo_B
570
- model.layers.9.feed_forward.w2.lora_web_A
571
- model.layers.9.feed_forward.w2.lora_web_B
572
- model.layers.9.feed_forward.act_fn
573
- model.layers.9.attention_norm
574
- model.layers.9.ffn_norm
575
- model.layers.10
576
- model.layers.10.attention
577
- model.layers.10.attention.wqkv
578
- model.layers.10.attention.wqkv.lora_dropout
579
- model.layers.10.attention.wqkv.Plora_A
580
- model.layers.10.attention.wqkv.Plora_B
581
- model.layers.10.attention.wqkv.lora_sft_A
582
- model.layers.10.attention.wqkv.lora_sft_B
583
- model.layers.10.attention.wqkv.lora_dpo_A
584
- model.layers.10.attention.wqkv.lora_dpo_B
585
- model.layers.10.attention.wqkv.lora_web_A
586
- model.layers.10.attention.wqkv.lora_web_B
587
- model.layers.10.attention.wo
588
- model.layers.10.attention.wo.lora_dropout
589
- model.layers.10.attention.wo.Plora_A
590
- model.layers.10.attention.wo.Plora_B
591
- model.layers.10.attention.wo.lora_sft_A
592
- model.layers.10.attention.wo.lora_sft_B
593
- model.layers.10.attention.wo.lora_dpo_A
594
- model.layers.10.attention.wo.lora_dpo_B
595
- model.layers.10.attention.wo.lora_web_A
596
- model.layers.10.attention.wo.lora_web_B
597
- model.layers.10.attention.rotary_emb
598
- model.layers.10.feed_forward
599
- model.layers.10.feed_forward.w1
600
- model.layers.10.feed_forward.w1.lora_dropout
601
- model.layers.10.feed_forward.w1.Plora_A
602
- model.layers.10.feed_forward.w1.Plora_B
603
- model.layers.10.feed_forward.w1.lora_sft_A
604
- model.layers.10.feed_forward.w1.lora_sft_B
605
- model.layers.10.feed_forward.w1.lora_dpo_A
606
- model.layers.10.feed_forward.w1.lora_dpo_B
607
- model.layers.10.feed_forward.w1.lora_web_A
608
- model.layers.10.feed_forward.w1.lora_web_B
609
- model.layers.10.feed_forward.w3
610
- model.layers.10.feed_forward.w3.lora_dropout
611
- model.layers.10.feed_forward.w3.Plora_A
612
- model.layers.10.feed_forward.w3.Plora_B
613
- model.layers.10.feed_forward.w3.lora_sft_A
614
- model.layers.10.feed_forward.w3.lora_sft_B
615
- model.layers.10.feed_forward.w3.lora_dpo_A
616
- model.layers.10.feed_forward.w3.lora_dpo_B
617
- model.layers.10.feed_forward.w3.lora_web_A
618
- model.layers.10.feed_forward.w3.lora_web_B
619
- model.layers.10.feed_forward.w2
620
- model.layers.10.feed_forward.w2.lora_dropout
621
- model.layers.10.feed_forward.w2.Plora_A
622
- model.layers.10.feed_forward.w2.Plora_B
623
- model.layers.10.feed_forward.w2.lora_sft_A
624
- model.layers.10.feed_forward.w2.lora_sft_B
625
- model.layers.10.feed_forward.w2.lora_dpo_A
626
- model.layers.10.feed_forward.w2.lora_dpo_B
627
- model.layers.10.feed_forward.w2.lora_web_A
628
- model.layers.10.feed_forward.w2.lora_web_B
629
- model.layers.10.feed_forward.act_fn
630
- model.layers.10.attention_norm
631
- model.layers.10.ffn_norm
632
- model.layers.11
633
- model.layers.11.attention
634
- model.layers.11.attention.wqkv
635
- model.layers.11.attention.wqkv.lora_dropout
636
- model.layers.11.attention.wqkv.Plora_A
637
- model.layers.11.attention.wqkv.Plora_B
638
- model.layers.11.attention.wqkv.lora_sft_A
639
- model.layers.11.attention.wqkv.lora_sft_B
640
- model.layers.11.attention.wqkv.lora_dpo_A
641
- model.layers.11.attention.wqkv.lora_dpo_B
642
- model.layers.11.attention.wqkv.lora_web_A
643
- model.layers.11.attention.wqkv.lora_web_B
644
- model.layers.11.attention.wo
645
- model.layers.11.attention.wo.lora_dropout
646
- model.layers.11.attention.wo.Plora_A
647
- model.layers.11.attention.wo.Plora_B
648
- model.layers.11.attention.wo.lora_sft_A
649
- model.layers.11.attention.wo.lora_sft_B
650
- model.layers.11.attention.wo.lora_dpo_A
651
- model.layers.11.attention.wo.lora_dpo_B
652
- model.layers.11.attention.wo.lora_web_A
653
- model.layers.11.attention.wo.lora_web_B
654
- model.layers.11.attention.rotary_emb
655
- model.layers.11.feed_forward
656
- model.layers.11.feed_forward.w1
657
- model.layers.11.feed_forward.w1.lora_dropout
658
- model.layers.11.feed_forward.w1.Plora_A
659
- model.layers.11.feed_forward.w1.Plora_B
660
- model.layers.11.feed_forward.w1.lora_sft_A
661
- model.layers.11.feed_forward.w1.lora_sft_B
662
- model.layers.11.feed_forward.w1.lora_dpo_A
663
- model.layers.11.feed_forward.w1.lora_dpo_B
664
- model.layers.11.feed_forward.w1.lora_web_A
665
- model.layers.11.feed_forward.w1.lora_web_B
666
- model.layers.11.feed_forward.w3
667
- model.layers.11.feed_forward.w3.lora_dropout
668
- model.layers.11.feed_forward.w3.Plora_A
669
- model.layers.11.feed_forward.w3.Plora_B
670
- model.layers.11.feed_forward.w3.lora_sft_A
671
- model.layers.11.feed_forward.w3.lora_sft_B
672
- model.layers.11.feed_forward.w3.lora_dpo_A
673
- model.layers.11.feed_forward.w3.lora_dpo_B
674
- model.layers.11.feed_forward.w3.lora_web_A
675
- model.layers.11.feed_forward.w3.lora_web_B
676
- model.layers.11.feed_forward.w2
677
- model.layers.11.feed_forward.w2.lora_dropout
678
- model.layers.11.feed_forward.w2.Plora_A
679
- model.layers.11.feed_forward.w2.Plora_B
680
- model.layers.11.feed_forward.w2.lora_sft_A
681
- model.layers.11.feed_forward.w2.lora_sft_B
682
- model.layers.11.feed_forward.w2.lora_dpo_A
683
- model.layers.11.feed_forward.w2.lora_dpo_B
684
- model.layers.11.feed_forward.w2.lora_web_A
685
- model.layers.11.feed_forward.w2.lora_web_B
686
- model.layers.11.feed_forward.act_fn
687
- model.layers.11.attention_norm
688
- model.layers.11.ffn_norm
689
- model.layers.12
690
- model.layers.12.attention
691
- model.layers.12.attention.wqkv
692
- model.layers.12.attention.wqkv.lora_dropout
693
- model.layers.12.attention.wqkv.Plora_A
694
- model.layers.12.attention.wqkv.Plora_B
695
- model.layers.12.attention.wqkv.lora_sft_A
696
- model.layers.12.attention.wqkv.lora_sft_B
697
- model.layers.12.attention.wqkv.lora_dpo_A
698
- model.layers.12.attention.wqkv.lora_dpo_B
699
- model.layers.12.attention.wqkv.lora_web_A
700
- model.layers.12.attention.wqkv.lora_web_B
701
- model.layers.12.attention.wo
702
- model.layers.12.attention.wo.lora_dropout
703
- model.layers.12.attention.wo.Plora_A
704
- model.layers.12.attention.wo.Plora_B
705
- model.layers.12.attention.wo.lora_sft_A
706
- model.layers.12.attention.wo.lora_sft_B
707
- model.layers.12.attention.wo.lora_dpo_A
708
- model.layers.12.attention.wo.lora_dpo_B
709
- model.layers.12.attention.wo.lora_web_A
710
- model.layers.12.attention.wo.lora_web_B
711
- model.layers.12.attention.rotary_emb
712
- model.layers.12.feed_forward
713
- model.layers.12.feed_forward.w1
714
- model.layers.12.feed_forward.w1.lora_dropout
715
- model.layers.12.feed_forward.w1.Plora_A
716
- model.layers.12.feed_forward.w1.Plora_B
717
- model.layers.12.feed_forward.w1.lora_sft_A
718
- model.layers.12.feed_forward.w1.lora_sft_B
719
- model.layers.12.feed_forward.w1.lora_dpo_A
720
- model.layers.12.feed_forward.w1.lora_dpo_B
721
- model.layers.12.feed_forward.w1.lora_web_A
722
- model.layers.12.feed_forward.w1.lora_web_B
723
- model.layers.12.feed_forward.w3
724
- model.layers.12.feed_forward.w3.lora_dropout
725
- model.layers.12.feed_forward.w3.Plora_A
726
- model.layers.12.feed_forward.w3.Plora_B
727
- model.layers.12.feed_forward.w3.lora_sft_A
728
- model.layers.12.feed_forward.w3.lora_sft_B
729
- model.layers.12.feed_forward.w3.lora_dpo_A
730
- model.layers.12.feed_forward.w3.lora_dpo_B
731
- model.layers.12.feed_forward.w3.lora_web_A
732
- model.layers.12.feed_forward.w3.lora_web_B
733
- model.layers.12.feed_forward.w2
734
- model.layers.12.feed_forward.w2.lora_dropout
735
- model.layers.12.feed_forward.w2.Plora_A
736
- model.layers.12.feed_forward.w2.Plora_B
737
- model.layers.12.feed_forward.w2.lora_sft_A
738
- model.layers.12.feed_forward.w2.lora_sft_B
739
- model.layers.12.feed_forward.w2.lora_dpo_A
740
- model.layers.12.feed_forward.w2.lora_dpo_B
741
- model.layers.12.feed_forward.w2.lora_web_A
742
- model.layers.12.feed_forward.w2.lora_web_B
743
- model.layers.12.feed_forward.act_fn
744
- model.layers.12.attention_norm
745
- model.layers.12.ffn_norm
746
- model.layers.13
747
- model.layers.13.attention
748
- model.layers.13.attention.wqkv
749
- model.layers.13.attention.wqkv.lora_dropout
750
- model.layers.13.attention.wqkv.Plora_A
751
- model.layers.13.attention.wqkv.Plora_B
752
- model.layers.13.attention.wqkv.lora_sft_A
753
- model.layers.13.attention.wqkv.lora_sft_B
754
- model.layers.13.attention.wqkv.lora_dpo_A
755
- model.layers.13.attention.wqkv.lora_dpo_B
756
- model.layers.13.attention.wqkv.lora_web_A
757
- model.layers.13.attention.wqkv.lora_web_B
758
- model.layers.13.attention.wo
759
- model.layers.13.attention.wo.lora_dropout
760
- model.layers.13.attention.wo.Plora_A
761
- model.layers.13.attention.wo.Plora_B
762
- model.layers.13.attention.wo.lora_sft_A
763
- model.layers.13.attention.wo.lora_sft_B
764
- model.layers.13.attention.wo.lora_dpo_A
765
- model.layers.13.attention.wo.lora_dpo_B
766
- model.layers.13.attention.wo.lora_web_A
767
- model.layers.13.attention.wo.lora_web_B
768
- model.layers.13.attention.rotary_emb
769
- model.layers.13.feed_forward
770
- model.layers.13.feed_forward.w1
771
- model.layers.13.feed_forward.w1.lora_dropout
772
- model.layers.13.feed_forward.w1.Plora_A
773
- model.layers.13.feed_forward.w1.Plora_B
774
- model.layers.13.feed_forward.w1.lora_sft_A
775
- model.layers.13.feed_forward.w1.lora_sft_B
776
- model.layers.13.feed_forward.w1.lora_dpo_A
777
- model.layers.13.feed_forward.w1.lora_dpo_B
778
- model.layers.13.feed_forward.w1.lora_web_A
779
- model.layers.13.feed_forward.w1.lora_web_B
780
- model.layers.13.feed_forward.w3
781
- model.layers.13.feed_forward.w3.lora_dropout
782
- model.layers.13.feed_forward.w3.Plora_A
783
- model.layers.13.feed_forward.w3.Plora_B
784
- model.layers.13.feed_forward.w3.lora_sft_A
785
- model.layers.13.feed_forward.w3.lora_sft_B
786
- model.layers.13.feed_forward.w3.lora_dpo_A
787
- model.layers.13.feed_forward.w3.lora_dpo_B
788
- model.layers.13.feed_forward.w3.lora_web_A
789
- model.layers.13.feed_forward.w3.lora_web_B
790
- model.layers.13.feed_forward.w2
791
- model.layers.13.feed_forward.w2.lora_dropout
792
- model.layers.13.feed_forward.w2.Plora_A
793
- model.layers.13.feed_forward.w2.Plora_B
794
- model.layers.13.feed_forward.w2.lora_sft_A
795
- model.layers.13.feed_forward.w2.lora_sft_B
796
- model.layers.13.feed_forward.w2.lora_dpo_A
797
- model.layers.13.feed_forward.w2.lora_dpo_B
798
- model.layers.13.feed_forward.w2.lora_web_A
799
- model.layers.13.feed_forward.w2.lora_web_B
800
- model.layers.13.feed_forward.act_fn
801
- model.layers.13.attention_norm
802
- model.layers.13.ffn_norm
803
- model.layers.14
804
- model.layers.14.attention
805
- model.layers.14.attention.wqkv
806
- model.layers.14.attention.wqkv.lora_dropout
807
- model.layers.14.attention.wqkv.Plora_A
808
- model.layers.14.attention.wqkv.Plora_B
809
- model.layers.14.attention.wqkv.lora_sft_A
810
- model.layers.14.attention.wqkv.lora_sft_B
811
- model.layers.14.attention.wqkv.lora_dpo_A
812
- model.layers.14.attention.wqkv.lora_dpo_B
813
- model.layers.14.attention.wqkv.lora_web_A
814
- model.layers.14.attention.wqkv.lora_web_B
815
- model.layers.14.attention.wo
816
- model.layers.14.attention.wo.lora_dropout
817
- model.layers.14.attention.wo.Plora_A
818
- model.layers.14.attention.wo.Plora_B
819
- model.layers.14.attention.wo.lora_sft_A
820
- model.layers.14.attention.wo.lora_sft_B
821
- model.layers.14.attention.wo.lora_dpo_A
822
- model.layers.14.attention.wo.lora_dpo_B
823
- model.layers.14.attention.wo.lora_web_A
824
- model.layers.14.attention.wo.lora_web_B
825
- model.layers.14.attention.rotary_emb
826
- model.layers.14.feed_forward
827
- model.layers.14.feed_forward.w1
828
- model.layers.14.feed_forward.w1.lora_dropout
829
- model.layers.14.feed_forward.w1.Plora_A
830
- model.layers.14.feed_forward.w1.Plora_B
831
- model.layers.14.feed_forward.w1.lora_sft_A
832
- model.layers.14.feed_forward.w1.lora_sft_B
833
- model.layers.14.feed_forward.w1.lora_dpo_A
834
- model.layers.14.feed_forward.w1.lora_dpo_B
835
- model.layers.14.feed_forward.w1.lora_web_A
836
- model.layers.14.feed_forward.w1.lora_web_B
837
- model.layers.14.feed_forward.w3
838
- model.layers.14.feed_forward.w3.lora_dropout
839
- model.layers.14.feed_forward.w3.Plora_A
840
- model.layers.14.feed_forward.w3.Plora_B
841
- model.layers.14.feed_forward.w3.lora_sft_A
842
- model.layers.14.feed_forward.w3.lora_sft_B
843
- model.layers.14.feed_forward.w3.lora_dpo_A
844
- model.layers.14.feed_forward.w3.lora_dpo_B
845
- model.layers.14.feed_forward.w3.lora_web_A
846
- model.layers.14.feed_forward.w3.lora_web_B
847
- model.layers.14.feed_forward.w2
848
- model.layers.14.feed_forward.w2.lora_dropout
849
- model.layers.14.feed_forward.w2.Plora_A
850
- model.layers.14.feed_forward.w2.Plora_B
851
- model.layers.14.feed_forward.w2.lora_sft_A
852
- model.layers.14.feed_forward.w2.lora_sft_B
853
- model.layers.14.feed_forward.w2.lora_dpo_A
854
- model.layers.14.feed_forward.w2.lora_dpo_B
855
- model.layers.14.feed_forward.w2.lora_web_A
856
- model.layers.14.feed_forward.w2.lora_web_B
857
- model.layers.14.feed_forward.act_fn
858
- model.layers.14.attention_norm
859
- model.layers.14.ffn_norm
860
- model.layers.15
861
- model.layers.15.attention
862
- model.layers.15.attention.wqkv
863
- model.layers.15.attention.wqkv.lora_dropout
864
- model.layers.15.attention.wqkv.Plora_A
865
- model.layers.15.attention.wqkv.Plora_B
866
- model.layers.15.attention.wqkv.lora_sft_A
867
- model.layers.15.attention.wqkv.lora_sft_B
868
- model.layers.15.attention.wqkv.lora_dpo_A
869
- model.layers.15.attention.wqkv.lora_dpo_B
870
- model.layers.15.attention.wqkv.lora_web_A
871
- model.layers.15.attention.wqkv.lora_web_B
872
- model.layers.15.attention.wo
873
- model.layers.15.attention.wo.lora_dropout
874
- model.layers.15.attention.wo.Plora_A
875
- model.layers.15.attention.wo.Plora_B
876
- model.layers.15.attention.wo.lora_sft_A
877
- model.layers.15.attention.wo.lora_sft_B
878
- model.layers.15.attention.wo.lora_dpo_A
879
- model.layers.15.attention.wo.lora_dpo_B
880
- model.layers.15.attention.wo.lora_web_A
881
- model.layers.15.attention.wo.lora_web_B
882
- model.layers.15.attention.rotary_emb
883
- model.layers.15.feed_forward
884
- model.layers.15.feed_forward.w1
885
- model.layers.15.feed_forward.w1.lora_dropout
886
- model.layers.15.feed_forward.w1.Plora_A
887
- model.layers.15.feed_forward.w1.Plora_B
888
- model.layers.15.feed_forward.w1.lora_sft_A
889
- model.layers.15.feed_forward.w1.lora_sft_B
890
- model.layers.15.feed_forward.w1.lora_dpo_A
891
- model.layers.15.feed_forward.w1.lora_dpo_B
892
- model.layers.15.feed_forward.w1.lora_web_A
893
- model.layers.15.feed_forward.w1.lora_web_B
894
- model.layers.15.feed_forward.w3
895
- model.layers.15.feed_forward.w3.lora_dropout
896
- model.layers.15.feed_forward.w3.Plora_A
897
- model.layers.15.feed_forward.w3.Plora_B
898
- model.layers.15.feed_forward.w3.lora_sft_A
899
- model.layers.15.feed_forward.w3.lora_sft_B
900
- model.layers.15.feed_forward.w3.lora_dpo_A
901
- model.layers.15.feed_forward.w3.lora_dpo_B
902
- model.layers.15.feed_forward.w3.lora_web_A
903
- model.layers.15.feed_forward.w3.lora_web_B
904
- model.layers.15.feed_forward.w2
905
- model.layers.15.feed_forward.w2.lora_dropout
906
- model.layers.15.feed_forward.w2.Plora_A
907
- model.layers.15.feed_forward.w2.Plora_B
908
- model.layers.15.feed_forward.w2.lora_sft_A
909
- model.layers.15.feed_forward.w2.lora_sft_B
910
- model.layers.15.feed_forward.w2.lora_dpo_A
911
- model.layers.15.feed_forward.w2.lora_dpo_B
912
- model.layers.15.feed_forward.w2.lora_web_A
913
- model.layers.15.feed_forward.w2.lora_web_B
914
- model.layers.15.feed_forward.act_fn
915
- model.layers.15.attention_norm
916
- model.layers.15.ffn_norm
917
- model.layers.16
918
- model.layers.16.attention
919
- model.layers.16.attention.wqkv
920
- model.layers.16.attention.wqkv.lora_dropout
921
- model.layers.16.attention.wqkv.Plora_A
922
- model.layers.16.attention.wqkv.Plora_B
923
- model.layers.16.attention.wqkv.lora_sft_A
924
- model.layers.16.attention.wqkv.lora_sft_B
925
- model.layers.16.attention.wqkv.lora_dpo_A
926
- model.layers.16.attention.wqkv.lora_dpo_B
927
- model.layers.16.attention.wqkv.lora_web_A
928
- model.layers.16.attention.wqkv.lora_web_B
929
- model.layers.16.attention.wo
930
- model.layers.16.attention.wo.lora_dropout
931
- model.layers.16.attention.wo.Plora_A
932
- model.layers.16.attention.wo.Plora_B
933
- model.layers.16.attention.wo.lora_sft_A
934
- model.layers.16.attention.wo.lora_sft_B
935
- model.layers.16.attention.wo.lora_dpo_A
936
- model.layers.16.attention.wo.lora_dpo_B
937
- model.layers.16.attention.wo.lora_web_A
938
- model.layers.16.attention.wo.lora_web_B
939
- model.layers.16.attention.rotary_emb
940
- model.layers.16.feed_forward
941
- model.layers.16.feed_forward.w1
942
- model.layers.16.feed_forward.w1.lora_dropout
943
- model.layers.16.feed_forward.w1.Plora_A
944
- model.layers.16.feed_forward.w1.Plora_B
945
- model.layers.16.feed_forward.w1.lora_sft_A
946
- model.layers.16.feed_forward.w1.lora_sft_B
947
- model.layers.16.feed_forward.w1.lora_dpo_A
948
- model.layers.16.feed_forward.w1.lora_dpo_B
949
- model.layers.16.feed_forward.w1.lora_web_A
950
- model.layers.16.feed_forward.w1.lora_web_B
951
- model.layers.16.feed_forward.w3
952
- model.layers.16.feed_forward.w3.lora_dropout
953
- model.layers.16.feed_forward.w3.Plora_A
954
- model.layers.16.feed_forward.w3.Plora_B
955
- model.layers.16.feed_forward.w3.lora_sft_A
956
- model.layers.16.feed_forward.w3.lora_sft_B
957
- model.layers.16.feed_forward.w3.lora_dpo_A
958
- model.layers.16.feed_forward.w3.lora_dpo_B
959
- model.layers.16.feed_forward.w3.lora_web_A
960
- model.layers.16.feed_forward.w3.lora_web_B
961
- model.layers.16.feed_forward.w2
962
- model.layers.16.feed_forward.w2.lora_dropout
963
- model.layers.16.feed_forward.w2.Plora_A
964
- model.layers.16.feed_forward.w2.Plora_B
965
- model.layers.16.feed_forward.w2.lora_sft_A
966
- model.layers.16.feed_forward.w2.lora_sft_B
967
- model.layers.16.feed_forward.w2.lora_dpo_A
968
- model.layers.16.feed_forward.w2.lora_dpo_B
969
- model.layers.16.feed_forward.w2.lora_web_A
970
- model.layers.16.feed_forward.w2.lora_web_B
971
- model.layers.16.feed_forward.act_fn
972
- model.layers.16.attention_norm
973
- model.layers.16.ffn_norm
974
- model.layers.17
975
- model.layers.17.attention
976
- model.layers.17.attention.wqkv
977
- model.layers.17.attention.wqkv.lora_dropout
978
- model.layers.17.attention.wqkv.Plora_A
979
- model.layers.17.attention.wqkv.Plora_B
980
- model.layers.17.attention.wqkv.lora_sft_A
981
- model.layers.17.attention.wqkv.lora_sft_B
982
- model.layers.17.attention.wqkv.lora_dpo_A
983
- model.layers.17.attention.wqkv.lora_dpo_B
984
- model.layers.17.attention.wqkv.lora_web_A
985
- model.layers.17.attention.wqkv.lora_web_B
986
- model.layers.17.attention.wo
987
- model.layers.17.attention.wo.lora_dropout
988
- model.layers.17.attention.wo.Plora_A
989
- model.layers.17.attention.wo.Plora_B
990
- model.layers.17.attention.wo.lora_sft_A
991
- model.layers.17.attention.wo.lora_sft_B
992
- model.layers.17.attention.wo.lora_dpo_A
993
- model.layers.17.attention.wo.lora_dpo_B
994
- model.layers.17.attention.wo.lora_web_A
995
- model.layers.17.attention.wo.lora_web_B
996
- model.layers.17.attention.rotary_emb
997
- model.layers.17.feed_forward
998
- model.layers.17.feed_forward.w1
999
- model.layers.17.feed_forward.w1.lora_dropout
1000
- model.layers.17.feed_forward.w1.Plora_A
1001
- model.layers.17.feed_forward.w1.Plora_B
1002
- model.layers.17.feed_forward.w1.lora_sft_A
1003
- model.layers.17.feed_forward.w1.lora_sft_B
1004
- model.layers.17.feed_forward.w1.lora_dpo_A
1005
- model.layers.17.feed_forward.w1.lora_dpo_B
1006
- model.layers.17.feed_forward.w1.lora_web_A
1007
- model.layers.17.feed_forward.w1.lora_web_B
1008
- model.layers.17.feed_forward.w3
1009
- model.layers.17.feed_forward.w3.lora_dropout
1010
- model.layers.17.feed_forward.w3.Plora_A
1011
- model.layers.17.feed_forward.w3.Plora_B
1012
- model.layers.17.feed_forward.w3.lora_sft_A
1013
- model.layers.17.feed_forward.w3.lora_sft_B
1014
- model.layers.17.feed_forward.w3.lora_dpo_A
1015
- model.layers.17.feed_forward.w3.lora_dpo_B
1016
- model.layers.17.feed_forward.w3.lora_web_A
1017
- model.layers.17.feed_forward.w3.lora_web_B
1018
- model.layers.17.feed_forward.w2
1019
- model.layers.17.feed_forward.w2.lora_dropout
1020
- model.layers.17.feed_forward.w2.Plora_A
1021
- model.layers.17.feed_forward.w2.Plora_B
1022
- model.layers.17.feed_forward.w2.lora_sft_A
1023
- model.layers.17.feed_forward.w2.lora_sft_B
1024
- model.layers.17.feed_forward.w2.lora_dpo_A
1025
- model.layers.17.feed_forward.w2.lora_dpo_B
1026
- model.layers.17.feed_forward.w2.lora_web_A
1027
- model.layers.17.feed_forward.w2.lora_web_B
1028
- model.layers.17.feed_forward.act_fn
1029
- model.layers.17.attention_norm
1030
- model.layers.17.ffn_norm
1031
- model.layers.18
1032
- model.layers.18.attention
1033
- model.layers.18.attention.wqkv
1034
- model.layers.18.attention.wqkv.lora_dropout
1035
- model.layers.18.attention.wqkv.Plora_A
1036
- model.layers.18.attention.wqkv.Plora_B
1037
- model.layers.18.attention.wqkv.lora_sft_A
1038
- model.layers.18.attention.wqkv.lora_sft_B
1039
- model.layers.18.attention.wqkv.lora_dpo_A
1040
- model.layers.18.attention.wqkv.lora_dpo_B
1041
- model.layers.18.attention.wqkv.lora_web_A
1042
- model.layers.18.attention.wqkv.lora_web_B
1043
- model.layers.18.attention.wo
1044
- model.layers.18.attention.wo.lora_dropout
1045
- model.layers.18.attention.wo.Plora_A
1046
- model.layers.18.attention.wo.Plora_B
1047
- model.layers.18.attention.wo.lora_sft_A
1048
- model.layers.18.attention.wo.lora_sft_B
1049
- model.layers.18.attention.wo.lora_dpo_A
1050
- model.layers.18.attention.wo.lora_dpo_B
1051
- model.layers.18.attention.wo.lora_web_A
1052
- model.layers.18.attention.wo.lora_web_B
1053
- model.layers.18.attention.rotary_emb
1054
- model.layers.18.feed_forward
1055
- model.layers.18.feed_forward.w1
1056
- model.layers.18.feed_forward.w1.lora_dropout
1057
- model.layers.18.feed_forward.w1.Plora_A
1058
- model.layers.18.feed_forward.w1.Plora_B
1059
- model.layers.18.feed_forward.w1.lora_sft_A
1060
- model.layers.18.feed_forward.w1.lora_sft_B
1061
- model.layers.18.feed_forward.w1.lora_dpo_A
1062
- model.layers.18.feed_forward.w1.lora_dpo_B
1063
- model.layers.18.feed_forward.w1.lora_web_A
1064
- model.layers.18.feed_forward.w1.lora_web_B
1065
- model.layers.18.feed_forward.w3
1066
- model.layers.18.feed_forward.w3.lora_dropout
1067
- model.layers.18.feed_forward.w3.Plora_A
1068
- model.layers.18.feed_forward.w3.Plora_B
1069
- model.layers.18.feed_forward.w3.lora_sft_A
1070
- model.layers.18.feed_forward.w3.lora_sft_B
1071
- model.layers.18.feed_forward.w3.lora_dpo_A
1072
- model.layers.18.feed_forward.w3.lora_dpo_B
1073
- model.layers.18.feed_forward.w3.lora_web_A
1074
- model.layers.18.feed_forward.w3.lora_web_B
1075
- model.layers.18.feed_forward.w2
1076
- model.layers.18.feed_forward.w2.lora_dropout
1077
- model.layers.18.feed_forward.w2.Plora_A
1078
- model.layers.18.feed_forward.w2.Plora_B
1079
- model.layers.18.feed_forward.w2.lora_sft_A
1080
- model.layers.18.feed_forward.w2.lora_sft_B
1081
- model.layers.18.feed_forward.w2.lora_dpo_A
1082
- model.layers.18.feed_forward.w2.lora_dpo_B
1083
- model.layers.18.feed_forward.w2.lora_web_A
1084
- model.layers.18.feed_forward.w2.lora_web_B
1085
- model.layers.18.feed_forward.act_fn
1086
- model.layers.18.attention_norm
1087
- model.layers.18.ffn_norm
1088
- model.layers.19
1089
- model.layers.19.attention
1090
- model.layers.19.attention.wqkv
1091
- model.layers.19.attention.wqkv.lora_dropout
1092
- model.layers.19.attention.wqkv.Plora_A
1093
- model.layers.19.attention.wqkv.Plora_B
1094
- model.layers.19.attention.wqkv.lora_sft_A
1095
- model.layers.19.attention.wqkv.lora_sft_B
1096
- model.layers.19.attention.wqkv.lora_dpo_A
1097
- model.layers.19.attention.wqkv.lora_dpo_B
1098
- model.layers.19.attention.wqkv.lora_web_A
1099
- model.layers.19.attention.wqkv.lora_web_B
1100
- model.layers.19.attention.wo
1101
- model.layers.19.attention.wo.lora_dropout
1102
- model.layers.19.attention.wo.Plora_A
1103
- model.layers.19.attention.wo.Plora_B
1104
- model.layers.19.attention.wo.lora_sft_A
1105
- model.layers.19.attention.wo.lora_sft_B
1106
- model.layers.19.attention.wo.lora_dpo_A
1107
- model.layers.19.attention.wo.lora_dpo_B
1108
- model.layers.19.attention.wo.lora_web_A
1109
- model.layers.19.attention.wo.lora_web_B
1110
- model.layers.19.attention.rotary_emb
1111
- model.layers.19.feed_forward
1112
- model.layers.19.feed_forward.w1
1113
- model.layers.19.feed_forward.w1.lora_dropout
1114
- model.layers.19.feed_forward.w1.Plora_A
1115
- model.layers.19.feed_forward.w1.Plora_B
1116
- model.layers.19.feed_forward.w1.lora_sft_A
1117
- model.layers.19.feed_forward.w1.lora_sft_B
1118
- model.layers.19.feed_forward.w1.lora_dpo_A
1119
- model.layers.19.feed_forward.w1.lora_dpo_B
1120
- model.layers.19.feed_forward.w1.lora_web_A
1121
- model.layers.19.feed_forward.w1.lora_web_B
1122
- model.layers.19.feed_forward.w3
1123
- model.layers.19.feed_forward.w3.lora_dropout
1124
- model.layers.19.feed_forward.w3.Plora_A
1125
- model.layers.19.feed_forward.w3.Plora_B
1126
- model.layers.19.feed_forward.w3.lora_sft_A
1127
- model.layers.19.feed_forward.w3.lora_sft_B
1128
- model.layers.19.feed_forward.w3.lora_dpo_A
1129
- model.layers.19.feed_forward.w3.lora_dpo_B
1130
- model.layers.19.feed_forward.w3.lora_web_A
1131
- model.layers.19.feed_forward.w3.lora_web_B
1132
- model.layers.19.feed_forward.w2
1133
- model.layers.19.feed_forward.w2.lora_dropout
1134
- model.layers.19.feed_forward.w2.Plora_A
1135
- model.layers.19.feed_forward.w2.Plora_B
1136
- model.layers.19.feed_forward.w2.lora_sft_A
1137
- model.layers.19.feed_forward.w2.lora_sft_B
1138
- model.layers.19.feed_forward.w2.lora_dpo_A
1139
- model.layers.19.feed_forward.w2.lora_dpo_B
1140
- model.layers.19.feed_forward.w2.lora_web_A
1141
- model.layers.19.feed_forward.w2.lora_web_B
1142
- model.layers.19.feed_forward.act_fn
1143
- model.layers.19.attention_norm
1144
- model.layers.19.ffn_norm
1145
- model.layers.20
1146
- model.layers.20.attention
1147
- model.layers.20.attention.wqkv
1148
- model.layers.20.attention.wqkv.lora_dropout
1149
- model.layers.20.attention.wqkv.Plora_A
1150
- model.layers.20.attention.wqkv.Plora_B
1151
- model.layers.20.attention.wqkv.lora_sft_A
1152
- model.layers.20.attention.wqkv.lora_sft_B
1153
- model.layers.20.attention.wqkv.lora_dpo_A
1154
- model.layers.20.attention.wqkv.lora_dpo_B
1155
- model.layers.20.attention.wqkv.lora_web_A
1156
- model.layers.20.attention.wqkv.lora_web_B
1157
- model.layers.20.attention.wo
1158
- model.layers.20.attention.wo.lora_dropout
1159
- model.layers.20.attention.wo.Plora_A
1160
- model.layers.20.attention.wo.Plora_B
1161
- model.layers.20.attention.wo.lora_sft_A
1162
- model.layers.20.attention.wo.lora_sft_B
1163
- model.layers.20.attention.wo.lora_dpo_A
1164
- model.layers.20.attention.wo.lora_dpo_B
1165
- model.layers.20.attention.wo.lora_web_A
1166
- model.layers.20.attention.wo.lora_web_B
1167
- model.layers.20.attention.rotary_emb
1168
- model.layers.20.feed_forward
1169
- model.layers.20.feed_forward.w1
1170
- model.layers.20.feed_forward.w1.lora_dropout
1171
- model.layers.20.feed_forward.w1.Plora_A
1172
- model.layers.20.feed_forward.w1.Plora_B
1173
- model.layers.20.feed_forward.w1.lora_sft_A
1174
- model.layers.20.feed_forward.w1.lora_sft_B
1175
- model.layers.20.feed_forward.w1.lora_dpo_A
1176
- model.layers.20.feed_forward.w1.lora_dpo_B
1177
- model.layers.20.feed_forward.w1.lora_web_A
1178
- model.layers.20.feed_forward.w1.lora_web_B
1179
- model.layers.20.feed_forward.w3
1180
- model.layers.20.feed_forward.w3.lora_dropout
1181
- model.layers.20.feed_forward.w3.Plora_A
1182
- model.layers.20.feed_forward.w3.Plora_B
1183
- model.layers.20.feed_forward.w3.lora_sft_A
1184
- model.layers.20.feed_forward.w3.lora_sft_B
1185
- model.layers.20.feed_forward.w3.lora_dpo_A
1186
- model.layers.20.feed_forward.w3.lora_dpo_B
1187
- model.layers.20.feed_forward.w3.lora_web_A
1188
- model.layers.20.feed_forward.w3.lora_web_B
1189
- model.layers.20.feed_forward.w2
1190
- model.layers.20.feed_forward.w2.lora_dropout
1191
- model.layers.20.feed_forward.w2.Plora_A
1192
- model.layers.20.feed_forward.w2.Plora_B
1193
- model.layers.20.feed_forward.w2.lora_sft_A
1194
- model.layers.20.feed_forward.w2.lora_sft_B
1195
- model.layers.20.feed_forward.w2.lora_dpo_A
1196
- model.layers.20.feed_forward.w2.lora_dpo_B
1197
- model.layers.20.feed_forward.w2.lora_web_A
1198
- model.layers.20.feed_forward.w2.lora_web_B
1199
- model.layers.20.feed_forward.act_fn
1200
- model.layers.20.attention_norm
1201
- model.layers.20.ffn_norm
1202
- model.layers.21
1203
- model.layers.21.attention
1204
- model.layers.21.attention.wqkv
1205
- model.layers.21.attention.wqkv.lora_dropout
1206
- model.layers.21.attention.wqkv.Plora_A
1207
- model.layers.21.attention.wqkv.Plora_B
1208
- model.layers.21.attention.wqkv.lora_sft_A
1209
- model.layers.21.attention.wqkv.lora_sft_B
1210
- model.layers.21.attention.wqkv.lora_dpo_A
1211
- model.layers.21.attention.wqkv.lora_dpo_B
1212
- model.layers.21.attention.wqkv.lora_web_A
1213
- model.layers.21.attention.wqkv.lora_web_B
1214
- model.layers.21.attention.wo
1215
- model.layers.21.attention.wo.lora_dropout
1216
- model.layers.21.attention.wo.Plora_A
1217
- model.layers.21.attention.wo.Plora_B
1218
- model.layers.21.attention.wo.lora_sft_A
1219
- model.layers.21.attention.wo.lora_sft_B
1220
- model.layers.21.attention.wo.lora_dpo_A
1221
- model.layers.21.attention.wo.lora_dpo_B
1222
- model.layers.21.attention.wo.lora_web_A
1223
- model.layers.21.attention.wo.lora_web_B
1224
- model.layers.21.attention.rotary_emb
1225
- model.layers.21.feed_forward
1226
- model.layers.21.feed_forward.w1
1227
- model.layers.21.feed_forward.w1.lora_dropout
1228
- model.layers.21.feed_forward.w1.Plora_A
1229
- model.layers.21.feed_forward.w1.Plora_B
1230
- model.layers.21.feed_forward.w1.lora_sft_A
1231
- model.layers.21.feed_forward.w1.lora_sft_B
1232
- model.layers.21.feed_forward.w1.lora_dpo_A
1233
- model.layers.21.feed_forward.w1.lora_dpo_B
1234
- model.layers.21.feed_forward.w1.lora_web_A
1235
- model.layers.21.feed_forward.w1.lora_web_B
1236
- model.layers.21.feed_forward.w3
1237
- model.layers.21.feed_forward.w3.lora_dropout
1238
- model.layers.21.feed_forward.w3.Plora_A
1239
- model.layers.21.feed_forward.w3.Plora_B
1240
- model.layers.21.feed_forward.w3.lora_sft_A
1241
- model.layers.21.feed_forward.w3.lora_sft_B
1242
- model.layers.21.feed_forward.w3.lora_dpo_A
1243
- model.layers.21.feed_forward.w3.lora_dpo_B
1244
- model.layers.21.feed_forward.w3.lora_web_A
1245
- model.layers.21.feed_forward.w3.lora_web_B
1246
- model.layers.21.feed_forward.w2
1247
- model.layers.21.feed_forward.w2.lora_dropout
1248
- model.layers.21.feed_forward.w2.Plora_A
1249
- model.layers.21.feed_forward.w2.Plora_B
1250
- model.layers.21.feed_forward.w2.lora_sft_A
1251
- model.layers.21.feed_forward.w2.lora_sft_B
1252
- model.layers.21.feed_forward.w2.lora_dpo_A
1253
- model.layers.21.feed_forward.w2.lora_dpo_B
1254
- model.layers.21.feed_forward.w2.lora_web_A
1255
- model.layers.21.feed_forward.w2.lora_web_B
1256
- model.layers.21.feed_forward.act_fn
1257
- model.layers.21.attention_norm
1258
- model.layers.21.ffn_norm
1259
- model.layers.22
1260
- model.layers.22.attention
1261
- model.layers.22.attention.wqkv
1262
- model.layers.22.attention.wqkv.lora_dropout
1263
- model.layers.22.attention.wqkv.Plora_A
1264
- model.layers.22.attention.wqkv.Plora_B
1265
- model.layers.22.attention.wqkv.lora_sft_A
1266
- model.layers.22.attention.wqkv.lora_sft_B
1267
- model.layers.22.attention.wqkv.lora_dpo_A
1268
- model.layers.22.attention.wqkv.lora_dpo_B
1269
- model.layers.22.attention.wqkv.lora_web_A
1270
- model.layers.22.attention.wqkv.lora_web_B
1271
- model.layers.22.attention.wo
1272
- model.layers.22.attention.wo.lora_dropout
1273
- model.layers.22.attention.wo.Plora_A
1274
- model.layers.22.attention.wo.Plora_B
1275
- model.layers.22.attention.wo.lora_sft_A
1276
- model.layers.22.attention.wo.lora_sft_B
1277
- model.layers.22.attention.wo.lora_dpo_A
1278
- model.layers.22.attention.wo.lora_dpo_B
1279
- model.layers.22.attention.wo.lora_web_A
1280
- model.layers.22.attention.wo.lora_web_B
1281
- model.layers.22.attention.rotary_emb
1282
- model.layers.22.feed_forward
1283
- model.layers.22.feed_forward.w1
1284
- model.layers.22.feed_forward.w1.lora_dropout
1285
- model.layers.22.feed_forward.w1.Plora_A
1286
- model.layers.22.feed_forward.w1.Plora_B
1287
- model.layers.22.feed_forward.w1.lora_sft_A
1288
- model.layers.22.feed_forward.w1.lora_sft_B
1289
- model.layers.22.feed_forward.w1.lora_dpo_A
1290
- model.layers.22.feed_forward.w1.lora_dpo_B
1291
- model.layers.22.feed_forward.w1.lora_web_A
1292
- model.layers.22.feed_forward.w1.lora_web_B
1293
- model.layers.22.feed_forward.w3
1294
- model.layers.22.feed_forward.w3.lora_dropout
1295
- model.layers.22.feed_forward.w3.Plora_A
1296
- model.layers.22.feed_forward.w3.Plora_B
1297
- model.layers.22.feed_forward.w3.lora_sft_A
1298
- model.layers.22.feed_forward.w3.lora_sft_B
1299
- model.layers.22.feed_forward.w3.lora_dpo_A
1300
- model.layers.22.feed_forward.w3.lora_dpo_B
1301
- model.layers.22.feed_forward.w3.lora_web_A
1302
- model.layers.22.feed_forward.w3.lora_web_B
1303
- model.layers.22.feed_forward.w2
1304
- model.layers.22.feed_forward.w2.lora_dropout
1305
- model.layers.22.feed_forward.w2.Plora_A
1306
- model.layers.22.feed_forward.w2.Plora_B
1307
- model.layers.22.feed_forward.w2.lora_sft_A
1308
- model.layers.22.feed_forward.w2.lora_sft_B
1309
- model.layers.22.feed_forward.w2.lora_dpo_A
1310
- model.layers.22.feed_forward.w2.lora_dpo_B
1311
- model.layers.22.feed_forward.w2.lora_web_A
1312
- model.layers.22.feed_forward.w2.lora_web_B
1313
- model.layers.22.feed_forward.act_fn
1314
- model.layers.22.attention_norm
1315
- model.layers.22.ffn_norm
1316
- model.layers.23
1317
- model.layers.23.attention
1318
- model.layers.23.attention.wqkv
1319
- model.layers.23.attention.wqkv.lora_dropout
1320
- model.layers.23.attention.wqkv.Plora_A
1321
- model.layers.23.attention.wqkv.Plora_B
1322
- model.layers.23.attention.wqkv.lora_sft_A
1323
- model.layers.23.attention.wqkv.lora_sft_B
1324
- model.layers.23.attention.wqkv.lora_dpo_A
1325
- model.layers.23.attention.wqkv.lora_dpo_B
1326
- model.layers.23.attention.wqkv.lora_web_A
1327
- model.layers.23.attention.wqkv.lora_web_B
1328
- model.layers.23.attention.wo
1329
- model.layers.23.attention.wo.lora_dropout
1330
- model.layers.23.attention.wo.Plora_A
1331
- model.layers.23.attention.wo.Plora_B
1332
- model.layers.23.attention.wo.lora_sft_A
1333
- model.layers.23.attention.wo.lora_sft_B
1334
- model.layers.23.attention.wo.lora_dpo_A
1335
- model.layers.23.attention.wo.lora_dpo_B
1336
- model.layers.23.attention.wo.lora_web_A
1337
- model.layers.23.attention.wo.lora_web_B
1338
- model.layers.23.attention.rotary_emb
1339
- model.layers.23.feed_forward
1340
- model.layers.23.feed_forward.w1
1341
- model.layers.23.feed_forward.w1.lora_dropout
1342
- model.layers.23.feed_forward.w1.Plora_A
1343
- model.layers.23.feed_forward.w1.Plora_B
1344
- model.layers.23.feed_forward.w1.lora_sft_A
1345
- model.layers.23.feed_forward.w1.lora_sft_B
1346
- model.layers.23.feed_forward.w1.lora_dpo_A
1347
- model.layers.23.feed_forward.w1.lora_dpo_B
1348
- model.layers.23.feed_forward.w1.lora_web_A
1349
- model.layers.23.feed_forward.w1.lora_web_B
1350
- model.layers.23.feed_forward.w3
1351
- model.layers.23.feed_forward.w3.lora_dropout
1352
- model.layers.23.feed_forward.w3.Plora_A
1353
- model.layers.23.feed_forward.w3.Plora_B
1354
- model.layers.23.feed_forward.w3.lora_sft_A
1355
- model.layers.23.feed_forward.w3.lora_sft_B
1356
- model.layers.23.feed_forward.w3.lora_dpo_A
1357
- model.layers.23.feed_forward.w3.lora_dpo_B
1358
- model.layers.23.feed_forward.w3.lora_web_A
1359
- model.layers.23.feed_forward.w3.lora_web_B
1360
- model.layers.23.feed_forward.w2
1361
- model.layers.23.feed_forward.w2.lora_dropout
1362
- model.layers.23.feed_forward.w2.Plora_A
1363
- model.layers.23.feed_forward.w2.Plora_B
1364
- model.layers.23.feed_forward.w2.lora_sft_A
1365
- model.layers.23.feed_forward.w2.lora_sft_B
1366
- model.layers.23.feed_forward.w2.lora_dpo_A
1367
- model.layers.23.feed_forward.w2.lora_dpo_B
1368
- model.layers.23.feed_forward.w2.lora_web_A
1369
- model.layers.23.feed_forward.w2.lora_web_B
1370
- model.layers.23.feed_forward.act_fn
1371
- model.layers.23.attention_norm
1372
- model.layers.23.ffn_norm
1373
- model.layers.24
1374
- model.layers.24.attention
1375
- model.layers.24.attention.wqkv
1376
- model.layers.24.attention.wqkv.lora_dropout
1377
- model.layers.24.attention.wqkv.Plora_A
1378
- model.layers.24.attention.wqkv.Plora_B
1379
- model.layers.24.attention.wqkv.lora_sft_A
1380
- model.layers.24.attention.wqkv.lora_sft_B
1381
- model.layers.24.attention.wqkv.lora_dpo_A
1382
- model.layers.24.attention.wqkv.lora_dpo_B
1383
- model.layers.24.attention.wqkv.lora_web_A
1384
- model.layers.24.attention.wqkv.lora_web_B
1385
- model.layers.24.attention.wo
1386
- model.layers.24.attention.wo.lora_dropout
1387
- model.layers.24.attention.wo.Plora_A
1388
- model.layers.24.attention.wo.Plora_B
1389
- model.layers.24.attention.wo.lora_sft_A
1390
- model.layers.24.attention.wo.lora_sft_B
1391
- model.layers.24.attention.wo.lora_dpo_A
1392
- model.layers.24.attention.wo.lora_dpo_B
1393
- model.layers.24.attention.wo.lora_web_A
1394
- model.layers.24.attention.wo.lora_web_B
1395
- model.layers.24.attention.rotary_emb
1396
- model.layers.24.feed_forward
1397
- model.layers.24.feed_forward.w1
1398
- model.layers.24.feed_forward.w1.lora_dropout
1399
- model.layers.24.feed_forward.w1.Plora_A
1400
- model.layers.24.feed_forward.w1.Plora_B
1401
- model.layers.24.feed_forward.w1.lora_sft_A
1402
- model.layers.24.feed_forward.w1.lora_sft_B
1403
- model.layers.24.feed_forward.w1.lora_dpo_A
1404
- model.layers.24.feed_forward.w1.lora_dpo_B
1405
- model.layers.24.feed_forward.w1.lora_web_A
1406
- model.layers.24.feed_forward.w1.lora_web_B
1407
- model.layers.24.feed_forward.w3
1408
- model.layers.24.feed_forward.w3.lora_dropout
1409
- model.layers.24.feed_forward.w3.Plora_A
1410
- model.layers.24.feed_forward.w3.Plora_B
1411
- model.layers.24.feed_forward.w3.lora_sft_A
1412
- model.layers.24.feed_forward.w3.lora_sft_B
1413
- model.layers.24.feed_forward.w3.lora_dpo_A
1414
- model.layers.24.feed_forward.w3.lora_dpo_B
1415
- model.layers.24.feed_forward.w3.lora_web_A
1416
- model.layers.24.feed_forward.w3.lora_web_B
1417
- model.layers.24.feed_forward.w2
1418
- model.layers.24.feed_forward.w2.lora_dropout
1419
- model.layers.24.feed_forward.w2.Plora_A
1420
- model.layers.24.feed_forward.w2.Plora_B
1421
- model.layers.24.feed_forward.w2.lora_sft_A
1422
- model.layers.24.feed_forward.w2.lora_sft_B
1423
- model.layers.24.feed_forward.w2.lora_dpo_A
1424
- model.layers.24.feed_forward.w2.lora_dpo_B
1425
- model.layers.24.feed_forward.w2.lora_web_A
1426
- model.layers.24.feed_forward.w2.lora_web_B
1427
- model.layers.24.feed_forward.act_fn
1428
- model.layers.24.attention_norm
1429
- model.layers.24.ffn_norm
1430
- model.layers.25
1431
- model.layers.25.attention
1432
- model.layers.25.attention.wqkv
1433
- model.layers.25.attention.wqkv.lora_dropout
1434
- model.layers.25.attention.wqkv.Plora_A
1435
- model.layers.25.attention.wqkv.Plora_B
1436
- model.layers.25.attention.wqkv.lora_sft_A
1437
- model.layers.25.attention.wqkv.lora_sft_B
1438
- model.layers.25.attention.wqkv.lora_dpo_A
1439
- model.layers.25.attention.wqkv.lora_dpo_B
1440
- model.layers.25.attention.wqkv.lora_web_A
1441
- model.layers.25.attention.wqkv.lora_web_B
1442
- model.layers.25.attention.wo
1443
- model.layers.25.attention.wo.lora_dropout
1444
- model.layers.25.attention.wo.Plora_A
1445
- model.layers.25.attention.wo.Plora_B
1446
- model.layers.25.attention.wo.lora_sft_A
1447
- model.layers.25.attention.wo.lora_sft_B
1448
- model.layers.25.attention.wo.lora_dpo_A
1449
- model.layers.25.attention.wo.lora_dpo_B
1450
- model.layers.25.attention.wo.lora_web_A
1451
- model.layers.25.attention.wo.lora_web_B
1452
- model.layers.25.attention.rotary_emb
1453
- model.layers.25.feed_forward
1454
- model.layers.25.feed_forward.w1
1455
- model.layers.25.feed_forward.w1.lora_dropout
1456
- model.layers.25.feed_forward.w1.Plora_A
1457
- model.layers.25.feed_forward.w1.Plora_B
1458
- model.layers.25.feed_forward.w1.lora_sft_A
1459
- model.layers.25.feed_forward.w1.lora_sft_B
1460
- model.layers.25.feed_forward.w1.lora_dpo_A
1461
- model.layers.25.feed_forward.w1.lora_dpo_B
1462
- model.layers.25.feed_forward.w1.lora_web_A
1463
- model.layers.25.feed_forward.w1.lora_web_B
1464
- model.layers.25.feed_forward.w3
1465
- model.layers.25.feed_forward.w3.lora_dropout
1466
- model.layers.25.feed_forward.w3.Plora_A
1467
- model.layers.25.feed_forward.w3.Plora_B
1468
- model.layers.25.feed_forward.w3.lora_sft_A
1469
- model.layers.25.feed_forward.w3.lora_sft_B
1470
- model.layers.25.feed_forward.w3.lora_dpo_A
1471
- model.layers.25.feed_forward.w3.lora_dpo_B
1472
- model.layers.25.feed_forward.w3.lora_web_A
1473
- model.layers.25.feed_forward.w3.lora_web_B
1474
- model.layers.25.feed_forward.w2
1475
- model.layers.25.feed_forward.w2.lora_dropout
1476
- model.layers.25.feed_forward.w2.Plora_A
1477
- model.layers.25.feed_forward.w2.Plora_B
1478
- model.layers.25.feed_forward.w2.lora_sft_A
1479
- model.layers.25.feed_forward.w2.lora_sft_B
1480
- model.layers.25.feed_forward.w2.lora_dpo_A
1481
- model.layers.25.feed_forward.w2.lora_dpo_B
1482
- model.layers.25.feed_forward.w2.lora_web_A
1483
- model.layers.25.feed_forward.w2.lora_web_B
1484
- model.layers.25.feed_forward.act_fn
1485
- model.layers.25.attention_norm
1486
- model.layers.25.ffn_norm
1487
- model.layers.26
1488
- model.layers.26.attention
1489
- model.layers.26.attention.wqkv
1490
- model.layers.26.attention.wqkv.lora_dropout
1491
- model.layers.26.attention.wqkv.Plora_A
1492
- model.layers.26.attention.wqkv.Plora_B
1493
- model.layers.26.attention.wqkv.lora_sft_A
1494
- model.layers.26.attention.wqkv.lora_sft_B
1495
- model.layers.26.attention.wqkv.lora_dpo_A
1496
- model.layers.26.attention.wqkv.lora_dpo_B
1497
- model.layers.26.attention.wqkv.lora_web_A
1498
- model.layers.26.attention.wqkv.lora_web_B
1499
- model.layers.26.attention.wo
1500
- model.layers.26.attention.wo.lora_dropout
1501
- model.layers.26.attention.wo.Plora_A
1502
- model.layers.26.attention.wo.Plora_B
1503
- model.layers.26.attention.wo.lora_sft_A
1504
- model.layers.26.attention.wo.lora_sft_B
1505
- model.layers.26.attention.wo.lora_dpo_A
1506
- model.layers.26.attention.wo.lora_dpo_B
1507
- model.layers.26.attention.wo.lora_web_A
1508
- model.layers.26.attention.wo.lora_web_B
1509
- model.layers.26.attention.rotary_emb
1510
- model.layers.26.feed_forward
1511
- model.layers.26.feed_forward.w1
1512
- model.layers.26.feed_forward.w1.lora_dropout
1513
- model.layers.26.feed_forward.w1.Plora_A
1514
- model.layers.26.feed_forward.w1.Plora_B
1515
- model.layers.26.feed_forward.w1.lora_sft_A
1516
- model.layers.26.feed_forward.w1.lora_sft_B
1517
- model.layers.26.feed_forward.w1.lora_dpo_A
1518
- model.layers.26.feed_forward.w1.lora_dpo_B
1519
- model.layers.26.feed_forward.w1.lora_web_A
1520
- model.layers.26.feed_forward.w1.lora_web_B
1521
- model.layers.26.feed_forward.w3
1522
- model.layers.26.feed_forward.w3.lora_dropout
1523
- model.layers.26.feed_forward.w3.Plora_A
1524
- model.layers.26.feed_forward.w3.Plora_B
1525
- model.layers.26.feed_forward.w3.lora_sft_A
1526
- model.layers.26.feed_forward.w3.lora_sft_B
1527
- model.layers.26.feed_forward.w3.lora_dpo_A
1528
- model.layers.26.feed_forward.w3.lora_dpo_B
1529
- model.layers.26.feed_forward.w3.lora_web_A
1530
- model.layers.26.feed_forward.w3.lora_web_B
1531
- model.layers.26.feed_forward.w2
1532
- model.layers.26.feed_forward.w2.lora_dropout
1533
- model.layers.26.feed_forward.w2.Plora_A
1534
- model.layers.26.feed_forward.w2.Plora_B
1535
- model.layers.26.feed_forward.w2.lora_sft_A
1536
- model.layers.26.feed_forward.w2.lora_sft_B
1537
- model.layers.26.feed_forward.w2.lora_dpo_A
1538
- model.layers.26.feed_forward.w2.lora_dpo_B
1539
- model.layers.26.feed_forward.w2.lora_web_A
1540
- model.layers.26.feed_forward.w2.lora_web_B
1541
- model.layers.26.feed_forward.act_fn
1542
- model.layers.26.attention_norm
1543
- model.layers.26.ffn_norm
1544
- model.layers.27
1545
- model.layers.27.attention
1546
- model.layers.27.attention.wqkv
1547
- model.layers.27.attention.wqkv.lora_dropout
1548
- model.layers.27.attention.wqkv.Plora_A
1549
- model.layers.27.attention.wqkv.Plora_B
1550
- model.layers.27.attention.wqkv.lora_sft_A
1551
- model.layers.27.attention.wqkv.lora_sft_B
1552
- model.layers.27.attention.wqkv.lora_dpo_A
1553
- model.layers.27.attention.wqkv.lora_dpo_B
1554
- model.layers.27.attention.wqkv.lora_web_A
1555
- model.layers.27.attention.wqkv.lora_web_B
1556
- model.layers.27.attention.wo
1557
- model.layers.27.attention.wo.lora_dropout
1558
- model.layers.27.attention.wo.Plora_A
1559
- model.layers.27.attention.wo.Plora_B
1560
- model.layers.27.attention.wo.lora_sft_A
1561
- model.layers.27.attention.wo.lora_sft_B
1562
- model.layers.27.attention.wo.lora_dpo_A
1563
- model.layers.27.attention.wo.lora_dpo_B
1564
- model.layers.27.attention.wo.lora_web_A
1565
- model.layers.27.attention.wo.lora_web_B
1566
- model.layers.27.attention.rotary_emb
1567
- model.layers.27.feed_forward
1568
- model.layers.27.feed_forward.w1
1569
- model.layers.27.feed_forward.w1.lora_dropout
1570
- model.layers.27.feed_forward.w1.Plora_A
1571
- model.layers.27.feed_forward.w1.Plora_B
1572
- model.layers.27.feed_forward.w1.lora_sft_A
1573
- model.layers.27.feed_forward.w1.lora_sft_B
1574
- model.layers.27.feed_forward.w1.lora_dpo_A
1575
- model.layers.27.feed_forward.w1.lora_dpo_B
1576
- model.layers.27.feed_forward.w1.lora_web_A
1577
- model.layers.27.feed_forward.w1.lora_web_B
1578
- model.layers.27.feed_forward.w3
1579
- model.layers.27.feed_forward.w3.lora_dropout
1580
- model.layers.27.feed_forward.w3.Plora_A
1581
- model.layers.27.feed_forward.w3.Plora_B
1582
- model.layers.27.feed_forward.w3.lora_sft_A
1583
- model.layers.27.feed_forward.w3.lora_sft_B
1584
- model.layers.27.feed_forward.w3.lora_dpo_A
1585
- model.layers.27.feed_forward.w3.lora_dpo_B
1586
- model.layers.27.feed_forward.w3.lora_web_A
1587
- model.layers.27.feed_forward.w3.lora_web_B
1588
- model.layers.27.feed_forward.w2
1589
- model.layers.27.feed_forward.w2.lora_dropout
1590
- model.layers.27.feed_forward.w2.Plora_A
1591
- model.layers.27.feed_forward.w2.Plora_B
1592
- model.layers.27.feed_forward.w2.lora_sft_A
1593
- model.layers.27.feed_forward.w2.lora_sft_B
1594
- model.layers.27.feed_forward.w2.lora_dpo_A
1595
- model.layers.27.feed_forward.w2.lora_dpo_B
1596
- model.layers.27.feed_forward.w2.lora_web_A
1597
- model.layers.27.feed_forward.w2.lora_web_B
1598
- model.layers.27.feed_forward.act_fn
1599
- model.layers.27.attention_norm
1600
- model.layers.27.ffn_norm
1601
- model.layers.28
1602
- model.layers.28.attention
1603
- model.layers.28.attention.wqkv
1604
- model.layers.28.attention.wqkv.lora_dropout
1605
- model.layers.28.attention.wqkv.Plora_A
1606
- model.layers.28.attention.wqkv.Plora_B
1607
- model.layers.28.attention.wqkv.lora_sft_A
1608
- model.layers.28.attention.wqkv.lora_sft_B
1609
- model.layers.28.attention.wqkv.lora_dpo_A
1610
- model.layers.28.attention.wqkv.lora_dpo_B
1611
- model.layers.28.attention.wqkv.lora_web_A
1612
- model.layers.28.attention.wqkv.lora_web_B
1613
- model.layers.28.attention.wo
1614
- model.layers.28.attention.wo.lora_dropout
1615
- model.layers.28.attention.wo.Plora_A
1616
- model.layers.28.attention.wo.Plora_B
1617
- model.layers.28.attention.wo.lora_sft_A
1618
- model.layers.28.attention.wo.lora_sft_B
1619
- model.layers.28.attention.wo.lora_dpo_A
1620
- model.layers.28.attention.wo.lora_dpo_B
1621
- model.layers.28.attention.wo.lora_web_A
1622
- model.layers.28.attention.wo.lora_web_B
1623
- model.layers.28.attention.rotary_emb
1624
- model.layers.28.feed_forward
1625
- model.layers.28.feed_forward.w1
1626
- model.layers.28.feed_forward.w1.lora_dropout
1627
- model.layers.28.feed_forward.w1.Plora_A
1628
- model.layers.28.feed_forward.w1.Plora_B
1629
- model.layers.28.feed_forward.w1.lora_sft_A
1630
- model.layers.28.feed_forward.w1.lora_sft_B
1631
- model.layers.28.feed_forward.w1.lora_dpo_A
1632
- model.layers.28.feed_forward.w1.lora_dpo_B
1633
- model.layers.28.feed_forward.w1.lora_web_A
1634
- model.layers.28.feed_forward.w1.lora_web_B
1635
- model.layers.28.feed_forward.w3
1636
- model.layers.28.feed_forward.w3.lora_dropout
1637
- model.layers.28.feed_forward.w3.Plora_A
1638
- model.layers.28.feed_forward.w3.Plora_B
1639
- model.layers.28.feed_forward.w3.lora_sft_A
1640
- model.layers.28.feed_forward.w3.lora_sft_B
1641
- model.layers.28.feed_forward.w3.lora_dpo_A
1642
- model.layers.28.feed_forward.w3.lora_dpo_B
1643
- model.layers.28.feed_forward.w3.lora_web_A
1644
- model.layers.28.feed_forward.w3.lora_web_B
1645
- model.layers.28.feed_forward.w2
1646
- model.layers.28.feed_forward.w2.lora_dropout
1647
- model.layers.28.feed_forward.w2.Plora_A
1648
- model.layers.28.feed_forward.w2.Plora_B
1649
- model.layers.28.feed_forward.w2.lora_sft_A
1650
- model.layers.28.feed_forward.w2.lora_sft_B
1651
- model.layers.28.feed_forward.w2.lora_dpo_A
1652
- model.layers.28.feed_forward.w2.lora_dpo_B
1653
- model.layers.28.feed_forward.w2.lora_web_A
1654
- model.layers.28.feed_forward.w2.lora_web_B
1655
- model.layers.28.feed_forward.act_fn
1656
- model.layers.28.attention_norm
1657
- model.layers.28.ffn_norm
1658
- model.layers.29
1659
- model.layers.29.attention
1660
- model.layers.29.attention.wqkv
1661
- model.layers.29.attention.wqkv.lora_dropout
1662
- model.layers.29.attention.wqkv.Plora_A
1663
- model.layers.29.attention.wqkv.Plora_B
1664
- model.layers.29.attention.wqkv.lora_sft_A
1665
- model.layers.29.attention.wqkv.lora_sft_B
1666
- model.layers.29.attention.wqkv.lora_dpo_A
1667
- model.layers.29.attention.wqkv.lora_dpo_B
1668
- model.layers.29.attention.wqkv.lora_web_A
1669
- model.layers.29.attention.wqkv.lora_web_B
1670
- model.layers.29.attention.wo
1671
- model.layers.29.attention.wo.lora_dropout
1672
- model.layers.29.attention.wo.Plora_A
1673
- model.layers.29.attention.wo.Plora_B
1674
- model.layers.29.attention.wo.lora_sft_A
1675
- model.layers.29.attention.wo.lora_sft_B
1676
- model.layers.29.attention.wo.lora_dpo_A
1677
- model.layers.29.attention.wo.lora_dpo_B
1678
- model.layers.29.attention.wo.lora_web_A
1679
- model.layers.29.attention.wo.lora_web_B
1680
- model.layers.29.attention.rotary_emb
1681
- model.layers.29.feed_forward
1682
- model.layers.29.feed_forward.w1
1683
- model.layers.29.feed_forward.w1.lora_dropout
1684
- model.layers.29.feed_forward.w1.Plora_A
1685
- model.layers.29.feed_forward.w1.Plora_B
1686
- model.layers.29.feed_forward.w1.lora_sft_A
1687
- model.layers.29.feed_forward.w1.lora_sft_B
1688
- model.layers.29.feed_forward.w1.lora_dpo_A
1689
- model.layers.29.feed_forward.w1.lora_dpo_B
1690
- model.layers.29.feed_forward.w1.lora_web_A
1691
- model.layers.29.feed_forward.w1.lora_web_B
1692
- model.layers.29.feed_forward.w3
1693
- model.layers.29.feed_forward.w3.lora_dropout
1694
- model.layers.29.feed_forward.w3.Plora_A
1695
- model.layers.29.feed_forward.w3.Plora_B
1696
- model.layers.29.feed_forward.w3.lora_sft_A
1697
- model.layers.29.feed_forward.w3.lora_sft_B
1698
- model.layers.29.feed_forward.w3.lora_dpo_A
1699
- model.layers.29.feed_forward.w3.lora_dpo_B
1700
- model.layers.29.feed_forward.w3.lora_web_A
1701
- model.layers.29.feed_forward.w3.lora_web_B
1702
- model.layers.29.feed_forward.w2
1703
- model.layers.29.feed_forward.w2.lora_dropout
1704
- model.layers.29.feed_forward.w2.Plora_A
1705
- model.layers.29.feed_forward.w2.Plora_B
1706
- model.layers.29.feed_forward.w2.lora_sft_A
1707
- model.layers.29.feed_forward.w2.lora_sft_B
1708
- model.layers.29.feed_forward.w2.lora_dpo_A
1709
- model.layers.29.feed_forward.w2.lora_dpo_B
1710
- model.layers.29.feed_forward.w2.lora_web_A
1711
- model.layers.29.feed_forward.w2.lora_web_B
1712
- model.layers.29.feed_forward.act_fn
1713
- model.layers.29.attention_norm
1714
- model.layers.29.ffn_norm
1715
- model.layers.30
1716
- model.layers.30.attention
1717
- model.layers.30.attention.wqkv
1718
- model.layers.30.attention.wqkv.lora_dropout
1719
- model.layers.30.attention.wqkv.Plora_A
1720
- model.layers.30.attention.wqkv.Plora_B
1721
- model.layers.30.attention.wqkv.lora_sft_A
1722
- model.layers.30.attention.wqkv.lora_sft_B
1723
- model.layers.30.attention.wqkv.lora_dpo_A
1724
- model.layers.30.attention.wqkv.lora_dpo_B
1725
- model.layers.30.attention.wqkv.lora_web_A
1726
- model.layers.30.attention.wqkv.lora_web_B
1727
- model.layers.30.attention.wo
1728
- model.layers.30.attention.wo.lora_dropout
1729
- model.layers.30.attention.wo.Plora_A
1730
- model.layers.30.attention.wo.Plora_B
1731
- model.layers.30.attention.wo.lora_sft_A
1732
- model.layers.30.attention.wo.lora_sft_B
1733
- model.layers.30.attention.wo.lora_dpo_A
1734
- model.layers.30.attention.wo.lora_dpo_B
1735
- model.layers.30.attention.wo.lora_web_A
1736
- model.layers.30.attention.wo.lora_web_B
1737
- model.layers.30.attention.rotary_emb
1738
- model.layers.30.feed_forward
1739
- model.layers.30.feed_forward.w1
1740
- model.layers.30.feed_forward.w1.lora_dropout
1741
- model.layers.30.feed_forward.w1.Plora_A
1742
- model.layers.30.feed_forward.w1.Plora_B
1743
- model.layers.30.feed_forward.w1.lora_sft_A
1744
- model.layers.30.feed_forward.w1.lora_sft_B
1745
- model.layers.30.feed_forward.w1.lora_dpo_A
1746
- model.layers.30.feed_forward.w1.lora_dpo_B
1747
- model.layers.30.feed_forward.w1.lora_web_A
1748
- model.layers.30.feed_forward.w1.lora_web_B
1749
- model.layers.30.feed_forward.w3
1750
- model.layers.30.feed_forward.w3.lora_dropout
1751
- model.layers.30.feed_forward.w3.Plora_A
1752
- model.layers.30.feed_forward.w3.Plora_B
1753
- model.layers.30.feed_forward.w3.lora_sft_A
1754
- model.layers.30.feed_forward.w3.lora_sft_B
1755
- model.layers.30.feed_forward.w3.lora_dpo_A
1756
- model.layers.30.feed_forward.w3.lora_dpo_B
1757
- model.layers.30.feed_forward.w3.lora_web_A
1758
- model.layers.30.feed_forward.w3.lora_web_B
1759
- model.layers.30.feed_forward.w2
1760
- model.layers.30.feed_forward.w2.lora_dropout
1761
- model.layers.30.feed_forward.w2.Plora_A
1762
- model.layers.30.feed_forward.w2.Plora_B
1763
- model.layers.30.feed_forward.w2.lora_sft_A
1764
- model.layers.30.feed_forward.w2.lora_sft_B
1765
- model.layers.30.feed_forward.w2.lora_dpo_A
1766
- model.layers.30.feed_forward.w2.lora_dpo_B
1767
- model.layers.30.feed_forward.w2.lora_web_A
1768
- model.layers.30.feed_forward.w2.lora_web_B
1769
- model.layers.30.feed_forward.act_fn
1770
- model.layers.30.attention_norm
1771
- model.layers.30.ffn_norm
1772
- model.layers.31
1773
- model.layers.31.attention
1774
- model.layers.31.attention.wqkv
1775
- model.layers.31.attention.wqkv.lora_dropout
1776
- model.layers.31.attention.wqkv.Plora_A
1777
- model.layers.31.attention.wqkv.Plora_B
1778
- model.layers.31.attention.wqkv.lora_sft_A
1779
- model.layers.31.attention.wqkv.lora_sft_B
1780
- model.layers.31.attention.wqkv.lora_dpo_A
1781
- model.layers.31.attention.wqkv.lora_dpo_B
1782
- model.layers.31.attention.wqkv.lora_web_A
1783
- model.layers.31.attention.wqkv.lora_web_B
1784
- model.layers.31.attention.wo
1785
- model.layers.31.attention.wo.lora_dropout
1786
- model.layers.31.attention.wo.Plora_A
1787
- model.layers.31.attention.wo.Plora_B
1788
- model.layers.31.attention.wo.lora_sft_A
1789
- model.layers.31.attention.wo.lora_sft_B
1790
- model.layers.31.attention.wo.lora_dpo_A
1791
- model.layers.31.attention.wo.lora_dpo_B
1792
- model.layers.31.attention.wo.lora_web_A
1793
- model.layers.31.attention.wo.lora_web_B
1794
- model.layers.31.attention.rotary_emb
1795
- model.layers.31.feed_forward
1796
- model.layers.31.feed_forward.w1
1797
- model.layers.31.feed_forward.w1.lora_dropout
1798
- model.layers.31.feed_forward.w1.Plora_A
1799
- model.layers.31.feed_forward.w1.Plora_B
1800
- model.layers.31.feed_forward.w1.lora_sft_A
1801
- model.layers.31.feed_forward.w1.lora_sft_B
1802
- model.layers.31.feed_forward.w1.lora_dpo_A
1803
- model.layers.31.feed_forward.w1.lora_dpo_B
1804
- model.layers.31.feed_forward.w1.lora_web_A
1805
- model.layers.31.feed_forward.w1.lora_web_B
1806
- model.layers.31.feed_forward.w3
1807
- model.layers.31.feed_forward.w3.lora_dropout
1808
- model.layers.31.feed_forward.w3.Plora_A
1809
- model.layers.31.feed_forward.w3.Plora_B
1810
- model.layers.31.feed_forward.w3.lora_sft_A
1811
- model.layers.31.feed_forward.w3.lora_sft_B
1812
- model.layers.31.feed_forward.w3.lora_dpo_A
1813
- model.layers.31.feed_forward.w3.lora_dpo_B
1814
- model.layers.31.feed_forward.w3.lora_web_A
1815
- model.layers.31.feed_forward.w3.lora_web_B
1816
- model.layers.31.feed_forward.w2
1817
- model.layers.31.feed_forward.w2.lora_dropout
1818
- model.layers.31.feed_forward.w2.Plora_A
1819
- model.layers.31.feed_forward.w2.Plora_B
1820
- model.layers.31.feed_forward.w2.lora_sft_A
1821
- model.layers.31.feed_forward.w2.lora_sft_B
1822
- model.layers.31.feed_forward.w2.lora_dpo_A
1823
- model.layers.31.feed_forward.w2.lora_dpo_B
1824
- model.layers.31.feed_forward.w2.lora_web_A
1825
- model.layers.31.feed_forward.w2.lora_web_B
1826
- model.layers.31.feed_forward.act_fn
1827
- model.layers.31.attention_norm
1828
- model.layers.31.ffn_norm
1829
- model.norm
1830
- output
1831
- vit
1832
- vit.vision_tower
1833
- vit.vision_tower.vision_model
1834
- vit.vision_tower.vision_model.embeddings
1835
- vit.vision_tower.vision_model.embeddings.patch_embedding
1836
- vit.vision_tower.vision_model.embeddings.position_embedding
1837
- vit.vision_tower.vision_model.pre_layrnorm
1838
- vit.vision_tower.vision_model.encoder
1839
- vit.vision_tower.vision_model.encoder.layers
1840
- vit.vision_tower.vision_model.encoder.layers.0
1841
- vit.vision_tower.vision_model.encoder.layers.0.self_attn
1842
- vit.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
1843
- vit.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
1844
- vit.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
1845
- vit.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
1846
- vit.vision_tower.vision_model.encoder.layers.0.layer_norm1
1847
- vit.vision_tower.vision_model.encoder.layers.0.mlp
1848
- vit.vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
1849
- vit.vision_tower.vision_model.encoder.layers.0.mlp.fc1
1850
- vit.vision_tower.vision_model.encoder.layers.0.mlp.fc2
1851
- vit.vision_tower.vision_model.encoder.layers.0.layer_norm2
1852
- vit.vision_tower.vision_model.encoder.layers.1
1853
- vit.vision_tower.vision_model.encoder.layers.1.self_attn
1854
- vit.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
1855
- vit.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
1856
- vit.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
1857
- vit.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
1858
- vit.vision_tower.vision_model.encoder.layers.1.layer_norm1
1859
- vit.vision_tower.vision_model.encoder.layers.1.mlp
1860
- vit.vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
1861
- vit.vision_tower.vision_model.encoder.layers.1.mlp.fc1
1862
- vit.vision_tower.vision_model.encoder.layers.1.mlp.fc2
1863
- vit.vision_tower.vision_model.encoder.layers.1.layer_norm2
1864
- vit.vision_tower.vision_model.encoder.layers.2
1865
- vit.vision_tower.vision_model.encoder.layers.2.self_attn
1866
- vit.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
1867
- vit.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
1868
- vit.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
1869
- vit.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
1870
- vit.vision_tower.vision_model.encoder.layers.2.layer_norm1
1871
- vit.vision_tower.vision_model.encoder.layers.2.mlp
1872
- vit.vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
1873
- vit.vision_tower.vision_model.encoder.layers.2.mlp.fc1
1874
- vit.vision_tower.vision_model.encoder.layers.2.mlp.fc2
1875
- vit.vision_tower.vision_model.encoder.layers.2.layer_norm2
1876
- vit.vision_tower.vision_model.encoder.layers.3
1877
- vit.vision_tower.vision_model.encoder.layers.3.self_attn
1878
- vit.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
1879
- vit.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
1880
- vit.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
1881
- vit.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
1882
- vit.vision_tower.vision_model.encoder.layers.3.layer_norm1
1883
- vit.vision_tower.vision_model.encoder.layers.3.mlp
1884
- vit.vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
1885
- vit.vision_tower.vision_model.encoder.layers.3.mlp.fc1
1886
- vit.vision_tower.vision_model.encoder.layers.3.mlp.fc2
1887
- vit.vision_tower.vision_model.encoder.layers.3.layer_norm2
1888
- vit.vision_tower.vision_model.encoder.layers.4
1889
- vit.vision_tower.vision_model.encoder.layers.4.self_attn
1890
- vit.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
1891
- vit.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
1892
- vit.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
1893
- vit.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
1894
- vit.vision_tower.vision_model.encoder.layers.4.layer_norm1
1895
- vit.vision_tower.vision_model.encoder.layers.4.mlp
1896
- vit.vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
1897
- vit.vision_tower.vision_model.encoder.layers.4.mlp.fc1
1898
- vit.vision_tower.vision_model.encoder.layers.4.mlp.fc2
1899
- vit.vision_tower.vision_model.encoder.layers.4.layer_norm2
1900
- vit.vision_tower.vision_model.encoder.layers.5
1901
- vit.vision_tower.vision_model.encoder.layers.5.self_attn
1902
- vit.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
1903
- vit.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
1904
- vit.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
1905
- vit.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
1906
- vit.vision_tower.vision_model.encoder.layers.5.layer_norm1
1907
- vit.vision_tower.vision_model.encoder.layers.5.mlp
1908
- vit.vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
1909
- vit.vision_tower.vision_model.encoder.layers.5.mlp.fc1
1910
- vit.vision_tower.vision_model.encoder.layers.5.mlp.fc2
1911
- vit.vision_tower.vision_model.encoder.layers.5.layer_norm2
1912
- vit.vision_tower.vision_model.encoder.layers.6
1913
- vit.vision_tower.vision_model.encoder.layers.6.self_attn
1914
- vit.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
1915
- vit.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
1916
- vit.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
1917
- vit.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
1918
- vit.vision_tower.vision_model.encoder.layers.6.layer_norm1
1919
- vit.vision_tower.vision_model.encoder.layers.6.mlp
1920
- vit.vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
1921
- vit.vision_tower.vision_model.encoder.layers.6.mlp.fc1
1922
- vit.vision_tower.vision_model.encoder.layers.6.mlp.fc2
1923
- vit.vision_tower.vision_model.encoder.layers.6.layer_norm2
1924
- vit.vision_tower.vision_model.encoder.layers.7
1925
- vit.vision_tower.vision_model.encoder.layers.7.self_attn
1926
- vit.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
1927
- vit.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
1928
- vit.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
1929
- vit.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
1930
- vit.vision_tower.vision_model.encoder.layers.7.layer_norm1
1931
- vit.vision_tower.vision_model.encoder.layers.7.mlp
1932
- vit.vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
1933
- vit.vision_tower.vision_model.encoder.layers.7.mlp.fc1
1934
- vit.vision_tower.vision_model.encoder.layers.7.mlp.fc2
1935
- vit.vision_tower.vision_model.encoder.layers.7.layer_norm2
1936
- vit.vision_tower.vision_model.encoder.layers.8
1937
- vit.vision_tower.vision_model.encoder.layers.8.self_attn
1938
- vit.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
1939
- vit.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
1940
- vit.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
1941
- vit.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
1942
- vit.vision_tower.vision_model.encoder.layers.8.layer_norm1
1943
- vit.vision_tower.vision_model.encoder.layers.8.mlp
1944
- vit.vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
1945
- vit.vision_tower.vision_model.encoder.layers.8.mlp.fc1
1946
- vit.vision_tower.vision_model.encoder.layers.8.mlp.fc2
1947
- vit.vision_tower.vision_model.encoder.layers.8.layer_norm2
1948
- vit.vision_tower.vision_model.encoder.layers.9
1949
- vit.vision_tower.vision_model.encoder.layers.9.self_attn
1950
- vit.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
1951
- vit.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
1952
- vit.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
1953
- vit.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
1954
- vit.vision_tower.vision_model.encoder.layers.9.layer_norm1
1955
- vit.vision_tower.vision_model.encoder.layers.9.mlp
1956
- vit.vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
1957
- vit.vision_tower.vision_model.encoder.layers.9.mlp.fc1
1958
- vit.vision_tower.vision_model.encoder.layers.9.mlp.fc2
1959
- vit.vision_tower.vision_model.encoder.layers.9.layer_norm2
1960
- vit.vision_tower.vision_model.encoder.layers.10
1961
- vit.vision_tower.vision_model.encoder.layers.10.self_attn
1962
- vit.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
1963
- vit.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
1964
- vit.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
1965
- vit.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
1966
- vit.vision_tower.vision_model.encoder.layers.10.layer_norm1
1967
- vit.vision_tower.vision_model.encoder.layers.10.mlp
1968
- vit.vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
1969
- vit.vision_tower.vision_model.encoder.layers.10.mlp.fc1
1970
- vit.vision_tower.vision_model.encoder.layers.10.mlp.fc2
1971
- vit.vision_tower.vision_model.encoder.layers.10.layer_norm2
1972
- vit.vision_tower.vision_model.encoder.layers.11
1973
- vit.vision_tower.vision_model.encoder.layers.11.self_attn
1974
- vit.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
1975
- vit.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
1976
- vit.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
1977
- vit.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
1978
- vit.vision_tower.vision_model.encoder.layers.11.layer_norm1
1979
- vit.vision_tower.vision_model.encoder.layers.11.mlp
1980
- vit.vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
1981
- vit.vision_tower.vision_model.encoder.layers.11.mlp.fc1
1982
- vit.vision_tower.vision_model.encoder.layers.11.mlp.fc2
1983
- vit.vision_tower.vision_model.encoder.layers.11.layer_norm2
1984
- vit.vision_tower.vision_model.encoder.layers.12
1985
- vit.vision_tower.vision_model.encoder.layers.12.self_attn
1986
- vit.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
1987
- vit.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
1988
- vit.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
1989
- vit.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
1990
- vit.vision_tower.vision_model.encoder.layers.12.layer_norm1
1991
- vit.vision_tower.vision_model.encoder.layers.12.mlp
1992
- vit.vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
1993
- vit.vision_tower.vision_model.encoder.layers.12.mlp.fc1
1994
- vit.vision_tower.vision_model.encoder.layers.12.mlp.fc2
1995
- vit.vision_tower.vision_model.encoder.layers.12.layer_norm2
1996
- vit.vision_tower.vision_model.encoder.layers.13
1997
- vit.vision_tower.vision_model.encoder.layers.13.self_attn
1998
- vit.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
1999
- vit.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
2000
- vit.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
2001
- vit.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
2002
- vit.vision_tower.vision_model.encoder.layers.13.layer_norm1
2003
- vit.vision_tower.vision_model.encoder.layers.13.mlp
2004
- vit.vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
2005
- vit.vision_tower.vision_model.encoder.layers.13.mlp.fc1
2006
- vit.vision_tower.vision_model.encoder.layers.13.mlp.fc2
2007
- vit.vision_tower.vision_model.encoder.layers.13.layer_norm2
2008
- vit.vision_tower.vision_model.encoder.layers.14
2009
- vit.vision_tower.vision_model.encoder.layers.14.self_attn
2010
- vit.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
2011
- vit.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
2012
- vit.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
2013
- vit.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
2014
- vit.vision_tower.vision_model.encoder.layers.14.layer_norm1
2015
- vit.vision_tower.vision_model.encoder.layers.14.mlp
2016
- vit.vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
2017
- vit.vision_tower.vision_model.encoder.layers.14.mlp.fc1
2018
- vit.vision_tower.vision_model.encoder.layers.14.mlp.fc2
2019
- vit.vision_tower.vision_model.encoder.layers.14.layer_norm2
2020
- vit.vision_tower.vision_model.encoder.layers.15
2021
- vit.vision_tower.vision_model.encoder.layers.15.self_attn
2022
- vit.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
2023
- vit.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
2024
- vit.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
2025
- vit.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
2026
- vit.vision_tower.vision_model.encoder.layers.15.layer_norm1
2027
- vit.vision_tower.vision_model.encoder.layers.15.mlp
2028
- vit.vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
2029
- vit.vision_tower.vision_model.encoder.layers.15.mlp.fc1
2030
- vit.vision_tower.vision_model.encoder.layers.15.mlp.fc2
2031
- vit.vision_tower.vision_model.encoder.layers.15.layer_norm2
2032
- vit.vision_tower.vision_model.encoder.layers.16
2033
- vit.vision_tower.vision_model.encoder.layers.16.self_attn
2034
- vit.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
2035
- vit.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
2036
- vit.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
2037
- vit.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
2038
- vit.vision_tower.vision_model.encoder.layers.16.layer_norm1
2039
- vit.vision_tower.vision_model.encoder.layers.16.mlp
2040
- vit.vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
2041
- vit.vision_tower.vision_model.encoder.layers.16.mlp.fc1
2042
- vit.vision_tower.vision_model.encoder.layers.16.mlp.fc2
2043
- vit.vision_tower.vision_model.encoder.layers.16.layer_norm2
2044
- vit.vision_tower.vision_model.encoder.layers.17
2045
- vit.vision_tower.vision_model.encoder.layers.17.self_attn
2046
- vit.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
2047
- vit.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
2048
- vit.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
2049
- vit.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
2050
- vit.vision_tower.vision_model.encoder.layers.17.layer_norm1
2051
- vit.vision_tower.vision_model.encoder.layers.17.mlp
2052
- vit.vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
2053
- vit.vision_tower.vision_model.encoder.layers.17.mlp.fc1
2054
- vit.vision_tower.vision_model.encoder.layers.17.mlp.fc2
2055
- vit.vision_tower.vision_model.encoder.layers.17.layer_norm2
2056
- vit.vision_tower.vision_model.encoder.layers.18
2057
- vit.vision_tower.vision_model.encoder.layers.18.self_attn
2058
- vit.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
2059
- vit.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
2060
- vit.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
2061
- vit.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
2062
- vit.vision_tower.vision_model.encoder.layers.18.layer_norm1
2063
- vit.vision_tower.vision_model.encoder.layers.18.mlp
2064
- vit.vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
2065
- vit.vision_tower.vision_model.encoder.layers.18.mlp.fc1
2066
- vit.vision_tower.vision_model.encoder.layers.18.mlp.fc2
2067
- vit.vision_tower.vision_model.encoder.layers.18.layer_norm2
2068
- vit.vision_tower.vision_model.encoder.layers.19
2069
- vit.vision_tower.vision_model.encoder.layers.19.self_attn
2070
- vit.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
2071
- vit.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
2072
- vit.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
2073
- vit.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
2074
- vit.vision_tower.vision_model.encoder.layers.19.layer_norm1
2075
- vit.vision_tower.vision_model.encoder.layers.19.mlp
2076
- vit.vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
2077
- vit.vision_tower.vision_model.encoder.layers.19.mlp.fc1
2078
- vit.vision_tower.vision_model.encoder.layers.19.mlp.fc2
2079
- vit.vision_tower.vision_model.encoder.layers.19.layer_norm2
2080
- vit.vision_tower.vision_model.encoder.layers.20
2081
- vit.vision_tower.vision_model.encoder.layers.20.self_attn
2082
- vit.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
2083
- vit.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
2084
- vit.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
2085
- vit.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
2086
- vit.vision_tower.vision_model.encoder.layers.20.layer_norm1
2087
- vit.vision_tower.vision_model.encoder.layers.20.mlp
2088
- vit.vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
2089
- vit.vision_tower.vision_model.encoder.layers.20.mlp.fc1
2090
- vit.vision_tower.vision_model.encoder.layers.20.mlp.fc2
2091
- vit.vision_tower.vision_model.encoder.layers.20.layer_norm2
2092
- vit.vision_tower.vision_model.encoder.layers.21
2093
- vit.vision_tower.vision_model.encoder.layers.21.self_attn
2094
- vit.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
2095
- vit.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
2096
- vit.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
2097
- vit.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
2098
- vit.vision_tower.vision_model.encoder.layers.21.layer_norm1
2099
- vit.vision_tower.vision_model.encoder.layers.21.mlp
2100
- vit.vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
2101
- vit.vision_tower.vision_model.encoder.layers.21.mlp.fc1
2102
- vit.vision_tower.vision_model.encoder.layers.21.mlp.fc2
2103
- vit.vision_tower.vision_model.encoder.layers.21.layer_norm2
2104
- vit.vision_tower.vision_model.encoder.layers.22
2105
- vit.vision_tower.vision_model.encoder.layers.22.self_attn
2106
- vit.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
2107
- vit.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
2108
- vit.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
2109
- vit.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
2110
- vit.vision_tower.vision_model.encoder.layers.22.layer_norm1
2111
- vit.vision_tower.vision_model.encoder.layers.22.mlp
2112
- vit.vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
2113
- vit.vision_tower.vision_model.encoder.layers.22.mlp.fc1
2114
- vit.vision_tower.vision_model.encoder.layers.22.mlp.fc2
2115
- vit.vision_tower.vision_model.encoder.layers.22.layer_norm2
2116
- vit.vision_tower.vision_model.encoder.layers.23
2117
- vit.vision_tower.vision_model.encoder.layers.23.self_attn
2118
- vit.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
2119
- vit.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
2120
- vit.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
2121
- vit.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
2122
- vit.vision_tower.vision_model.encoder.layers.23.layer_norm1
2123
- vit.vision_tower.vision_model.encoder.layers.23.mlp
2124
- vit.vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
2125
- vit.vision_tower.vision_model.encoder.layers.23.mlp.fc1
2126
- vit.vision_tower.vision_model.encoder.layers.23.mlp.fc2
2127
- vit.vision_tower.vision_model.encoder.layers.23.layer_norm2
2128
- vit.vision_tower.vision_model.post_layernorm
2129
- vision_proj
2130
- vision_proj.0
2131
- vision_proj.1
2132
- vision_proj.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/internvl/InternVL2_5-8B.txt DELETED
@@ -1,737 +0,0 @@
1
-
2
- vision_model
3
- vision_model.embeddings
4
- vision_model.embeddings.patch_embedding
5
- vision_model.encoder
6
- vision_model.encoder.layers
7
- vision_model.encoder.layers.0
8
- vision_model.encoder.layers.0.attn
9
- vision_model.encoder.layers.0.attn.qkv
10
- vision_model.encoder.layers.0.attn.attn_drop
11
- vision_model.encoder.layers.0.attn.proj_drop
12
- vision_model.encoder.layers.0.attn.proj
13
- vision_model.encoder.layers.0.mlp
14
- vision_model.encoder.layers.0.mlp.act
15
- vision_model.encoder.layers.0.mlp.fc1
16
- vision_model.encoder.layers.0.mlp.fc2
17
- vision_model.encoder.layers.0.norm1
18
- vision_model.encoder.layers.0.norm2
19
- vision_model.encoder.layers.0.drop_path1
20
- vision_model.encoder.layers.0.drop_path2
21
- vision_model.encoder.layers.1
22
- vision_model.encoder.layers.1.attn
23
- vision_model.encoder.layers.1.attn.qkv
24
- vision_model.encoder.layers.1.attn.attn_drop
25
- vision_model.encoder.layers.1.attn.proj_drop
26
- vision_model.encoder.layers.1.attn.proj
27
- vision_model.encoder.layers.1.mlp
28
- vision_model.encoder.layers.1.mlp.act
29
- vision_model.encoder.layers.1.mlp.fc1
30
- vision_model.encoder.layers.1.mlp.fc2
31
- vision_model.encoder.layers.1.norm1
32
- vision_model.encoder.layers.1.norm2
33
- vision_model.encoder.layers.1.drop_path1
34
- vision_model.encoder.layers.1.drop_path2
35
- vision_model.encoder.layers.2
36
- vision_model.encoder.layers.2.attn
37
- vision_model.encoder.layers.2.attn.qkv
38
- vision_model.encoder.layers.2.attn.attn_drop
39
- vision_model.encoder.layers.2.attn.proj_drop
40
- vision_model.encoder.layers.2.attn.proj
41
- vision_model.encoder.layers.2.mlp
42
- vision_model.encoder.layers.2.mlp.act
43
- vision_model.encoder.layers.2.mlp.fc1
44
- vision_model.encoder.layers.2.mlp.fc2
45
- vision_model.encoder.layers.2.norm1
46
- vision_model.encoder.layers.2.norm2
47
- vision_model.encoder.layers.2.drop_path1
48
- vision_model.encoder.layers.2.drop_path2
49
- vision_model.encoder.layers.3
50
- vision_model.encoder.layers.3.attn
51
- vision_model.encoder.layers.3.attn.qkv
52
- vision_model.encoder.layers.3.attn.attn_drop
53
- vision_model.encoder.layers.3.attn.proj_drop
54
- vision_model.encoder.layers.3.attn.proj
55
- vision_model.encoder.layers.3.mlp
56
- vision_model.encoder.layers.3.mlp.act
57
- vision_model.encoder.layers.3.mlp.fc1
58
- vision_model.encoder.layers.3.mlp.fc2
59
- vision_model.encoder.layers.3.norm1
60
- vision_model.encoder.layers.3.norm2
61
- vision_model.encoder.layers.3.drop_path1
62
- vision_model.encoder.layers.3.drop_path2
63
- vision_model.encoder.layers.4
64
- vision_model.encoder.layers.4.attn
65
- vision_model.encoder.layers.4.attn.qkv
66
- vision_model.encoder.layers.4.attn.attn_drop
67
- vision_model.encoder.layers.4.attn.proj_drop
68
- vision_model.encoder.layers.4.attn.proj
69
- vision_model.encoder.layers.4.mlp
70
- vision_model.encoder.layers.4.mlp.act
71
- vision_model.encoder.layers.4.mlp.fc1
72
- vision_model.encoder.layers.4.mlp.fc2
73
- vision_model.encoder.layers.4.norm1
74
- vision_model.encoder.layers.4.norm2
75
- vision_model.encoder.layers.4.drop_path1
76
- vision_model.encoder.layers.4.drop_path2
77
- vision_model.encoder.layers.5
78
- vision_model.encoder.layers.5.attn
79
- vision_model.encoder.layers.5.attn.qkv
80
- vision_model.encoder.layers.5.attn.attn_drop
81
- vision_model.encoder.layers.5.attn.proj_drop
82
- vision_model.encoder.layers.5.attn.proj
83
- vision_model.encoder.layers.5.mlp
84
- vision_model.encoder.layers.5.mlp.act
85
- vision_model.encoder.layers.5.mlp.fc1
86
- vision_model.encoder.layers.5.mlp.fc2
87
- vision_model.encoder.layers.5.norm1
88
- vision_model.encoder.layers.5.norm2
89
- vision_model.encoder.layers.5.drop_path1
90
- vision_model.encoder.layers.5.drop_path2
91
- vision_model.encoder.layers.6
92
- vision_model.encoder.layers.6.attn
93
- vision_model.encoder.layers.6.attn.qkv
94
- vision_model.encoder.layers.6.attn.attn_drop
95
- vision_model.encoder.layers.6.attn.proj_drop
96
- vision_model.encoder.layers.6.attn.proj
97
- vision_model.encoder.layers.6.mlp
98
- vision_model.encoder.layers.6.mlp.act
99
- vision_model.encoder.layers.6.mlp.fc1
100
- vision_model.encoder.layers.6.mlp.fc2
101
- vision_model.encoder.layers.6.norm1
102
- vision_model.encoder.layers.6.norm2
103
- vision_model.encoder.layers.6.drop_path1
104
- vision_model.encoder.layers.6.drop_path2
105
- vision_model.encoder.layers.7
106
- vision_model.encoder.layers.7.attn
107
- vision_model.encoder.layers.7.attn.qkv
108
- vision_model.encoder.layers.7.attn.attn_drop
109
- vision_model.encoder.layers.7.attn.proj_drop
110
- vision_model.encoder.layers.7.attn.proj
111
- vision_model.encoder.layers.7.mlp
112
- vision_model.encoder.layers.7.mlp.act
113
- vision_model.encoder.layers.7.mlp.fc1
114
- vision_model.encoder.layers.7.mlp.fc2
115
- vision_model.encoder.layers.7.norm1
116
- vision_model.encoder.layers.7.norm2
117
- vision_model.encoder.layers.7.drop_path1
118
- vision_model.encoder.layers.7.drop_path2
119
- vision_model.encoder.layers.8
120
- vision_model.encoder.layers.8.attn
121
- vision_model.encoder.layers.8.attn.qkv
122
- vision_model.encoder.layers.8.attn.attn_drop
123
- vision_model.encoder.layers.8.attn.proj_drop
124
- vision_model.encoder.layers.8.attn.proj
125
- vision_model.encoder.layers.8.mlp
126
- vision_model.encoder.layers.8.mlp.act
127
- vision_model.encoder.layers.8.mlp.fc1
128
- vision_model.encoder.layers.8.mlp.fc2
129
- vision_model.encoder.layers.8.norm1
130
- vision_model.encoder.layers.8.norm2
131
- vision_model.encoder.layers.8.drop_path1
132
- vision_model.encoder.layers.8.drop_path2
133
- vision_model.encoder.layers.9
134
- vision_model.encoder.layers.9.attn
135
- vision_model.encoder.layers.9.attn.qkv
136
- vision_model.encoder.layers.9.attn.attn_drop
137
- vision_model.encoder.layers.9.attn.proj_drop
138
- vision_model.encoder.layers.9.attn.proj
139
- vision_model.encoder.layers.9.mlp
140
- vision_model.encoder.layers.9.mlp.act
141
- vision_model.encoder.layers.9.mlp.fc1
142
- vision_model.encoder.layers.9.mlp.fc2
143
- vision_model.encoder.layers.9.norm1
144
- vision_model.encoder.layers.9.norm2
145
- vision_model.encoder.layers.9.drop_path1
146
- vision_model.encoder.layers.9.drop_path2
147
- vision_model.encoder.layers.10
148
- vision_model.encoder.layers.10.attn
149
- vision_model.encoder.layers.10.attn.qkv
150
- vision_model.encoder.layers.10.attn.attn_drop
151
- vision_model.encoder.layers.10.attn.proj_drop
152
- vision_model.encoder.layers.10.attn.proj
153
- vision_model.encoder.layers.10.mlp
154
- vision_model.encoder.layers.10.mlp.act
155
- vision_model.encoder.layers.10.mlp.fc1
156
- vision_model.encoder.layers.10.mlp.fc2
157
- vision_model.encoder.layers.10.norm1
158
- vision_model.encoder.layers.10.norm2
159
- vision_model.encoder.layers.10.drop_path1
160
- vision_model.encoder.layers.10.drop_path2
161
- vision_model.encoder.layers.11
162
- vision_model.encoder.layers.11.attn
163
- vision_model.encoder.layers.11.attn.qkv
164
- vision_model.encoder.layers.11.attn.attn_drop
165
- vision_model.encoder.layers.11.attn.proj_drop
166
- vision_model.encoder.layers.11.attn.proj
167
- vision_model.encoder.layers.11.mlp
168
- vision_model.encoder.layers.11.mlp.act
169
- vision_model.encoder.layers.11.mlp.fc1
170
- vision_model.encoder.layers.11.mlp.fc2
171
- vision_model.encoder.layers.11.norm1
172
- vision_model.encoder.layers.11.norm2
173
- vision_model.encoder.layers.11.drop_path1
174
- vision_model.encoder.layers.11.drop_path2
175
- vision_model.encoder.layers.12
176
- vision_model.encoder.layers.12.attn
177
- vision_model.encoder.layers.12.attn.qkv
178
- vision_model.encoder.layers.12.attn.attn_drop
179
- vision_model.encoder.layers.12.attn.proj_drop
180
- vision_model.encoder.layers.12.attn.proj
181
- vision_model.encoder.layers.12.mlp
182
- vision_model.encoder.layers.12.mlp.act
183
- vision_model.encoder.layers.12.mlp.fc1
184
- vision_model.encoder.layers.12.mlp.fc2
185
- vision_model.encoder.layers.12.norm1
186
- vision_model.encoder.layers.12.norm2
187
- vision_model.encoder.layers.12.drop_path1
188
- vision_model.encoder.layers.12.drop_path2
189
- vision_model.encoder.layers.13
190
- vision_model.encoder.layers.13.attn
191
- vision_model.encoder.layers.13.attn.qkv
192
- vision_model.encoder.layers.13.attn.attn_drop
193
- vision_model.encoder.layers.13.attn.proj_drop
194
- vision_model.encoder.layers.13.attn.proj
195
- vision_model.encoder.layers.13.mlp
196
- vision_model.encoder.layers.13.mlp.act
197
- vision_model.encoder.layers.13.mlp.fc1
198
- vision_model.encoder.layers.13.mlp.fc2
199
- vision_model.encoder.layers.13.norm1
200
- vision_model.encoder.layers.13.norm2
201
- vision_model.encoder.layers.13.drop_path1
202
- vision_model.encoder.layers.13.drop_path2
203
- vision_model.encoder.layers.14
204
- vision_model.encoder.layers.14.attn
205
- vision_model.encoder.layers.14.attn.qkv
206
- vision_model.encoder.layers.14.attn.attn_drop
207
- vision_model.encoder.layers.14.attn.proj_drop
208
- vision_model.encoder.layers.14.attn.proj
209
- vision_model.encoder.layers.14.mlp
210
- vision_model.encoder.layers.14.mlp.act
211
- vision_model.encoder.layers.14.mlp.fc1
212
- vision_model.encoder.layers.14.mlp.fc2
213
- vision_model.encoder.layers.14.norm1
214
- vision_model.encoder.layers.14.norm2
215
- vision_model.encoder.layers.14.drop_path1
216
- vision_model.encoder.layers.14.drop_path2
217
- vision_model.encoder.layers.15
218
- vision_model.encoder.layers.15.attn
219
- vision_model.encoder.layers.15.attn.qkv
220
- vision_model.encoder.layers.15.attn.attn_drop
221
- vision_model.encoder.layers.15.attn.proj_drop
222
- vision_model.encoder.layers.15.attn.proj
223
- vision_model.encoder.layers.15.mlp
224
- vision_model.encoder.layers.15.mlp.act
225
- vision_model.encoder.layers.15.mlp.fc1
226
- vision_model.encoder.layers.15.mlp.fc2
227
- vision_model.encoder.layers.15.norm1
228
- vision_model.encoder.layers.15.norm2
229
- vision_model.encoder.layers.15.drop_path1
230
- vision_model.encoder.layers.15.drop_path2
231
- vision_model.encoder.layers.16
232
- vision_model.encoder.layers.16.attn
233
- vision_model.encoder.layers.16.attn.qkv
234
- vision_model.encoder.layers.16.attn.attn_drop
235
- vision_model.encoder.layers.16.attn.proj_drop
236
- vision_model.encoder.layers.16.attn.proj
237
- vision_model.encoder.layers.16.mlp
238
- vision_model.encoder.layers.16.mlp.act
239
- vision_model.encoder.layers.16.mlp.fc1
240
- vision_model.encoder.layers.16.mlp.fc2
241
- vision_model.encoder.layers.16.norm1
242
- vision_model.encoder.layers.16.norm2
243
- vision_model.encoder.layers.16.drop_path1
244
- vision_model.encoder.layers.16.drop_path2
245
- vision_model.encoder.layers.17
246
- vision_model.encoder.layers.17.attn
247
- vision_model.encoder.layers.17.attn.qkv
248
- vision_model.encoder.layers.17.attn.attn_drop
249
- vision_model.encoder.layers.17.attn.proj_drop
250
- vision_model.encoder.layers.17.attn.proj
251
- vision_model.encoder.layers.17.mlp
252
- vision_model.encoder.layers.17.mlp.act
253
- vision_model.encoder.layers.17.mlp.fc1
254
- vision_model.encoder.layers.17.mlp.fc2
255
- vision_model.encoder.layers.17.norm1
256
- vision_model.encoder.layers.17.norm2
257
- vision_model.encoder.layers.17.drop_path1
258
- vision_model.encoder.layers.17.drop_path2
259
- vision_model.encoder.layers.18
260
- vision_model.encoder.layers.18.attn
261
- vision_model.encoder.layers.18.attn.qkv
262
- vision_model.encoder.layers.18.attn.attn_drop
263
- vision_model.encoder.layers.18.attn.proj_drop
264
- vision_model.encoder.layers.18.attn.proj
265
- vision_model.encoder.layers.18.mlp
266
- vision_model.encoder.layers.18.mlp.act
267
- vision_model.encoder.layers.18.mlp.fc1
268
- vision_model.encoder.layers.18.mlp.fc2
269
- vision_model.encoder.layers.18.norm1
270
- vision_model.encoder.layers.18.norm2
271
- vision_model.encoder.layers.18.drop_path1
272
- vision_model.encoder.layers.18.drop_path2
273
- vision_model.encoder.layers.19
274
- vision_model.encoder.layers.19.attn
275
- vision_model.encoder.layers.19.attn.qkv
276
- vision_model.encoder.layers.19.attn.attn_drop
277
- vision_model.encoder.layers.19.attn.proj_drop
278
- vision_model.encoder.layers.19.attn.proj
279
- vision_model.encoder.layers.19.mlp
280
- vision_model.encoder.layers.19.mlp.act
281
- vision_model.encoder.layers.19.mlp.fc1
282
- vision_model.encoder.layers.19.mlp.fc2
283
- vision_model.encoder.layers.19.norm1
284
- vision_model.encoder.layers.19.norm2
285
- vision_model.encoder.layers.19.drop_path1
286
- vision_model.encoder.layers.19.drop_path2
287
- vision_model.encoder.layers.20
288
- vision_model.encoder.layers.20.attn
289
- vision_model.encoder.layers.20.attn.qkv
290
- vision_model.encoder.layers.20.attn.attn_drop
291
- vision_model.encoder.layers.20.attn.proj_drop
292
- vision_model.encoder.layers.20.attn.proj
293
- vision_model.encoder.layers.20.mlp
294
- vision_model.encoder.layers.20.mlp.act
295
- vision_model.encoder.layers.20.mlp.fc1
296
- vision_model.encoder.layers.20.mlp.fc2
297
- vision_model.encoder.layers.20.norm1
298
- vision_model.encoder.layers.20.norm2
299
- vision_model.encoder.layers.20.drop_path1
300
- vision_model.encoder.layers.20.drop_path2
301
- vision_model.encoder.layers.21
302
- vision_model.encoder.layers.21.attn
303
- vision_model.encoder.layers.21.attn.qkv
304
- vision_model.encoder.layers.21.attn.attn_drop
305
- vision_model.encoder.layers.21.attn.proj_drop
306
- vision_model.encoder.layers.21.attn.proj
307
- vision_model.encoder.layers.21.mlp
308
- vision_model.encoder.layers.21.mlp.act
309
- vision_model.encoder.layers.21.mlp.fc1
310
- vision_model.encoder.layers.21.mlp.fc2
311
- vision_model.encoder.layers.21.norm1
312
- vision_model.encoder.layers.21.norm2
313
- vision_model.encoder.layers.21.drop_path1
314
- vision_model.encoder.layers.21.drop_path2
315
- vision_model.encoder.layers.22
316
- vision_model.encoder.layers.22.attn
317
- vision_model.encoder.layers.22.attn.qkv
318
- vision_model.encoder.layers.22.attn.attn_drop
319
- vision_model.encoder.layers.22.attn.proj_drop
320
- vision_model.encoder.layers.22.attn.proj
321
- vision_model.encoder.layers.22.mlp
322
- vision_model.encoder.layers.22.mlp.act
323
- vision_model.encoder.layers.22.mlp.fc1
324
- vision_model.encoder.layers.22.mlp.fc2
325
- vision_model.encoder.layers.22.norm1
326
- vision_model.encoder.layers.22.norm2
327
- vision_model.encoder.layers.22.drop_path1
328
- vision_model.encoder.layers.22.drop_path2
329
- vision_model.encoder.layers.23
330
- vision_model.encoder.layers.23.attn
331
- vision_model.encoder.layers.23.attn.qkv
332
- vision_model.encoder.layers.23.attn.attn_drop
333
- vision_model.encoder.layers.23.attn.proj_drop
334
- vision_model.encoder.layers.23.attn.proj
335
- vision_model.encoder.layers.23.mlp
336
- vision_model.encoder.layers.23.mlp.act
337
- vision_model.encoder.layers.23.mlp.fc1
338
- vision_model.encoder.layers.23.mlp.fc2
339
- vision_model.encoder.layers.23.norm1
340
- vision_model.encoder.layers.23.norm2
341
- vision_model.encoder.layers.23.drop_path1
342
- vision_model.encoder.layers.23.drop_path2
343
- language_model
344
- language_model.model
345
- language_model.model.tok_embeddings
346
- language_model.model.layers
347
- language_model.model.layers.0
348
- language_model.model.layers.0.attention
349
- language_model.model.layers.0.attention.wqkv
350
- language_model.model.layers.0.attention.wo
351
- language_model.model.layers.0.attention.rotary_emb
352
- language_model.model.layers.0.feed_forward
353
- language_model.model.layers.0.feed_forward.w1
354
- language_model.model.layers.0.feed_forward.w3
355
- language_model.model.layers.0.feed_forward.w2
356
- language_model.model.layers.0.feed_forward.act_fn
357
- language_model.model.layers.0.attention_norm
358
- language_model.model.layers.0.ffn_norm
359
- language_model.model.layers.1
360
- language_model.model.layers.1.attention
361
- language_model.model.layers.1.attention.wqkv
362
- language_model.model.layers.1.attention.wo
363
- language_model.model.layers.1.attention.rotary_emb
364
- language_model.model.layers.1.feed_forward
365
- language_model.model.layers.1.feed_forward.w1
366
- language_model.model.layers.1.feed_forward.w3
367
- language_model.model.layers.1.feed_forward.w2
368
- language_model.model.layers.1.feed_forward.act_fn
369
- language_model.model.layers.1.attention_norm
370
- language_model.model.layers.1.ffn_norm
371
- language_model.model.layers.2
372
- language_model.model.layers.2.attention
373
- language_model.model.layers.2.attention.wqkv
374
- language_model.model.layers.2.attention.wo
375
- language_model.model.layers.2.attention.rotary_emb
376
- language_model.model.layers.2.feed_forward
377
- language_model.model.layers.2.feed_forward.w1
378
- language_model.model.layers.2.feed_forward.w3
379
- language_model.model.layers.2.feed_forward.w2
380
- language_model.model.layers.2.feed_forward.act_fn
381
- language_model.model.layers.2.attention_norm
382
- language_model.model.layers.2.ffn_norm
383
- language_model.model.layers.3
384
- language_model.model.layers.3.attention
385
- language_model.model.layers.3.attention.wqkv
386
- language_model.model.layers.3.attention.wo
387
- language_model.model.layers.3.attention.rotary_emb
388
- language_model.model.layers.3.feed_forward
389
- language_model.model.layers.3.feed_forward.w1
390
- language_model.model.layers.3.feed_forward.w3
391
- language_model.model.layers.3.feed_forward.w2
392
- language_model.model.layers.3.feed_forward.act_fn
393
- language_model.model.layers.3.attention_norm
394
- language_model.model.layers.3.ffn_norm
395
- language_model.model.layers.4
396
- language_model.model.layers.4.attention
397
- language_model.model.layers.4.attention.wqkv
398
- language_model.model.layers.4.attention.wo
399
- language_model.model.layers.4.attention.rotary_emb
400
- language_model.model.layers.4.feed_forward
401
- language_model.model.layers.4.feed_forward.w1
402
- language_model.model.layers.4.feed_forward.w3
403
- language_model.model.layers.4.feed_forward.w2
404
- language_model.model.layers.4.feed_forward.act_fn
405
- language_model.model.layers.4.attention_norm
406
- language_model.model.layers.4.ffn_norm
407
- language_model.model.layers.5
408
- language_model.model.layers.5.attention
409
- language_model.model.layers.5.attention.wqkv
410
- language_model.model.layers.5.attention.wo
411
- language_model.model.layers.5.attention.rotary_emb
412
- language_model.model.layers.5.feed_forward
413
- language_model.model.layers.5.feed_forward.w1
414
- language_model.model.layers.5.feed_forward.w3
415
- language_model.model.layers.5.feed_forward.w2
416
- language_model.model.layers.5.feed_forward.act_fn
417
- language_model.model.layers.5.attention_norm
418
- language_model.model.layers.5.ffn_norm
419
- language_model.model.layers.6
420
- language_model.model.layers.6.attention
421
- language_model.model.layers.6.attention.wqkv
422
- language_model.model.layers.6.attention.wo
423
- language_model.model.layers.6.attention.rotary_emb
424
- language_model.model.layers.6.feed_forward
425
- language_model.model.layers.6.feed_forward.w1
426
- language_model.model.layers.6.feed_forward.w3
427
- language_model.model.layers.6.feed_forward.w2
428
- language_model.model.layers.6.feed_forward.act_fn
429
- language_model.model.layers.6.attention_norm
430
- language_model.model.layers.6.ffn_norm
431
- language_model.model.layers.7
432
- language_model.model.layers.7.attention
433
- language_model.model.layers.7.attention.wqkv
434
- language_model.model.layers.7.attention.wo
435
- language_model.model.layers.7.attention.rotary_emb
436
- language_model.model.layers.7.feed_forward
437
- language_model.model.layers.7.feed_forward.w1
438
- language_model.model.layers.7.feed_forward.w3
439
- language_model.model.layers.7.feed_forward.w2
440
- language_model.model.layers.7.feed_forward.act_fn
441
- language_model.model.layers.7.attention_norm
442
- language_model.model.layers.7.ffn_norm
443
- language_model.model.layers.8
444
- language_model.model.layers.8.attention
445
- language_model.model.layers.8.attention.wqkv
446
- language_model.model.layers.8.attention.wo
447
- language_model.model.layers.8.attention.rotary_emb
448
- language_model.model.layers.8.feed_forward
449
- language_model.model.layers.8.feed_forward.w1
450
- language_model.model.layers.8.feed_forward.w3
451
- language_model.model.layers.8.feed_forward.w2
452
- language_model.model.layers.8.feed_forward.act_fn
453
- language_model.model.layers.8.attention_norm
454
- language_model.model.layers.8.ffn_norm
455
- language_model.model.layers.9
456
- language_model.model.layers.9.attention
457
- language_model.model.layers.9.attention.wqkv
458
- language_model.model.layers.9.attention.wo
459
- language_model.model.layers.9.attention.rotary_emb
460
- language_model.model.layers.9.feed_forward
461
- language_model.model.layers.9.feed_forward.w1
462
- language_model.model.layers.9.feed_forward.w3
463
- language_model.model.layers.9.feed_forward.w2
464
- language_model.model.layers.9.feed_forward.act_fn
465
- language_model.model.layers.9.attention_norm
466
- language_model.model.layers.9.ffn_norm
467
- language_model.model.layers.10
468
- language_model.model.layers.10.attention
469
- language_model.model.layers.10.attention.wqkv
470
- language_model.model.layers.10.attention.wo
471
- language_model.model.layers.10.attention.rotary_emb
472
- language_model.model.layers.10.feed_forward
473
- language_model.model.layers.10.feed_forward.w1
474
- language_model.model.layers.10.feed_forward.w3
475
- language_model.model.layers.10.feed_forward.w2
476
- language_model.model.layers.10.feed_forward.act_fn
477
- language_model.model.layers.10.attention_norm
478
- language_model.model.layers.10.ffn_norm
479
- language_model.model.layers.11
480
- language_model.model.layers.11.attention
481
- language_model.model.layers.11.attention.wqkv
482
- language_model.model.layers.11.attention.wo
483
- language_model.model.layers.11.attention.rotary_emb
484
- language_model.model.layers.11.feed_forward
485
- language_model.model.layers.11.feed_forward.w1
486
- language_model.model.layers.11.feed_forward.w3
487
- language_model.model.layers.11.feed_forward.w2
488
- language_model.model.layers.11.feed_forward.act_fn
489
- language_model.model.layers.11.attention_norm
490
- language_model.model.layers.11.ffn_norm
491
- language_model.model.layers.12
492
- language_model.model.layers.12.attention
493
- language_model.model.layers.12.attention.wqkv
494
- language_model.model.layers.12.attention.wo
495
- language_model.model.layers.12.attention.rotary_emb
496
- language_model.model.layers.12.feed_forward
497
- language_model.model.layers.12.feed_forward.w1
498
- language_model.model.layers.12.feed_forward.w3
499
- language_model.model.layers.12.feed_forward.w2
500
- language_model.model.layers.12.feed_forward.act_fn
501
- language_model.model.layers.12.attention_norm
502
- language_model.model.layers.12.ffn_norm
503
- language_model.model.layers.13
504
- language_model.model.layers.13.attention
505
- language_model.model.layers.13.attention.wqkv
506
- language_model.model.layers.13.attention.wo
507
- language_model.model.layers.13.attention.rotary_emb
508
- language_model.model.layers.13.feed_forward
509
- language_model.model.layers.13.feed_forward.w1
510
- language_model.model.layers.13.feed_forward.w3
511
- language_model.model.layers.13.feed_forward.w2
512
- language_model.model.layers.13.feed_forward.act_fn
513
- language_model.model.layers.13.attention_norm
514
- language_model.model.layers.13.ffn_norm
515
- language_model.model.layers.14
516
- language_model.model.layers.14.attention
517
- language_model.model.layers.14.attention.wqkv
518
- language_model.model.layers.14.attention.wo
519
- language_model.model.layers.14.attention.rotary_emb
520
- language_model.model.layers.14.feed_forward
521
- language_model.model.layers.14.feed_forward.w1
522
- language_model.model.layers.14.feed_forward.w3
523
- language_model.model.layers.14.feed_forward.w2
524
- language_model.model.layers.14.feed_forward.act_fn
525
- language_model.model.layers.14.attention_norm
526
- language_model.model.layers.14.ffn_norm
527
- language_model.model.layers.15
528
- language_model.model.layers.15.attention
529
- language_model.model.layers.15.attention.wqkv
530
- language_model.model.layers.15.attention.wo
531
- language_model.model.layers.15.attention.rotary_emb
532
- language_model.model.layers.15.feed_forward
533
- language_model.model.layers.15.feed_forward.w1
534
- language_model.model.layers.15.feed_forward.w3
535
- language_model.model.layers.15.feed_forward.w2
536
- language_model.model.layers.15.feed_forward.act_fn
537
- language_model.model.layers.15.attention_norm
538
- language_model.model.layers.15.ffn_norm
539
- language_model.model.layers.16
540
- language_model.model.layers.16.attention
541
- language_model.model.layers.16.attention.wqkv
542
- language_model.model.layers.16.attention.wo
543
- language_model.model.layers.16.attention.rotary_emb
544
- language_model.model.layers.16.feed_forward
545
- language_model.model.layers.16.feed_forward.w1
546
- language_model.model.layers.16.feed_forward.w3
547
- language_model.model.layers.16.feed_forward.w2
548
- language_model.model.layers.16.feed_forward.act_fn
549
- language_model.model.layers.16.attention_norm
550
- language_model.model.layers.16.ffn_norm
551
- language_model.model.layers.17
552
- language_model.model.layers.17.attention
553
- language_model.model.layers.17.attention.wqkv
554
- language_model.model.layers.17.attention.wo
555
- language_model.model.layers.17.attention.rotary_emb
556
- language_model.model.layers.17.feed_forward
557
- language_model.model.layers.17.feed_forward.w1
558
- language_model.model.layers.17.feed_forward.w3
559
- language_model.model.layers.17.feed_forward.w2
560
- language_model.model.layers.17.feed_forward.act_fn
561
- language_model.model.layers.17.attention_norm
562
- language_model.model.layers.17.ffn_norm
563
- language_model.model.layers.18
564
- language_model.model.layers.18.attention
565
- language_model.model.layers.18.attention.wqkv
566
- language_model.model.layers.18.attention.wo
567
- language_model.model.layers.18.attention.rotary_emb
568
- language_model.model.layers.18.feed_forward
569
- language_model.model.layers.18.feed_forward.w1
570
- language_model.model.layers.18.feed_forward.w3
571
- language_model.model.layers.18.feed_forward.w2
572
- language_model.model.layers.18.feed_forward.act_fn
573
- language_model.model.layers.18.attention_norm
574
- language_model.model.layers.18.ffn_norm
575
- language_model.model.layers.19
576
- language_model.model.layers.19.attention
577
- language_model.model.layers.19.attention.wqkv
578
- language_model.model.layers.19.attention.wo
579
- language_model.model.layers.19.attention.rotary_emb
580
- language_model.model.layers.19.feed_forward
581
- language_model.model.layers.19.feed_forward.w1
582
- language_model.model.layers.19.feed_forward.w3
583
- language_model.model.layers.19.feed_forward.w2
584
- language_model.model.layers.19.feed_forward.act_fn
585
- language_model.model.layers.19.attention_norm
586
- language_model.model.layers.19.ffn_norm
587
- language_model.model.layers.20
588
- language_model.model.layers.20.attention
589
- language_model.model.layers.20.attention.wqkv
590
- language_model.model.layers.20.attention.wo
591
- language_model.model.layers.20.attention.rotary_emb
592
- language_model.model.layers.20.feed_forward
593
- language_model.model.layers.20.feed_forward.w1
594
- language_model.model.layers.20.feed_forward.w3
595
- language_model.model.layers.20.feed_forward.w2
596
- language_model.model.layers.20.feed_forward.act_fn
597
- language_model.model.layers.20.attention_norm
598
- language_model.model.layers.20.ffn_norm
599
- language_model.model.layers.21
600
- language_model.model.layers.21.attention
601
- language_model.model.layers.21.attention.wqkv
602
- language_model.model.layers.21.attention.wo
603
- language_model.model.layers.21.attention.rotary_emb
604
- language_model.model.layers.21.feed_forward
605
- language_model.model.layers.21.feed_forward.w1
606
- language_model.model.layers.21.feed_forward.w3
607
- language_model.model.layers.21.feed_forward.w2
608
- language_model.model.layers.21.feed_forward.act_fn
609
- language_model.model.layers.21.attention_norm
610
- language_model.model.layers.21.ffn_norm
611
- language_model.model.layers.22
612
- language_model.model.layers.22.attention
613
- language_model.model.layers.22.attention.wqkv
614
- language_model.model.layers.22.attention.wo
615
- language_model.model.layers.22.attention.rotary_emb
616
- language_model.model.layers.22.feed_forward
617
- language_model.model.layers.22.feed_forward.w1
618
- language_model.model.layers.22.feed_forward.w3
619
- language_model.model.layers.22.feed_forward.w2
620
- language_model.model.layers.22.feed_forward.act_fn
621
- language_model.model.layers.22.attention_norm
622
- language_model.model.layers.22.ffn_norm
623
- language_model.model.layers.23
624
- language_model.model.layers.23.attention
625
- language_model.model.layers.23.attention.wqkv
626
- language_model.model.layers.23.attention.wo
627
- language_model.model.layers.23.attention.rotary_emb
628
- language_model.model.layers.23.feed_forward
629
- language_model.model.layers.23.feed_forward.w1
630
- language_model.model.layers.23.feed_forward.w3
631
- language_model.model.layers.23.feed_forward.w2
632
- language_model.model.layers.23.feed_forward.act_fn
633
- language_model.model.layers.23.attention_norm
634
- language_model.model.layers.23.ffn_norm
635
- language_model.model.layers.24
636
- language_model.model.layers.24.attention
637
- language_model.model.layers.24.attention.wqkv
638
- language_model.model.layers.24.attention.wo
639
- language_model.model.layers.24.attention.rotary_emb
640
- language_model.model.layers.24.feed_forward
641
- language_model.model.layers.24.feed_forward.w1
642
- language_model.model.layers.24.feed_forward.w3
643
- language_model.model.layers.24.feed_forward.w2
644
- language_model.model.layers.24.feed_forward.act_fn
645
- language_model.model.layers.24.attention_norm
646
- language_model.model.layers.24.ffn_norm
647
- language_model.model.layers.25
648
- language_model.model.layers.25.attention
649
- language_model.model.layers.25.attention.wqkv
650
- language_model.model.layers.25.attention.wo
651
- language_model.model.layers.25.attention.rotary_emb
652
- language_model.model.layers.25.feed_forward
653
- language_model.model.layers.25.feed_forward.w1
654
- language_model.model.layers.25.feed_forward.w3
655
- language_model.model.layers.25.feed_forward.w2
656
- language_model.model.layers.25.feed_forward.act_fn
657
- language_model.model.layers.25.attention_norm
658
- language_model.model.layers.25.ffn_norm
659
- language_model.model.layers.26
660
- language_model.model.layers.26.attention
661
- language_model.model.layers.26.attention.wqkv
662
- language_model.model.layers.26.attention.wo
663
- language_model.model.layers.26.attention.rotary_emb
664
- language_model.model.layers.26.feed_forward
665
- language_model.model.layers.26.feed_forward.w1
666
- language_model.model.layers.26.feed_forward.w3
667
- language_model.model.layers.26.feed_forward.w2
668
- language_model.model.layers.26.feed_forward.act_fn
669
- language_model.model.layers.26.attention_norm
670
- language_model.model.layers.26.ffn_norm
671
- language_model.model.layers.27
672
- language_model.model.layers.27.attention
673
- language_model.model.layers.27.attention.wqkv
674
- language_model.model.layers.27.attention.wo
675
- language_model.model.layers.27.attention.rotary_emb
676
- language_model.model.layers.27.feed_forward
677
- language_model.model.layers.27.feed_forward.w1
678
- language_model.model.layers.27.feed_forward.w3
679
- language_model.model.layers.27.feed_forward.w2
680
- language_model.model.layers.27.feed_forward.act_fn
681
- language_model.model.layers.27.attention_norm
682
- language_model.model.layers.27.ffn_norm
683
- language_model.model.layers.28
684
- language_model.model.layers.28.attention
685
- language_model.model.layers.28.attention.wqkv
686
- language_model.model.layers.28.attention.wo
687
- language_model.model.layers.28.attention.rotary_emb
688
- language_model.model.layers.28.feed_forward
689
- language_model.model.layers.28.feed_forward.w1
690
- language_model.model.layers.28.feed_forward.w3
691
- language_model.model.layers.28.feed_forward.w2
692
- language_model.model.layers.28.feed_forward.act_fn
693
- language_model.model.layers.28.attention_norm
694
- language_model.model.layers.28.ffn_norm
695
- language_model.model.layers.29
696
- language_model.model.layers.29.attention
697
- language_model.model.layers.29.attention.wqkv
698
- language_model.model.layers.29.attention.wo
699
- language_model.model.layers.29.attention.rotary_emb
700
- language_model.model.layers.29.feed_forward
701
- language_model.model.layers.29.feed_forward.w1
702
- language_model.model.layers.29.feed_forward.w3
703
- language_model.model.layers.29.feed_forward.w2
704
- language_model.model.layers.29.feed_forward.act_fn
705
- language_model.model.layers.29.attention_norm
706
- language_model.model.layers.29.ffn_norm
707
- language_model.model.layers.30
708
- language_model.model.layers.30.attention
709
- language_model.model.layers.30.attention.wqkv
710
- language_model.model.layers.30.attention.wo
711
- language_model.model.layers.30.attention.rotary_emb
712
- language_model.model.layers.30.feed_forward
713
- language_model.model.layers.30.feed_forward.w1
714
- language_model.model.layers.30.feed_forward.w3
715
- language_model.model.layers.30.feed_forward.w2
716
- language_model.model.layers.30.feed_forward.act_fn
717
- language_model.model.layers.30.attention_norm
718
- language_model.model.layers.30.ffn_norm
719
- language_model.model.layers.31
720
- language_model.model.layers.31.attention
721
- language_model.model.layers.31.attention.wqkv
722
- language_model.model.layers.31.attention.wo
723
- language_model.model.layers.31.attention.rotary_emb
724
- language_model.model.layers.31.feed_forward
725
- language_model.model.layers.31.feed_forward.w1
726
- language_model.model.layers.31.feed_forward.w3
727
- language_model.model.layers.31.feed_forward.w2
728
- language_model.model.layers.31.feed_forward.act_fn
729
- language_model.model.layers.31.attention_norm
730
- language_model.model.layers.31.ffn_norm
731
- language_model.model.norm
732
- language_model.output
733
- mlp1
734
- mlp1.0
735
- mlp1.1
736
- mlp1.2
737
- mlp1.3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/mistralai/Pixtral-12B-2409.txt DELETED
@@ -1,782 +0,0 @@
1
-
2
- tok_embeddings
3
- vision_encoder
4
- vision_encoder.patch_conv
5
- vision_encoder.ln_pre
6
- vision_encoder.transformer
7
- vision_encoder.transformer.layers
8
- vision_encoder.transformer.layers.0
9
- vision_encoder.transformer.layers.0.attention
10
- vision_encoder.transformer.layers.0.attention.wq
11
- vision_encoder.transformer.layers.0.attention.wk
12
- vision_encoder.transformer.layers.0.attention.wv
13
- vision_encoder.transformer.layers.0.attention.wo
14
- vision_encoder.transformer.layers.0.attention_norm
15
- vision_encoder.transformer.layers.0.ffn_norm
16
- vision_encoder.transformer.layers.0.feed_forward
17
- vision_encoder.transformer.layers.0.feed_forward.w1
18
- vision_encoder.transformer.layers.0.feed_forward.w2
19
- vision_encoder.transformer.layers.0.feed_forward.w3
20
- vision_encoder.transformer.layers.1
21
- vision_encoder.transformer.layers.1.attention
22
- vision_encoder.transformer.layers.1.attention.wq
23
- vision_encoder.transformer.layers.1.attention.wk
24
- vision_encoder.transformer.layers.1.attention.wv
25
- vision_encoder.transformer.layers.1.attention.wo
26
- vision_encoder.transformer.layers.1.attention_norm
27
- vision_encoder.transformer.layers.1.ffn_norm
28
- vision_encoder.transformer.layers.1.feed_forward
29
- vision_encoder.transformer.layers.1.feed_forward.w1
30
- vision_encoder.transformer.layers.1.feed_forward.w2
31
- vision_encoder.transformer.layers.1.feed_forward.w3
32
- vision_encoder.transformer.layers.2
33
- vision_encoder.transformer.layers.2.attention
34
- vision_encoder.transformer.layers.2.attention.wq
35
- vision_encoder.transformer.layers.2.attention.wk
36
- vision_encoder.transformer.layers.2.attention.wv
37
- vision_encoder.transformer.layers.2.attention.wo
38
- vision_encoder.transformer.layers.2.attention_norm
39
- vision_encoder.transformer.layers.2.ffn_norm
40
- vision_encoder.transformer.layers.2.feed_forward
41
- vision_encoder.transformer.layers.2.feed_forward.w1
42
- vision_encoder.transformer.layers.2.feed_forward.w2
43
- vision_encoder.transformer.layers.2.feed_forward.w3
44
- vision_encoder.transformer.layers.3
45
- vision_encoder.transformer.layers.3.attention
46
- vision_encoder.transformer.layers.3.attention.wq
47
- vision_encoder.transformer.layers.3.attention.wk
48
- vision_encoder.transformer.layers.3.attention.wv
49
- vision_encoder.transformer.layers.3.attention.wo
50
- vision_encoder.transformer.layers.3.attention_norm
51
- vision_encoder.transformer.layers.3.ffn_norm
52
- vision_encoder.transformer.layers.3.feed_forward
53
- vision_encoder.transformer.layers.3.feed_forward.w1
54
- vision_encoder.transformer.layers.3.feed_forward.w2
55
- vision_encoder.transformer.layers.3.feed_forward.w3
56
- vision_encoder.transformer.layers.4
57
- vision_encoder.transformer.layers.4.attention
58
- vision_encoder.transformer.layers.4.attention.wq
59
- vision_encoder.transformer.layers.4.attention.wk
60
- vision_encoder.transformer.layers.4.attention.wv
61
- vision_encoder.transformer.layers.4.attention.wo
62
- vision_encoder.transformer.layers.4.attention_norm
63
- vision_encoder.transformer.layers.4.ffn_norm
64
- vision_encoder.transformer.layers.4.feed_forward
65
- vision_encoder.transformer.layers.4.feed_forward.w1
66
- vision_encoder.transformer.layers.4.feed_forward.w2
67
- vision_encoder.transformer.layers.4.feed_forward.w3
68
- vision_encoder.transformer.layers.5
69
- vision_encoder.transformer.layers.5.attention
70
- vision_encoder.transformer.layers.5.attention.wq
71
- vision_encoder.transformer.layers.5.attention.wk
72
- vision_encoder.transformer.layers.5.attention.wv
73
- vision_encoder.transformer.layers.5.attention.wo
74
- vision_encoder.transformer.layers.5.attention_norm
75
- vision_encoder.transformer.layers.5.ffn_norm
76
- vision_encoder.transformer.layers.5.feed_forward
77
- vision_encoder.transformer.layers.5.feed_forward.w1
78
- vision_encoder.transformer.layers.5.feed_forward.w2
79
- vision_encoder.transformer.layers.5.feed_forward.w3
80
- vision_encoder.transformer.layers.6
81
- vision_encoder.transformer.layers.6.attention
82
- vision_encoder.transformer.layers.6.attention.wq
83
- vision_encoder.transformer.layers.6.attention.wk
84
- vision_encoder.transformer.layers.6.attention.wv
85
- vision_encoder.transformer.layers.6.attention.wo
86
- vision_encoder.transformer.layers.6.attention_norm
87
- vision_encoder.transformer.layers.6.ffn_norm
88
- vision_encoder.transformer.layers.6.feed_forward
89
- vision_encoder.transformer.layers.6.feed_forward.w1
90
- vision_encoder.transformer.layers.6.feed_forward.w2
91
- vision_encoder.transformer.layers.6.feed_forward.w3
92
- vision_encoder.transformer.layers.7
93
- vision_encoder.transformer.layers.7.attention
94
- vision_encoder.transformer.layers.7.attention.wq
95
- vision_encoder.transformer.layers.7.attention.wk
96
- vision_encoder.transformer.layers.7.attention.wv
97
- vision_encoder.transformer.layers.7.attention.wo
98
- vision_encoder.transformer.layers.7.attention_norm
99
- vision_encoder.transformer.layers.7.ffn_norm
100
- vision_encoder.transformer.layers.7.feed_forward
101
- vision_encoder.transformer.layers.7.feed_forward.w1
102
- vision_encoder.transformer.layers.7.feed_forward.w2
103
- vision_encoder.transformer.layers.7.feed_forward.w3
104
- vision_encoder.transformer.layers.8
105
- vision_encoder.transformer.layers.8.attention
106
- vision_encoder.transformer.layers.8.attention.wq
107
- vision_encoder.transformer.layers.8.attention.wk
108
- vision_encoder.transformer.layers.8.attention.wv
109
- vision_encoder.transformer.layers.8.attention.wo
110
- vision_encoder.transformer.layers.8.attention_norm
111
- vision_encoder.transformer.layers.8.ffn_norm
112
- vision_encoder.transformer.layers.8.feed_forward
113
- vision_encoder.transformer.layers.8.feed_forward.w1
114
- vision_encoder.transformer.layers.8.feed_forward.w2
115
- vision_encoder.transformer.layers.8.feed_forward.w3
116
- vision_encoder.transformer.layers.9
117
- vision_encoder.transformer.layers.9.attention
118
- vision_encoder.transformer.layers.9.attention.wq
119
- vision_encoder.transformer.layers.9.attention.wk
120
- vision_encoder.transformer.layers.9.attention.wv
121
- vision_encoder.transformer.layers.9.attention.wo
122
- vision_encoder.transformer.layers.9.attention_norm
123
- vision_encoder.transformer.layers.9.ffn_norm
124
- vision_encoder.transformer.layers.9.feed_forward
125
- vision_encoder.transformer.layers.9.feed_forward.w1
126
- vision_encoder.transformer.layers.9.feed_forward.w2
127
- vision_encoder.transformer.layers.9.feed_forward.w3
128
- vision_encoder.transformer.layers.10
129
- vision_encoder.transformer.layers.10.attention
130
- vision_encoder.transformer.layers.10.attention.wq
131
- vision_encoder.transformer.layers.10.attention.wk
132
- vision_encoder.transformer.layers.10.attention.wv
133
- vision_encoder.transformer.layers.10.attention.wo
134
- vision_encoder.transformer.layers.10.attention_norm
135
- vision_encoder.transformer.layers.10.ffn_norm
136
- vision_encoder.transformer.layers.10.feed_forward
137
- vision_encoder.transformer.layers.10.feed_forward.w1
138
- vision_encoder.transformer.layers.10.feed_forward.w2
139
- vision_encoder.transformer.layers.10.feed_forward.w3
140
- vision_encoder.transformer.layers.11
141
- vision_encoder.transformer.layers.11.attention
142
- vision_encoder.transformer.layers.11.attention.wq
143
- vision_encoder.transformer.layers.11.attention.wk
144
- vision_encoder.transformer.layers.11.attention.wv
145
- vision_encoder.transformer.layers.11.attention.wo
146
- vision_encoder.transformer.layers.11.attention_norm
147
- vision_encoder.transformer.layers.11.ffn_norm
148
- vision_encoder.transformer.layers.11.feed_forward
149
- vision_encoder.transformer.layers.11.feed_forward.w1
150
- vision_encoder.transformer.layers.11.feed_forward.w2
151
- vision_encoder.transformer.layers.11.feed_forward.w3
152
- vision_encoder.transformer.layers.12
153
- vision_encoder.transformer.layers.12.attention
154
- vision_encoder.transformer.layers.12.attention.wq
155
- vision_encoder.transformer.layers.12.attention.wk
156
- vision_encoder.transformer.layers.12.attention.wv
157
- vision_encoder.transformer.layers.12.attention.wo
158
- vision_encoder.transformer.layers.12.attention_norm
159
- vision_encoder.transformer.layers.12.ffn_norm
160
- vision_encoder.transformer.layers.12.feed_forward
161
- vision_encoder.transformer.layers.12.feed_forward.w1
162
- vision_encoder.transformer.layers.12.feed_forward.w2
163
- vision_encoder.transformer.layers.12.feed_forward.w3
164
- vision_encoder.transformer.layers.13
165
- vision_encoder.transformer.layers.13.attention
166
- vision_encoder.transformer.layers.13.attention.wq
167
- vision_encoder.transformer.layers.13.attention.wk
168
- vision_encoder.transformer.layers.13.attention.wv
169
- vision_encoder.transformer.layers.13.attention.wo
170
- vision_encoder.transformer.layers.13.attention_norm
171
- vision_encoder.transformer.layers.13.ffn_norm
172
- vision_encoder.transformer.layers.13.feed_forward
173
- vision_encoder.transformer.layers.13.feed_forward.w1
174
- vision_encoder.transformer.layers.13.feed_forward.w2
175
- vision_encoder.transformer.layers.13.feed_forward.w3
176
- vision_encoder.transformer.layers.14
177
- vision_encoder.transformer.layers.14.attention
178
- vision_encoder.transformer.layers.14.attention.wq
179
- vision_encoder.transformer.layers.14.attention.wk
180
- vision_encoder.transformer.layers.14.attention.wv
181
- vision_encoder.transformer.layers.14.attention.wo
182
- vision_encoder.transformer.layers.14.attention_norm
183
- vision_encoder.transformer.layers.14.ffn_norm
184
- vision_encoder.transformer.layers.14.feed_forward
185
- vision_encoder.transformer.layers.14.feed_forward.w1
186
- vision_encoder.transformer.layers.14.feed_forward.w2
187
- vision_encoder.transformer.layers.14.feed_forward.w3
188
- vision_encoder.transformer.layers.15
189
- vision_encoder.transformer.layers.15.attention
190
- vision_encoder.transformer.layers.15.attention.wq
191
- vision_encoder.transformer.layers.15.attention.wk
192
- vision_encoder.transformer.layers.15.attention.wv
193
- vision_encoder.transformer.layers.15.attention.wo
194
- vision_encoder.transformer.layers.15.attention_norm
195
- vision_encoder.transformer.layers.15.ffn_norm
196
- vision_encoder.transformer.layers.15.feed_forward
197
- vision_encoder.transformer.layers.15.feed_forward.w1
198
- vision_encoder.transformer.layers.15.feed_forward.w2
199
- vision_encoder.transformer.layers.15.feed_forward.w3
200
- vision_encoder.transformer.layers.16
201
- vision_encoder.transformer.layers.16.attention
202
- vision_encoder.transformer.layers.16.attention.wq
203
- vision_encoder.transformer.layers.16.attention.wk
204
- vision_encoder.transformer.layers.16.attention.wv
205
- vision_encoder.transformer.layers.16.attention.wo
206
- vision_encoder.transformer.layers.16.attention_norm
207
- vision_encoder.transformer.layers.16.ffn_norm
208
- vision_encoder.transformer.layers.16.feed_forward
209
- vision_encoder.transformer.layers.16.feed_forward.w1
210
- vision_encoder.transformer.layers.16.feed_forward.w2
211
- vision_encoder.transformer.layers.16.feed_forward.w3
212
- vision_encoder.transformer.layers.17
213
- vision_encoder.transformer.layers.17.attention
214
- vision_encoder.transformer.layers.17.attention.wq
215
- vision_encoder.transformer.layers.17.attention.wk
216
- vision_encoder.transformer.layers.17.attention.wv
217
- vision_encoder.transformer.layers.17.attention.wo
218
- vision_encoder.transformer.layers.17.attention_norm
219
- vision_encoder.transformer.layers.17.ffn_norm
220
- vision_encoder.transformer.layers.17.feed_forward
221
- vision_encoder.transformer.layers.17.feed_forward.w1
222
- vision_encoder.transformer.layers.17.feed_forward.w2
223
- vision_encoder.transformer.layers.17.feed_forward.w3
224
- vision_encoder.transformer.layers.18
225
- vision_encoder.transformer.layers.18.attention
226
- vision_encoder.transformer.layers.18.attention.wq
227
- vision_encoder.transformer.layers.18.attention.wk
228
- vision_encoder.transformer.layers.18.attention.wv
229
- vision_encoder.transformer.layers.18.attention.wo
230
- vision_encoder.transformer.layers.18.attention_norm
231
- vision_encoder.transformer.layers.18.ffn_norm
232
- vision_encoder.transformer.layers.18.feed_forward
233
- vision_encoder.transformer.layers.18.feed_forward.w1
234
- vision_encoder.transformer.layers.18.feed_forward.w2
235
- vision_encoder.transformer.layers.18.feed_forward.w3
236
- vision_encoder.transformer.layers.19
237
- vision_encoder.transformer.layers.19.attention
238
- vision_encoder.transformer.layers.19.attention.wq
239
- vision_encoder.transformer.layers.19.attention.wk
240
- vision_encoder.transformer.layers.19.attention.wv
241
- vision_encoder.transformer.layers.19.attention.wo
242
- vision_encoder.transformer.layers.19.attention_norm
243
- vision_encoder.transformer.layers.19.ffn_norm
244
- vision_encoder.transformer.layers.19.feed_forward
245
- vision_encoder.transformer.layers.19.feed_forward.w1
246
- vision_encoder.transformer.layers.19.feed_forward.w2
247
- vision_encoder.transformer.layers.19.feed_forward.w3
248
- vision_encoder.transformer.layers.20
249
- vision_encoder.transformer.layers.20.attention
250
- vision_encoder.transformer.layers.20.attention.wq
251
- vision_encoder.transformer.layers.20.attention.wk
252
- vision_encoder.transformer.layers.20.attention.wv
253
- vision_encoder.transformer.layers.20.attention.wo
254
- vision_encoder.transformer.layers.20.attention_norm
255
- vision_encoder.transformer.layers.20.ffn_norm
256
- vision_encoder.transformer.layers.20.feed_forward
257
- vision_encoder.transformer.layers.20.feed_forward.w1
258
- vision_encoder.transformer.layers.20.feed_forward.w2
259
- vision_encoder.transformer.layers.20.feed_forward.w3
260
- vision_encoder.transformer.layers.21
261
- vision_encoder.transformer.layers.21.attention
262
- vision_encoder.transformer.layers.21.attention.wq
263
- vision_encoder.transformer.layers.21.attention.wk
264
- vision_encoder.transformer.layers.21.attention.wv
265
- vision_encoder.transformer.layers.21.attention.wo
266
- vision_encoder.transformer.layers.21.attention_norm
267
- vision_encoder.transformer.layers.21.ffn_norm
268
- vision_encoder.transformer.layers.21.feed_forward
269
- vision_encoder.transformer.layers.21.feed_forward.w1
270
- vision_encoder.transformer.layers.21.feed_forward.w2
271
- vision_encoder.transformer.layers.21.feed_forward.w3
272
- vision_encoder.transformer.layers.22
273
- vision_encoder.transformer.layers.22.attention
274
- vision_encoder.transformer.layers.22.attention.wq
275
- vision_encoder.transformer.layers.22.attention.wk
276
- vision_encoder.transformer.layers.22.attention.wv
277
- vision_encoder.transformer.layers.22.attention.wo
278
- vision_encoder.transformer.layers.22.attention_norm
279
- vision_encoder.transformer.layers.22.ffn_norm
280
- vision_encoder.transformer.layers.22.feed_forward
281
- vision_encoder.transformer.layers.22.feed_forward.w1
282
- vision_encoder.transformer.layers.22.feed_forward.w2
283
- vision_encoder.transformer.layers.22.feed_forward.w3
284
- vision_encoder.transformer.layers.23
285
- vision_encoder.transformer.layers.23.attention
286
- vision_encoder.transformer.layers.23.attention.wq
287
- vision_encoder.transformer.layers.23.attention.wk
288
- vision_encoder.transformer.layers.23.attention.wv
289
- vision_encoder.transformer.layers.23.attention.wo
290
- vision_encoder.transformer.layers.23.attention_norm
291
- vision_encoder.transformer.layers.23.ffn_norm
292
- vision_encoder.transformer.layers.23.feed_forward
293
- vision_encoder.transformer.layers.23.feed_forward.w1
294
- vision_encoder.transformer.layers.23.feed_forward.w2
295
- vision_encoder.transformer.layers.23.feed_forward.w3
296
- vision_language_adapter
297
- vision_language_adapter.w_in
298
- vision_language_adapter.gelu
299
- vision_language_adapter.w_out
300
- norm
301
- output
302
- layers
303
- layers.0
304
- layers.0.attention
305
- layers.0.attention.wq
306
- layers.0.attention.wk
307
- layers.0.attention.wv
308
- layers.0.attention.wo
309
- layers.0.attention_norm
310
- layers.0.ffn_norm
311
- layers.0.feed_forward
312
- layers.0.feed_forward.w1
313
- layers.0.feed_forward.w2
314
- layers.0.feed_forward.w3
315
- layers.1
316
- layers.1.attention
317
- layers.1.attention.wq
318
- layers.1.attention.wk
319
- layers.1.attention.wv
320
- layers.1.attention.wo
321
- layers.1.attention_norm
322
- layers.1.ffn_norm
323
- layers.1.feed_forward
324
- layers.1.feed_forward.w1
325
- layers.1.feed_forward.w2
326
- layers.1.feed_forward.w3
327
- layers.2
328
- layers.2.attention
329
- layers.2.attention.wq
330
- layers.2.attention.wk
331
- layers.2.attention.wv
332
- layers.2.attention.wo
333
- layers.2.attention_norm
334
- layers.2.ffn_norm
335
- layers.2.feed_forward
336
- layers.2.feed_forward.w1
337
- layers.2.feed_forward.w2
338
- layers.2.feed_forward.w3
339
- layers.3
340
- layers.3.attention
341
- layers.3.attention.wq
342
- layers.3.attention.wk
343
- layers.3.attention.wv
344
- layers.3.attention.wo
345
- layers.3.attention_norm
346
- layers.3.ffn_norm
347
- layers.3.feed_forward
348
- layers.3.feed_forward.w1
349
- layers.3.feed_forward.w2
350
- layers.3.feed_forward.w3
351
- layers.4
352
- layers.4.attention
353
- layers.4.attention.wq
354
- layers.4.attention.wk
355
- layers.4.attention.wv
356
- layers.4.attention.wo
357
- layers.4.attention_norm
358
- layers.4.ffn_norm
359
- layers.4.feed_forward
360
- layers.4.feed_forward.w1
361
- layers.4.feed_forward.w2
362
- layers.4.feed_forward.w3
363
- layers.5
364
- layers.5.attention
365
- layers.5.attention.wq
366
- layers.5.attention.wk
367
- layers.5.attention.wv
368
- layers.5.attention.wo
369
- layers.5.attention_norm
370
- layers.5.ffn_norm
371
- layers.5.feed_forward
372
- layers.5.feed_forward.w1
373
- layers.5.feed_forward.w2
374
- layers.5.feed_forward.w3
375
- layers.6
376
- layers.6.attention
377
- layers.6.attention.wq
378
- layers.6.attention.wk
379
- layers.6.attention.wv
380
- layers.6.attention.wo
381
- layers.6.attention_norm
382
- layers.6.ffn_norm
383
- layers.6.feed_forward
384
- layers.6.feed_forward.w1
385
- layers.6.feed_forward.w2
386
- layers.6.feed_forward.w3
387
- layers.7
388
- layers.7.attention
389
- layers.7.attention.wq
390
- layers.7.attention.wk
391
- layers.7.attention.wv
392
- layers.7.attention.wo
393
- layers.7.attention_norm
394
- layers.7.ffn_norm
395
- layers.7.feed_forward
396
- layers.7.feed_forward.w1
397
- layers.7.feed_forward.w2
398
- layers.7.feed_forward.w3
399
- layers.8
400
- layers.8.attention
401
- layers.8.attention.wq
402
- layers.8.attention.wk
403
- layers.8.attention.wv
404
- layers.8.attention.wo
405
- layers.8.attention_norm
406
- layers.8.ffn_norm
407
- layers.8.feed_forward
408
- layers.8.feed_forward.w1
409
- layers.8.feed_forward.w2
410
- layers.8.feed_forward.w3
411
- layers.9
412
- layers.9.attention
413
- layers.9.attention.wq
414
- layers.9.attention.wk
415
- layers.9.attention.wv
416
- layers.9.attention.wo
417
- layers.9.attention_norm
418
- layers.9.ffn_norm
419
- layers.9.feed_forward
420
- layers.9.feed_forward.w1
421
- layers.9.feed_forward.w2
422
- layers.9.feed_forward.w3
423
- layers.10
424
- layers.10.attention
425
- layers.10.attention.wq
426
- layers.10.attention.wk
427
- layers.10.attention.wv
428
- layers.10.attention.wo
429
- layers.10.attention_norm
430
- layers.10.ffn_norm
431
- layers.10.feed_forward
432
- layers.10.feed_forward.w1
433
- layers.10.feed_forward.w2
434
- layers.10.feed_forward.w3
435
- layers.11
436
- layers.11.attention
437
- layers.11.attention.wq
438
- layers.11.attention.wk
439
- layers.11.attention.wv
440
- layers.11.attention.wo
441
- layers.11.attention_norm
442
- layers.11.ffn_norm
443
- layers.11.feed_forward
444
- layers.11.feed_forward.w1
445
- layers.11.feed_forward.w2
446
- layers.11.feed_forward.w3
447
- layers.12
448
- layers.12.attention
449
- layers.12.attention.wq
450
- layers.12.attention.wk
451
- layers.12.attention.wv
452
- layers.12.attention.wo
453
- layers.12.attention_norm
454
- layers.12.ffn_norm
455
- layers.12.feed_forward
456
- layers.12.feed_forward.w1
457
- layers.12.feed_forward.w2
458
- layers.12.feed_forward.w3
459
- layers.13
460
- layers.13.attention
461
- layers.13.attention.wq
462
- layers.13.attention.wk
463
- layers.13.attention.wv
464
- layers.13.attention.wo
465
- layers.13.attention_norm
466
- layers.13.ffn_norm
467
- layers.13.feed_forward
468
- layers.13.feed_forward.w1
469
- layers.13.feed_forward.w2
470
- layers.13.feed_forward.w3
471
- layers.14
472
- layers.14.attention
473
- layers.14.attention.wq
474
- layers.14.attention.wk
475
- layers.14.attention.wv
476
- layers.14.attention.wo
477
- layers.14.attention_norm
478
- layers.14.ffn_norm
479
- layers.14.feed_forward
480
- layers.14.feed_forward.w1
481
- layers.14.feed_forward.w2
482
- layers.14.feed_forward.w3
483
- layers.15
484
- layers.15.attention
485
- layers.15.attention.wq
486
- layers.15.attention.wk
487
- layers.15.attention.wv
488
- layers.15.attention.wo
489
- layers.15.attention_norm
490
- layers.15.ffn_norm
491
- layers.15.feed_forward
492
- layers.15.feed_forward.w1
493
- layers.15.feed_forward.w2
494
- layers.15.feed_forward.w3
495
- layers.16
496
- layers.16.attention
497
- layers.16.attention.wq
498
- layers.16.attention.wk
499
- layers.16.attention.wv
500
- layers.16.attention.wo
501
- layers.16.attention_norm
502
- layers.16.ffn_norm
503
- layers.16.feed_forward
504
- layers.16.feed_forward.w1
505
- layers.16.feed_forward.w2
506
- layers.16.feed_forward.w3
507
- layers.17
508
- layers.17.attention
509
- layers.17.attention.wq
510
- layers.17.attention.wk
511
- layers.17.attention.wv
512
- layers.17.attention.wo
513
- layers.17.attention_norm
514
- layers.17.ffn_norm
515
- layers.17.feed_forward
516
- layers.17.feed_forward.w1
517
- layers.17.feed_forward.w2
518
- layers.17.feed_forward.w3
519
- layers.18
520
- layers.18.attention
521
- layers.18.attention.wq
522
- layers.18.attention.wk
523
- layers.18.attention.wv
524
- layers.18.attention.wo
525
- layers.18.attention_norm
526
- layers.18.ffn_norm
527
- layers.18.feed_forward
528
- layers.18.feed_forward.w1
529
- layers.18.feed_forward.w2
530
- layers.18.feed_forward.w3
531
- layers.19
532
- layers.19.attention
533
- layers.19.attention.wq
534
- layers.19.attention.wk
535
- layers.19.attention.wv
536
- layers.19.attention.wo
537
- layers.19.attention_norm
538
- layers.19.ffn_norm
539
- layers.19.feed_forward
540
- layers.19.feed_forward.w1
541
- layers.19.feed_forward.w2
542
- layers.19.feed_forward.w3
543
- layers.20
544
- layers.20.attention
545
- layers.20.attention.wq
546
- layers.20.attention.wk
547
- layers.20.attention.wv
548
- layers.20.attention.wo
549
- layers.20.attention_norm
550
- layers.20.ffn_norm
551
- layers.20.feed_forward
552
- layers.20.feed_forward.w1
553
- layers.20.feed_forward.w2
554
- layers.20.feed_forward.w3
555
- layers.21
556
- layers.21.attention
557
- layers.21.attention.wq
558
- layers.21.attention.wk
559
- layers.21.attention.wv
560
- layers.21.attention.wo
561
- layers.21.attention_norm
562
- layers.21.ffn_norm
563
- layers.21.feed_forward
564
- layers.21.feed_forward.w1
565
- layers.21.feed_forward.w2
566
- layers.21.feed_forward.w3
567
- layers.22
568
- layers.22.attention
569
- layers.22.attention.wq
570
- layers.22.attention.wk
571
- layers.22.attention.wv
572
- layers.22.attention.wo
573
- layers.22.attention_norm
574
- layers.22.ffn_norm
575
- layers.22.feed_forward
576
- layers.22.feed_forward.w1
577
- layers.22.feed_forward.w2
578
- layers.22.feed_forward.w3
579
- layers.23
580
- layers.23.attention
581
- layers.23.attention.wq
582
- layers.23.attention.wk
583
- layers.23.attention.wv
584
- layers.23.attention.wo
585
- layers.23.attention_norm
586
- layers.23.ffn_norm
587
- layers.23.feed_forward
588
- layers.23.feed_forward.w1
589
- layers.23.feed_forward.w2
590
- layers.23.feed_forward.w3
591
- layers.24
592
- layers.24.attention
593
- layers.24.attention.wq
594
- layers.24.attention.wk
595
- layers.24.attention.wv
596
- layers.24.attention.wo
597
- layers.24.attention_norm
598
- layers.24.ffn_norm
599
- layers.24.feed_forward
600
- layers.24.feed_forward.w1
601
- layers.24.feed_forward.w2
602
- layers.24.feed_forward.w3
603
- layers.25
604
- layers.25.attention
605
- layers.25.attention.wq
606
- layers.25.attention.wk
607
- layers.25.attention.wv
608
- layers.25.attention.wo
609
- layers.25.attention_norm
610
- layers.25.ffn_norm
611
- layers.25.feed_forward
612
- layers.25.feed_forward.w1
613
- layers.25.feed_forward.w2
614
- layers.25.feed_forward.w3
615
- layers.26
616
- layers.26.attention
617
- layers.26.attention.wq
618
- layers.26.attention.wk
619
- layers.26.attention.wv
620
- layers.26.attention.wo
621
- layers.26.attention_norm
622
- layers.26.ffn_norm
623
- layers.26.feed_forward
624
- layers.26.feed_forward.w1
625
- layers.26.feed_forward.w2
626
- layers.26.feed_forward.w3
627
- layers.27
628
- layers.27.attention
629
- layers.27.attention.wq
630
- layers.27.attention.wk
631
- layers.27.attention.wv
632
- layers.27.attention.wo
633
- layers.27.attention_norm
634
- layers.27.ffn_norm
635
- layers.27.feed_forward
636
- layers.27.feed_forward.w1
637
- layers.27.feed_forward.w2
638
- layers.27.feed_forward.w3
639
- layers.28
640
- layers.28.attention
641
- layers.28.attention.wq
642
- layers.28.attention.wk
643
- layers.28.attention.wv
644
- layers.28.attention.wo
645
- layers.28.attention_norm
646
- layers.28.ffn_norm
647
- layers.28.feed_forward
648
- layers.28.feed_forward.w1
649
- layers.28.feed_forward.w2
650
- layers.28.feed_forward.w3
651
- layers.29
652
- layers.29.attention
653
- layers.29.attention.wq
654
- layers.29.attention.wk
655
- layers.29.attention.wv
656
- layers.29.attention.wo
657
- layers.29.attention_norm
658
- layers.29.ffn_norm
659
- layers.29.feed_forward
660
- layers.29.feed_forward.w1
661
- layers.29.feed_forward.w2
662
- layers.29.feed_forward.w3
663
- layers.30
664
- layers.30.attention
665
- layers.30.attention.wq
666
- layers.30.attention.wk
667
- layers.30.attention.wv
668
- layers.30.attention.wo
669
- layers.30.attention_norm
670
- layers.30.ffn_norm
671
- layers.30.feed_forward
672
- layers.30.feed_forward.w1
673
- layers.30.feed_forward.w2
674
- layers.30.feed_forward.w3
675
- layers.31
676
- layers.31.attention
677
- layers.31.attention.wq
678
- layers.31.attention.wk
679
- layers.31.attention.wv
680
- layers.31.attention.wo
681
- layers.31.attention_norm
682
- layers.31.ffn_norm
683
- layers.31.feed_forward
684
- layers.31.feed_forward.w1
685
- layers.31.feed_forward.w2
686
- layers.31.feed_forward.w3
687
- layers.32
688
- layers.32.attention
689
- layers.32.attention.wq
690
- layers.32.attention.wk
691
- layers.32.attention.wv
692
- layers.32.attention.wo
693
- layers.32.attention_norm
694
- layers.32.ffn_norm
695
- layers.32.feed_forward
696
- layers.32.feed_forward.w1
697
- layers.32.feed_forward.w2
698
- layers.32.feed_forward.w3
699
- layers.33
700
- layers.33.attention
701
- layers.33.attention.wq
702
- layers.33.attention.wk
703
- layers.33.attention.wv
704
- layers.33.attention.wo
705
- layers.33.attention_norm
706
- layers.33.ffn_norm
707
- layers.33.feed_forward
708
- layers.33.feed_forward.w1
709
- layers.33.feed_forward.w2
710
- layers.33.feed_forward.w3
711
- layers.34
712
- layers.34.attention
713
- layers.34.attention.wq
714
- layers.34.attention.wk
715
- layers.34.attention.wv
716
- layers.34.attention.wo
717
- layers.34.attention_norm
718
- layers.34.ffn_norm
719
- layers.34.feed_forward
720
- layers.34.feed_forward.w1
721
- layers.34.feed_forward.w2
722
- layers.34.feed_forward.w3
723
- layers.35
724
- layers.35.attention
725
- layers.35.attention.wq
726
- layers.35.attention.wk
727
- layers.35.attention.wv
728
- layers.35.attention.wo
729
- layers.35.attention_norm
730
- layers.35.ffn_norm
731
- layers.35.feed_forward
732
- layers.35.feed_forward.w1
733
- layers.35.feed_forward.w2
734
- layers.35.feed_forward.w3
735
- layers.36
736
- layers.36.attention
737
- layers.36.attention.wq
738
- layers.36.attention.wk
739
- layers.36.attention.wv
740
- layers.36.attention.wo
741
- layers.36.attention_norm
742
- layers.36.ffn_norm
743
- layers.36.feed_forward
744
- layers.36.feed_forward.w1
745
- layers.36.feed_forward.w2
746
- layers.36.feed_forward.w3
747
- layers.37
748
- layers.37.attention
749
- layers.37.attention.wq
750
- layers.37.attention.wk
751
- layers.37.attention.wv
752
- layers.37.attention.wo
753
- layers.37.attention_norm
754
- layers.37.ffn_norm
755
- layers.37.feed_forward
756
- layers.37.feed_forward.w1
757
- layers.37.feed_forward.w2
758
- layers.37.feed_forward.w3
759
- layers.38
760
- layers.38.attention
761
- layers.38.attention.wq
762
- layers.38.attention.wk
763
- layers.38.attention.wv
764
- layers.38.attention.wo
765
- layers.38.attention_norm
766
- layers.38.ffn_norm
767
- layers.38.feed_forward
768
- layers.38.feed_forward.w1
769
- layers.38.feed_forward.w2
770
- layers.38.feed_forward.w3
771
- layers.39
772
- layers.39.attention
773
- layers.39.attention.wq
774
- layers.39.attention.wk
775
- layers.39.attention.wv
776
- layers.39.attention.wo
777
- layers.39.attention_norm
778
- layers.39.ffn_norm
779
- layers.39.feed_forward
780
- layers.39.feed_forward.w1
781
- layers.39.feed_forward.w2
782
- layers.39.feed_forward.w3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/openai/clip-vit-base-patch32.txt DELETED
@@ -1,306 +0,0 @@
1
-
2
- text_model
3
- text_model.embeddings
4
- text_model.embeddings.token_embedding
5
- text_model.embeddings.position_embedding
6
- text_model.encoder
7
- text_model.encoder.layers
8
- text_model.encoder.layers.0
9
- text_model.encoder.layers.0.self_attn
10
- text_model.encoder.layers.0.self_attn.k_proj
11
- text_model.encoder.layers.0.self_attn.v_proj
12
- text_model.encoder.layers.0.self_attn.q_proj
13
- text_model.encoder.layers.0.self_attn.out_proj
14
- text_model.encoder.layers.0.layer_norm1
15
- text_model.encoder.layers.0.mlp
16
- text_model.encoder.layers.0.mlp.activation_fn
17
- text_model.encoder.layers.0.mlp.fc1
18
- text_model.encoder.layers.0.mlp.fc2
19
- text_model.encoder.layers.0.layer_norm2
20
- text_model.encoder.layers.1
21
- text_model.encoder.layers.1.self_attn
22
- text_model.encoder.layers.1.self_attn.k_proj
23
- text_model.encoder.layers.1.self_attn.v_proj
24
- text_model.encoder.layers.1.self_attn.q_proj
25
- text_model.encoder.layers.1.self_attn.out_proj
26
- text_model.encoder.layers.1.layer_norm1
27
- text_model.encoder.layers.1.mlp
28
- text_model.encoder.layers.1.mlp.activation_fn
29
- text_model.encoder.layers.1.mlp.fc1
30
- text_model.encoder.layers.1.mlp.fc2
31
- text_model.encoder.layers.1.layer_norm2
32
- text_model.encoder.layers.2
33
- text_model.encoder.layers.2.self_attn
34
- text_model.encoder.layers.2.self_attn.k_proj
35
- text_model.encoder.layers.2.self_attn.v_proj
36
- text_model.encoder.layers.2.self_attn.q_proj
37
- text_model.encoder.layers.2.self_attn.out_proj
38
- text_model.encoder.layers.2.layer_norm1
39
- text_model.encoder.layers.2.mlp
40
- text_model.encoder.layers.2.mlp.activation_fn
41
- text_model.encoder.layers.2.mlp.fc1
42
- text_model.encoder.layers.2.mlp.fc2
43
- text_model.encoder.layers.2.layer_norm2
44
- text_model.encoder.layers.3
45
- text_model.encoder.layers.3.self_attn
46
- text_model.encoder.layers.3.self_attn.k_proj
47
- text_model.encoder.layers.3.self_attn.v_proj
48
- text_model.encoder.layers.3.self_attn.q_proj
49
- text_model.encoder.layers.3.self_attn.out_proj
50
- text_model.encoder.layers.3.layer_norm1
51
- text_model.encoder.layers.3.mlp
52
- text_model.encoder.layers.3.mlp.activation_fn
53
- text_model.encoder.layers.3.mlp.fc1
54
- text_model.encoder.layers.3.mlp.fc2
55
- text_model.encoder.layers.3.layer_norm2
56
- text_model.encoder.layers.4
57
- text_model.encoder.layers.4.self_attn
58
- text_model.encoder.layers.4.self_attn.k_proj
59
- text_model.encoder.layers.4.self_attn.v_proj
60
- text_model.encoder.layers.4.self_attn.q_proj
61
- text_model.encoder.layers.4.self_attn.out_proj
62
- text_model.encoder.layers.4.layer_norm1
63
- text_model.encoder.layers.4.mlp
64
- text_model.encoder.layers.4.mlp.activation_fn
65
- text_model.encoder.layers.4.mlp.fc1
66
- text_model.encoder.layers.4.mlp.fc2
67
- text_model.encoder.layers.4.layer_norm2
68
- text_model.encoder.layers.5
69
- text_model.encoder.layers.5.self_attn
70
- text_model.encoder.layers.5.self_attn.k_proj
71
- text_model.encoder.layers.5.self_attn.v_proj
72
- text_model.encoder.layers.5.self_attn.q_proj
73
- text_model.encoder.layers.5.self_attn.out_proj
74
- text_model.encoder.layers.5.layer_norm1
75
- text_model.encoder.layers.5.mlp
76
- text_model.encoder.layers.5.mlp.activation_fn
77
- text_model.encoder.layers.5.mlp.fc1
78
- text_model.encoder.layers.5.mlp.fc2
79
- text_model.encoder.layers.5.layer_norm2
80
- text_model.encoder.layers.6
81
- text_model.encoder.layers.6.self_attn
82
- text_model.encoder.layers.6.self_attn.k_proj
83
- text_model.encoder.layers.6.self_attn.v_proj
84
- text_model.encoder.layers.6.self_attn.q_proj
85
- text_model.encoder.layers.6.self_attn.out_proj
86
- text_model.encoder.layers.6.layer_norm1
87
- text_model.encoder.layers.6.mlp
88
- text_model.encoder.layers.6.mlp.activation_fn
89
- text_model.encoder.layers.6.mlp.fc1
90
- text_model.encoder.layers.6.mlp.fc2
91
- text_model.encoder.layers.6.layer_norm2
92
- text_model.encoder.layers.7
93
- text_model.encoder.layers.7.self_attn
94
- text_model.encoder.layers.7.self_attn.k_proj
95
- text_model.encoder.layers.7.self_attn.v_proj
96
- text_model.encoder.layers.7.self_attn.q_proj
97
- text_model.encoder.layers.7.self_attn.out_proj
98
- text_model.encoder.layers.7.layer_norm1
99
- text_model.encoder.layers.7.mlp
100
- text_model.encoder.layers.7.mlp.activation_fn
101
- text_model.encoder.layers.7.mlp.fc1
102
- text_model.encoder.layers.7.mlp.fc2
103
- text_model.encoder.layers.7.layer_norm2
104
- text_model.encoder.layers.8
105
- text_model.encoder.layers.8.self_attn
106
- text_model.encoder.layers.8.self_attn.k_proj
107
- text_model.encoder.layers.8.self_attn.v_proj
108
- text_model.encoder.layers.8.self_attn.q_proj
109
- text_model.encoder.layers.8.self_attn.out_proj
110
- text_model.encoder.layers.8.layer_norm1
111
- text_model.encoder.layers.8.mlp
112
- text_model.encoder.layers.8.mlp.activation_fn
113
- text_model.encoder.layers.8.mlp.fc1
114
- text_model.encoder.layers.8.mlp.fc2
115
- text_model.encoder.layers.8.layer_norm2
116
- text_model.encoder.layers.9
117
- text_model.encoder.layers.9.self_attn
118
- text_model.encoder.layers.9.self_attn.k_proj
119
- text_model.encoder.layers.9.self_attn.v_proj
120
- text_model.encoder.layers.9.self_attn.q_proj
121
- text_model.encoder.layers.9.self_attn.out_proj
122
- text_model.encoder.layers.9.layer_norm1
123
- text_model.encoder.layers.9.mlp
124
- text_model.encoder.layers.9.mlp.activation_fn
125
- text_model.encoder.layers.9.mlp.fc1
126
- text_model.encoder.layers.9.mlp.fc2
127
- text_model.encoder.layers.9.layer_norm2
128
- text_model.encoder.layers.10
129
- text_model.encoder.layers.10.self_attn
130
- text_model.encoder.layers.10.self_attn.k_proj
131
- text_model.encoder.layers.10.self_attn.v_proj
132
- text_model.encoder.layers.10.self_attn.q_proj
133
- text_model.encoder.layers.10.self_attn.out_proj
134
- text_model.encoder.layers.10.layer_norm1
135
- text_model.encoder.layers.10.mlp
136
- text_model.encoder.layers.10.mlp.activation_fn
137
- text_model.encoder.layers.10.mlp.fc1
138
- text_model.encoder.layers.10.mlp.fc2
139
- text_model.encoder.layers.10.layer_norm2
140
- text_model.encoder.layers.11
141
- text_model.encoder.layers.11.self_attn
142
- text_model.encoder.layers.11.self_attn.k_proj
143
- text_model.encoder.layers.11.self_attn.v_proj
144
- text_model.encoder.layers.11.self_attn.q_proj
145
- text_model.encoder.layers.11.self_attn.out_proj
146
- text_model.encoder.layers.11.layer_norm1
147
- text_model.encoder.layers.11.mlp
148
- text_model.encoder.layers.11.mlp.activation_fn
149
- text_model.encoder.layers.11.mlp.fc1
150
- text_model.encoder.layers.11.mlp.fc2
151
- text_model.encoder.layers.11.layer_norm2
152
- text_model.final_layer_norm
153
- vision_model
154
- vision_model.embeddings
155
- vision_model.embeddings.patch_embedding
156
- vision_model.embeddings.position_embedding
157
- vision_model.pre_layrnorm
158
- vision_model.encoder
159
- vision_model.encoder.layers
160
- vision_model.encoder.layers.0
161
- vision_model.encoder.layers.0.self_attn
162
- vision_model.encoder.layers.0.self_attn.k_proj
163
- vision_model.encoder.layers.0.self_attn.v_proj
164
- vision_model.encoder.layers.0.self_attn.q_proj
165
- vision_model.encoder.layers.0.self_attn.out_proj
166
- vision_model.encoder.layers.0.layer_norm1
167
- vision_model.encoder.layers.0.mlp
168
- vision_model.encoder.layers.0.mlp.activation_fn
169
- vision_model.encoder.layers.0.mlp.fc1
170
- vision_model.encoder.layers.0.mlp.fc2
171
- vision_model.encoder.layers.0.layer_norm2
172
- vision_model.encoder.layers.1
173
- vision_model.encoder.layers.1.self_attn
174
- vision_model.encoder.layers.1.self_attn.k_proj
175
- vision_model.encoder.layers.1.self_attn.v_proj
176
- vision_model.encoder.layers.1.self_attn.q_proj
177
- vision_model.encoder.layers.1.self_attn.out_proj
178
- vision_model.encoder.layers.1.layer_norm1
179
- vision_model.encoder.layers.1.mlp
180
- vision_model.encoder.layers.1.mlp.activation_fn
181
- vision_model.encoder.layers.1.mlp.fc1
182
- vision_model.encoder.layers.1.mlp.fc2
183
- vision_model.encoder.layers.1.layer_norm2
184
- vision_model.encoder.layers.2
185
- vision_model.encoder.layers.2.self_attn
186
- vision_model.encoder.layers.2.self_attn.k_proj
187
- vision_model.encoder.layers.2.self_attn.v_proj
188
- vision_model.encoder.layers.2.self_attn.q_proj
189
- vision_model.encoder.layers.2.self_attn.out_proj
190
- vision_model.encoder.layers.2.layer_norm1
191
- vision_model.encoder.layers.2.mlp
192
- vision_model.encoder.layers.2.mlp.activation_fn
193
- vision_model.encoder.layers.2.mlp.fc1
194
- vision_model.encoder.layers.2.mlp.fc2
195
- vision_model.encoder.layers.2.layer_norm2
196
- vision_model.encoder.layers.3
197
- vision_model.encoder.layers.3.self_attn
198
- vision_model.encoder.layers.3.self_attn.k_proj
199
- vision_model.encoder.layers.3.self_attn.v_proj
200
- vision_model.encoder.layers.3.self_attn.q_proj
201
- vision_model.encoder.layers.3.self_attn.out_proj
202
- vision_model.encoder.layers.3.layer_norm1
203
- vision_model.encoder.layers.3.mlp
204
- vision_model.encoder.layers.3.mlp.activation_fn
205
- vision_model.encoder.layers.3.mlp.fc1
206
- vision_model.encoder.layers.3.mlp.fc2
207
- vision_model.encoder.layers.3.layer_norm2
208
- vision_model.encoder.layers.4
209
- vision_model.encoder.layers.4.self_attn
210
- vision_model.encoder.layers.4.self_attn.k_proj
211
- vision_model.encoder.layers.4.self_attn.v_proj
212
- vision_model.encoder.layers.4.self_attn.q_proj
213
- vision_model.encoder.layers.4.self_attn.out_proj
214
- vision_model.encoder.layers.4.layer_norm1
215
- vision_model.encoder.layers.4.mlp
216
- vision_model.encoder.layers.4.mlp.activation_fn
217
- vision_model.encoder.layers.4.mlp.fc1
218
- vision_model.encoder.layers.4.mlp.fc2
219
- vision_model.encoder.layers.4.layer_norm2
220
- vision_model.encoder.layers.5
221
- vision_model.encoder.layers.5.self_attn
222
- vision_model.encoder.layers.5.self_attn.k_proj
223
- vision_model.encoder.layers.5.self_attn.v_proj
224
- vision_model.encoder.layers.5.self_attn.q_proj
225
- vision_model.encoder.layers.5.self_attn.out_proj
226
- vision_model.encoder.layers.5.layer_norm1
227
- vision_model.encoder.layers.5.mlp
228
- vision_model.encoder.layers.5.mlp.activation_fn
229
- vision_model.encoder.layers.5.mlp.fc1
230
- vision_model.encoder.layers.5.mlp.fc2
231
- vision_model.encoder.layers.5.layer_norm2
232
- vision_model.encoder.layers.6
233
- vision_model.encoder.layers.6.self_attn
234
- vision_model.encoder.layers.6.self_attn.k_proj
235
- vision_model.encoder.layers.6.self_attn.v_proj
236
- vision_model.encoder.layers.6.self_attn.q_proj
237
- vision_model.encoder.layers.6.self_attn.out_proj
238
- vision_model.encoder.layers.6.layer_norm1
239
- vision_model.encoder.layers.6.mlp
240
- vision_model.encoder.layers.6.mlp.activation_fn
241
- vision_model.encoder.layers.6.mlp.fc1
242
- vision_model.encoder.layers.6.mlp.fc2
243
- vision_model.encoder.layers.6.layer_norm2
244
- vision_model.encoder.layers.7
245
- vision_model.encoder.layers.7.self_attn
246
- vision_model.encoder.layers.7.self_attn.k_proj
247
- vision_model.encoder.layers.7.self_attn.v_proj
248
- vision_model.encoder.layers.7.self_attn.q_proj
249
- vision_model.encoder.layers.7.self_attn.out_proj
250
- vision_model.encoder.layers.7.layer_norm1
251
- vision_model.encoder.layers.7.mlp
252
- vision_model.encoder.layers.7.mlp.activation_fn
253
- vision_model.encoder.layers.7.mlp.fc1
254
- vision_model.encoder.layers.7.mlp.fc2
255
- vision_model.encoder.layers.7.layer_norm2
256
- vision_model.encoder.layers.8
257
- vision_model.encoder.layers.8.self_attn
258
- vision_model.encoder.layers.8.self_attn.k_proj
259
- vision_model.encoder.layers.8.self_attn.v_proj
260
- vision_model.encoder.layers.8.self_attn.q_proj
261
- vision_model.encoder.layers.8.self_attn.out_proj
262
- vision_model.encoder.layers.8.layer_norm1
263
- vision_model.encoder.layers.8.mlp
264
- vision_model.encoder.layers.8.mlp.activation_fn
265
- vision_model.encoder.layers.8.mlp.fc1
266
- vision_model.encoder.layers.8.mlp.fc2
267
- vision_model.encoder.layers.8.layer_norm2
268
- vision_model.encoder.layers.9
269
- vision_model.encoder.layers.9.self_attn
270
- vision_model.encoder.layers.9.self_attn.k_proj
271
- vision_model.encoder.layers.9.self_attn.v_proj
272
- vision_model.encoder.layers.9.self_attn.q_proj
273
- vision_model.encoder.layers.9.self_attn.out_proj
274
- vision_model.encoder.layers.9.layer_norm1
275
- vision_model.encoder.layers.9.mlp
276
- vision_model.encoder.layers.9.mlp.activation_fn
277
- vision_model.encoder.layers.9.mlp.fc1
278
- vision_model.encoder.layers.9.mlp.fc2
279
- vision_model.encoder.layers.9.layer_norm2
280
- vision_model.encoder.layers.10
281
- vision_model.encoder.layers.10.self_attn
282
- vision_model.encoder.layers.10.self_attn.k_proj
283
- vision_model.encoder.layers.10.self_attn.v_proj
284
- vision_model.encoder.layers.10.self_attn.q_proj
285
- vision_model.encoder.layers.10.self_attn.out_proj
286
- vision_model.encoder.layers.10.layer_norm1
287
- vision_model.encoder.layers.10.mlp
288
- vision_model.encoder.layers.10.mlp.activation_fn
289
- vision_model.encoder.layers.10.mlp.fc1
290
- vision_model.encoder.layers.10.mlp.fc2
291
- vision_model.encoder.layers.10.layer_norm2
292
- vision_model.encoder.layers.11
293
- vision_model.encoder.layers.11.self_attn
294
- vision_model.encoder.layers.11.self_attn.k_proj
295
- vision_model.encoder.layers.11.self_attn.v_proj
296
- vision_model.encoder.layers.11.self_attn.q_proj
297
- vision_model.encoder.layers.11.self_attn.out_proj
298
- vision_model.encoder.layers.11.layer_norm1
299
- vision_model.encoder.layers.11.mlp
300
- vision_model.encoder.layers.11.mlp.activation_fn
301
- vision_model.encoder.layers.11.mlp.fc1
302
- vision_model.encoder.layers.11.mlp.fc2
303
- vision_model.encoder.layers.11.layer_norm2
304
- vision_model.post_layernorm
305
- visual_projection
306
- text_projection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/openbmb/MiniCPM-o-2_6.txt DELETED
@@ -1,1466 +0,0 @@
1
-
2
- llm
3
- llm.model
4
- llm.model.embed_tokens
5
- llm.model.layers
6
- llm.model.layers.0
7
- llm.model.layers.0.self_attn
8
- llm.model.layers.0.self_attn.q_proj
9
- llm.model.layers.0.self_attn.k_proj
10
- llm.model.layers.0.self_attn.v_proj
11
- llm.model.layers.0.self_attn.o_proj
12
- llm.model.layers.0.mlp
13
- llm.model.layers.0.mlp.gate_proj
14
- llm.model.layers.0.mlp.up_proj
15
- llm.model.layers.0.mlp.down_proj
16
- llm.model.layers.0.mlp.act_fn
17
- llm.model.layers.0.input_layernorm
18
- llm.model.layers.0.post_attention_layernorm
19
- llm.model.layers.1
20
- llm.model.layers.1.self_attn
21
- llm.model.layers.1.self_attn.q_proj
22
- llm.model.layers.1.self_attn.k_proj
23
- llm.model.layers.1.self_attn.v_proj
24
- llm.model.layers.1.self_attn.o_proj
25
- llm.model.layers.1.mlp
26
- llm.model.layers.1.mlp.gate_proj
27
- llm.model.layers.1.mlp.up_proj
28
- llm.model.layers.1.mlp.down_proj
29
- llm.model.layers.1.mlp.act_fn
30
- llm.model.layers.1.input_layernorm
31
- llm.model.layers.1.post_attention_layernorm
32
- llm.model.layers.2
33
- llm.model.layers.2.self_attn
34
- llm.model.layers.2.self_attn.q_proj
35
- llm.model.layers.2.self_attn.k_proj
36
- llm.model.layers.2.self_attn.v_proj
37
- llm.model.layers.2.self_attn.o_proj
38
- llm.model.layers.2.mlp
39
- llm.model.layers.2.mlp.gate_proj
40
- llm.model.layers.2.mlp.up_proj
41
- llm.model.layers.2.mlp.down_proj
42
- llm.model.layers.2.mlp.act_fn
43
- llm.model.layers.2.input_layernorm
44
- llm.model.layers.2.post_attention_layernorm
45
- llm.model.layers.3
46
- llm.model.layers.3.self_attn
47
- llm.model.layers.3.self_attn.q_proj
48
- llm.model.layers.3.self_attn.k_proj
49
- llm.model.layers.3.self_attn.v_proj
50
- llm.model.layers.3.self_attn.o_proj
51
- llm.model.layers.3.mlp
52
- llm.model.layers.3.mlp.gate_proj
53
- llm.model.layers.3.mlp.up_proj
54
- llm.model.layers.3.mlp.down_proj
55
- llm.model.layers.3.mlp.act_fn
56
- llm.model.layers.3.input_layernorm
57
- llm.model.layers.3.post_attention_layernorm
58
- llm.model.layers.4
59
- llm.model.layers.4.self_attn
60
- llm.model.layers.4.self_attn.q_proj
61
- llm.model.layers.4.self_attn.k_proj
62
- llm.model.layers.4.self_attn.v_proj
63
- llm.model.layers.4.self_attn.o_proj
64
- llm.model.layers.4.mlp
65
- llm.model.layers.4.mlp.gate_proj
66
- llm.model.layers.4.mlp.up_proj
67
- llm.model.layers.4.mlp.down_proj
68
- llm.model.layers.4.mlp.act_fn
69
- llm.model.layers.4.input_layernorm
70
- llm.model.layers.4.post_attention_layernorm
71
- llm.model.layers.5
72
- llm.model.layers.5.self_attn
73
- llm.model.layers.5.self_attn.q_proj
74
- llm.model.layers.5.self_attn.k_proj
75
- llm.model.layers.5.self_attn.v_proj
76
- llm.model.layers.5.self_attn.o_proj
77
- llm.model.layers.5.mlp
78
- llm.model.layers.5.mlp.gate_proj
79
- llm.model.layers.5.mlp.up_proj
80
- llm.model.layers.5.mlp.down_proj
81
- llm.model.layers.5.mlp.act_fn
82
- llm.model.layers.5.input_layernorm
83
- llm.model.layers.5.post_attention_layernorm
84
- llm.model.layers.6
85
- llm.model.layers.6.self_attn
86
- llm.model.layers.6.self_attn.q_proj
87
- llm.model.layers.6.self_attn.k_proj
88
- llm.model.layers.6.self_attn.v_proj
89
- llm.model.layers.6.self_attn.o_proj
90
- llm.model.layers.6.mlp
91
- llm.model.layers.6.mlp.gate_proj
92
- llm.model.layers.6.mlp.up_proj
93
- llm.model.layers.6.mlp.down_proj
94
- llm.model.layers.6.mlp.act_fn
95
- llm.model.layers.6.input_layernorm
96
- llm.model.layers.6.post_attention_layernorm
97
- llm.model.layers.7
98
- llm.model.layers.7.self_attn
99
- llm.model.layers.7.self_attn.q_proj
100
- llm.model.layers.7.self_attn.k_proj
101
- llm.model.layers.7.self_attn.v_proj
102
- llm.model.layers.7.self_attn.o_proj
103
- llm.model.layers.7.mlp
104
- llm.model.layers.7.mlp.gate_proj
105
- llm.model.layers.7.mlp.up_proj
106
- llm.model.layers.7.mlp.down_proj
107
- llm.model.layers.7.mlp.act_fn
108
- llm.model.layers.7.input_layernorm
109
- llm.model.layers.7.post_attention_layernorm
110
- llm.model.layers.8
111
- llm.model.layers.8.self_attn
112
- llm.model.layers.8.self_attn.q_proj
113
- llm.model.layers.8.self_attn.k_proj
114
- llm.model.layers.8.self_attn.v_proj
115
- llm.model.layers.8.self_attn.o_proj
116
- llm.model.layers.8.mlp
117
- llm.model.layers.8.mlp.gate_proj
118
- llm.model.layers.8.mlp.up_proj
119
- llm.model.layers.8.mlp.down_proj
120
- llm.model.layers.8.mlp.act_fn
121
- llm.model.layers.8.input_layernorm
122
- llm.model.layers.8.post_attention_layernorm
123
- llm.model.layers.9
124
- llm.model.layers.9.self_attn
125
- llm.model.layers.9.self_attn.q_proj
126
- llm.model.layers.9.self_attn.k_proj
127
- llm.model.layers.9.self_attn.v_proj
128
- llm.model.layers.9.self_attn.o_proj
129
- llm.model.layers.9.mlp
130
- llm.model.layers.9.mlp.gate_proj
131
- llm.model.layers.9.mlp.up_proj
132
- llm.model.layers.9.mlp.down_proj
133
- llm.model.layers.9.mlp.act_fn
134
- llm.model.layers.9.input_layernorm
135
- llm.model.layers.9.post_attention_layernorm
136
- llm.model.layers.10
137
- llm.model.layers.10.self_attn
138
- llm.model.layers.10.self_attn.q_proj
139
- llm.model.layers.10.self_attn.k_proj
140
- llm.model.layers.10.self_attn.v_proj
141
- llm.model.layers.10.self_attn.o_proj
142
- llm.model.layers.10.mlp
143
- llm.model.layers.10.mlp.gate_proj
144
- llm.model.layers.10.mlp.up_proj
145
- llm.model.layers.10.mlp.down_proj
146
- llm.model.layers.10.mlp.act_fn
147
- llm.model.layers.10.input_layernorm
148
- llm.model.layers.10.post_attention_layernorm
149
- llm.model.layers.11
150
- llm.model.layers.11.self_attn
151
- llm.model.layers.11.self_attn.q_proj
152
- llm.model.layers.11.self_attn.k_proj
153
- llm.model.layers.11.self_attn.v_proj
154
- llm.model.layers.11.self_attn.o_proj
155
- llm.model.layers.11.mlp
156
- llm.model.layers.11.mlp.gate_proj
157
- llm.model.layers.11.mlp.up_proj
158
- llm.model.layers.11.mlp.down_proj
159
- llm.model.layers.11.mlp.act_fn
160
- llm.model.layers.11.input_layernorm
161
- llm.model.layers.11.post_attention_layernorm
162
- llm.model.layers.12
163
- llm.model.layers.12.self_attn
164
- llm.model.layers.12.self_attn.q_proj
165
- llm.model.layers.12.self_attn.k_proj
166
- llm.model.layers.12.self_attn.v_proj
167
- llm.model.layers.12.self_attn.o_proj
168
- llm.model.layers.12.mlp
169
- llm.model.layers.12.mlp.gate_proj
170
- llm.model.layers.12.mlp.up_proj
171
- llm.model.layers.12.mlp.down_proj
172
- llm.model.layers.12.mlp.act_fn
173
- llm.model.layers.12.input_layernorm
174
- llm.model.layers.12.post_attention_layernorm
175
- llm.model.layers.13
176
- llm.model.layers.13.self_attn
177
- llm.model.layers.13.self_attn.q_proj
178
- llm.model.layers.13.self_attn.k_proj
179
- llm.model.layers.13.self_attn.v_proj
180
- llm.model.layers.13.self_attn.o_proj
181
- llm.model.layers.13.mlp
182
- llm.model.layers.13.mlp.gate_proj
183
- llm.model.layers.13.mlp.up_proj
184
- llm.model.layers.13.mlp.down_proj
185
- llm.model.layers.13.mlp.act_fn
186
- llm.model.layers.13.input_layernorm
187
- llm.model.layers.13.post_attention_layernorm
188
- llm.model.layers.14
189
- llm.model.layers.14.self_attn
190
- llm.model.layers.14.self_attn.q_proj
191
- llm.model.layers.14.self_attn.k_proj
192
- llm.model.layers.14.self_attn.v_proj
193
- llm.model.layers.14.self_attn.o_proj
194
- llm.model.layers.14.mlp
195
- llm.model.layers.14.mlp.gate_proj
196
- llm.model.layers.14.mlp.up_proj
197
- llm.model.layers.14.mlp.down_proj
198
- llm.model.layers.14.mlp.act_fn
199
- llm.model.layers.14.input_layernorm
200
- llm.model.layers.14.post_attention_layernorm
201
- llm.model.layers.15
202
- llm.model.layers.15.self_attn
203
- llm.model.layers.15.self_attn.q_proj
204
- llm.model.layers.15.self_attn.k_proj
205
- llm.model.layers.15.self_attn.v_proj
206
- llm.model.layers.15.self_attn.o_proj
207
- llm.model.layers.15.mlp
208
- llm.model.layers.15.mlp.gate_proj
209
- llm.model.layers.15.mlp.up_proj
210
- llm.model.layers.15.mlp.down_proj
211
- llm.model.layers.15.mlp.act_fn
212
- llm.model.layers.15.input_layernorm
213
- llm.model.layers.15.post_attention_layernorm
214
- llm.model.layers.16
215
- llm.model.layers.16.self_attn
216
- llm.model.layers.16.self_attn.q_proj
217
- llm.model.layers.16.self_attn.k_proj
218
- llm.model.layers.16.self_attn.v_proj
219
- llm.model.layers.16.self_attn.o_proj
220
- llm.model.layers.16.mlp
221
- llm.model.layers.16.mlp.gate_proj
222
- llm.model.layers.16.mlp.up_proj
223
- llm.model.layers.16.mlp.down_proj
224
- llm.model.layers.16.mlp.act_fn
225
- llm.model.layers.16.input_layernorm
226
- llm.model.layers.16.post_attention_layernorm
227
- llm.model.layers.17
228
- llm.model.layers.17.self_attn
229
- llm.model.layers.17.self_attn.q_proj
230
- llm.model.layers.17.self_attn.k_proj
231
- llm.model.layers.17.self_attn.v_proj
232
- llm.model.layers.17.self_attn.o_proj
233
- llm.model.layers.17.mlp
234
- llm.model.layers.17.mlp.gate_proj
235
- llm.model.layers.17.mlp.up_proj
236
- llm.model.layers.17.mlp.down_proj
237
- llm.model.layers.17.mlp.act_fn
238
- llm.model.layers.17.input_layernorm
239
- llm.model.layers.17.post_attention_layernorm
240
- llm.model.layers.18
241
- llm.model.layers.18.self_attn
242
- llm.model.layers.18.self_attn.q_proj
243
- llm.model.layers.18.self_attn.k_proj
244
- llm.model.layers.18.self_attn.v_proj
245
- llm.model.layers.18.self_attn.o_proj
246
- llm.model.layers.18.mlp
247
- llm.model.layers.18.mlp.gate_proj
248
- llm.model.layers.18.mlp.up_proj
249
- llm.model.layers.18.mlp.down_proj
250
- llm.model.layers.18.mlp.act_fn
251
- llm.model.layers.18.input_layernorm
252
- llm.model.layers.18.post_attention_layernorm
253
- llm.model.layers.19
254
- llm.model.layers.19.self_attn
255
- llm.model.layers.19.self_attn.q_proj
256
- llm.model.layers.19.self_attn.k_proj
257
- llm.model.layers.19.self_attn.v_proj
258
- llm.model.layers.19.self_attn.o_proj
259
- llm.model.layers.19.mlp
260
- llm.model.layers.19.mlp.gate_proj
261
- llm.model.layers.19.mlp.up_proj
262
- llm.model.layers.19.mlp.down_proj
263
- llm.model.layers.19.mlp.act_fn
264
- llm.model.layers.19.input_layernorm
265
- llm.model.layers.19.post_attention_layernorm
266
- llm.model.layers.20
267
- llm.model.layers.20.self_attn
268
- llm.model.layers.20.self_attn.q_proj
269
- llm.model.layers.20.self_attn.k_proj
270
- llm.model.layers.20.self_attn.v_proj
271
- llm.model.layers.20.self_attn.o_proj
272
- llm.model.layers.20.mlp
273
- llm.model.layers.20.mlp.gate_proj
274
- llm.model.layers.20.mlp.up_proj
275
- llm.model.layers.20.mlp.down_proj
276
- llm.model.layers.20.mlp.act_fn
277
- llm.model.layers.20.input_layernorm
278
- llm.model.layers.20.post_attention_layernorm
279
- llm.model.layers.21
280
- llm.model.layers.21.self_attn
281
- llm.model.layers.21.self_attn.q_proj
282
- llm.model.layers.21.self_attn.k_proj
283
- llm.model.layers.21.self_attn.v_proj
284
- llm.model.layers.21.self_attn.o_proj
285
- llm.model.layers.21.mlp
286
- llm.model.layers.21.mlp.gate_proj
287
- llm.model.layers.21.mlp.up_proj
288
- llm.model.layers.21.mlp.down_proj
289
- llm.model.layers.21.mlp.act_fn
290
- llm.model.layers.21.input_layernorm
291
- llm.model.layers.21.post_attention_layernorm
292
- llm.model.layers.22
293
- llm.model.layers.22.self_attn
294
- llm.model.layers.22.self_attn.q_proj
295
- llm.model.layers.22.self_attn.k_proj
296
- llm.model.layers.22.self_attn.v_proj
297
- llm.model.layers.22.self_attn.o_proj
298
- llm.model.layers.22.mlp
299
- llm.model.layers.22.mlp.gate_proj
300
- llm.model.layers.22.mlp.up_proj
301
- llm.model.layers.22.mlp.down_proj
302
- llm.model.layers.22.mlp.act_fn
303
- llm.model.layers.22.input_layernorm
304
- llm.model.layers.22.post_attention_layernorm
305
- llm.model.layers.23
306
- llm.model.layers.23.self_attn
307
- llm.model.layers.23.self_attn.q_proj
308
- llm.model.layers.23.self_attn.k_proj
309
- llm.model.layers.23.self_attn.v_proj
310
- llm.model.layers.23.self_attn.o_proj
311
- llm.model.layers.23.mlp
312
- llm.model.layers.23.mlp.gate_proj
313
- llm.model.layers.23.mlp.up_proj
314
- llm.model.layers.23.mlp.down_proj
315
- llm.model.layers.23.mlp.act_fn
316
- llm.model.layers.23.input_layernorm
317
- llm.model.layers.23.post_attention_layernorm
318
- llm.model.layers.24
319
- llm.model.layers.24.self_attn
320
- llm.model.layers.24.self_attn.q_proj
321
- llm.model.layers.24.self_attn.k_proj
322
- llm.model.layers.24.self_attn.v_proj
323
- llm.model.layers.24.self_attn.o_proj
324
- llm.model.layers.24.mlp
325
- llm.model.layers.24.mlp.gate_proj
326
- llm.model.layers.24.mlp.up_proj
327
- llm.model.layers.24.mlp.down_proj
328
- llm.model.layers.24.mlp.act_fn
329
- llm.model.layers.24.input_layernorm
330
- llm.model.layers.24.post_attention_layernorm
331
- llm.model.layers.25
332
- llm.model.layers.25.self_attn
333
- llm.model.layers.25.self_attn.q_proj
334
- llm.model.layers.25.self_attn.k_proj
335
- llm.model.layers.25.self_attn.v_proj
336
- llm.model.layers.25.self_attn.o_proj
337
- llm.model.layers.25.mlp
338
- llm.model.layers.25.mlp.gate_proj
339
- llm.model.layers.25.mlp.up_proj
340
- llm.model.layers.25.mlp.down_proj
341
- llm.model.layers.25.mlp.act_fn
342
- llm.model.layers.25.input_layernorm
343
- llm.model.layers.25.post_attention_layernorm
344
- llm.model.layers.26
345
- llm.model.layers.26.self_attn
346
- llm.model.layers.26.self_attn.q_proj
347
- llm.model.layers.26.self_attn.k_proj
348
- llm.model.layers.26.self_attn.v_proj
349
- llm.model.layers.26.self_attn.o_proj
350
- llm.model.layers.26.mlp
351
- llm.model.layers.26.mlp.gate_proj
352
- llm.model.layers.26.mlp.up_proj
353
- llm.model.layers.26.mlp.down_proj
354
- llm.model.layers.26.mlp.act_fn
355
- llm.model.layers.26.input_layernorm
356
- llm.model.layers.26.post_attention_layernorm
357
- llm.model.layers.27
358
- llm.model.layers.27.self_attn
359
- llm.model.layers.27.self_attn.q_proj
360
- llm.model.layers.27.self_attn.k_proj
361
- llm.model.layers.27.self_attn.v_proj
362
- llm.model.layers.27.self_attn.o_proj
363
- llm.model.layers.27.mlp
364
- llm.model.layers.27.mlp.gate_proj
365
- llm.model.layers.27.mlp.up_proj
366
- llm.model.layers.27.mlp.down_proj
367
- llm.model.layers.27.mlp.act_fn
368
- llm.model.layers.27.input_layernorm
369
- llm.model.layers.27.post_attention_layernorm
370
- llm.model.norm
371
- llm.model.rotary_emb
372
- llm.lm_head
373
- vpm
374
- vpm.embeddings
375
- vpm.embeddings.patch_embedding
376
- vpm.embeddings.position_embedding
377
- vpm.encoder
378
- vpm.encoder.layers
379
- vpm.encoder.layers.0
380
- vpm.encoder.layers.0.self_attn
381
- vpm.encoder.layers.0.self_attn.k_proj
382
- vpm.encoder.layers.0.self_attn.v_proj
383
- vpm.encoder.layers.0.self_attn.q_proj
384
- vpm.encoder.layers.0.self_attn.out_proj
385
- vpm.encoder.layers.0.layer_norm1
386
- vpm.encoder.layers.0.mlp
387
- vpm.encoder.layers.0.mlp.activation_fn
388
- vpm.encoder.layers.0.mlp.fc1
389
- vpm.encoder.layers.0.mlp.fc2
390
- vpm.encoder.layers.0.layer_norm2
391
- vpm.encoder.layers.1
392
- vpm.encoder.layers.1.self_attn
393
- vpm.encoder.layers.1.self_attn.k_proj
394
- vpm.encoder.layers.1.self_attn.v_proj
395
- vpm.encoder.layers.1.self_attn.q_proj
396
- vpm.encoder.layers.1.self_attn.out_proj
397
- vpm.encoder.layers.1.layer_norm1
398
- vpm.encoder.layers.1.mlp
399
- vpm.encoder.layers.1.mlp.activation_fn
400
- vpm.encoder.layers.1.mlp.fc1
401
- vpm.encoder.layers.1.mlp.fc2
402
- vpm.encoder.layers.1.layer_norm2
403
- vpm.encoder.layers.2
404
- vpm.encoder.layers.2.self_attn
405
- vpm.encoder.layers.2.self_attn.k_proj
406
- vpm.encoder.layers.2.self_attn.v_proj
407
- vpm.encoder.layers.2.self_attn.q_proj
408
- vpm.encoder.layers.2.self_attn.out_proj
409
- vpm.encoder.layers.2.layer_norm1
410
- vpm.encoder.layers.2.mlp
411
- vpm.encoder.layers.2.mlp.activation_fn
412
- vpm.encoder.layers.2.mlp.fc1
413
- vpm.encoder.layers.2.mlp.fc2
414
- vpm.encoder.layers.2.layer_norm2
415
- vpm.encoder.layers.3
416
- vpm.encoder.layers.3.self_attn
417
- vpm.encoder.layers.3.self_attn.k_proj
418
- vpm.encoder.layers.3.self_attn.v_proj
419
- vpm.encoder.layers.3.self_attn.q_proj
420
- vpm.encoder.layers.3.self_attn.out_proj
421
- vpm.encoder.layers.3.layer_norm1
422
- vpm.encoder.layers.3.mlp
423
- vpm.encoder.layers.3.mlp.activation_fn
424
- vpm.encoder.layers.3.mlp.fc1
425
- vpm.encoder.layers.3.mlp.fc2
426
- vpm.encoder.layers.3.layer_norm2
427
- vpm.encoder.layers.4
428
- vpm.encoder.layers.4.self_attn
429
- vpm.encoder.layers.4.self_attn.k_proj
430
- vpm.encoder.layers.4.self_attn.v_proj
431
- vpm.encoder.layers.4.self_attn.q_proj
432
- vpm.encoder.layers.4.self_attn.out_proj
433
- vpm.encoder.layers.4.layer_norm1
434
- vpm.encoder.layers.4.mlp
435
- vpm.encoder.layers.4.mlp.activation_fn
436
- vpm.encoder.layers.4.mlp.fc1
437
- vpm.encoder.layers.4.mlp.fc2
438
- vpm.encoder.layers.4.layer_norm2
439
- vpm.encoder.layers.5
440
- vpm.encoder.layers.5.self_attn
441
- vpm.encoder.layers.5.self_attn.k_proj
442
- vpm.encoder.layers.5.self_attn.v_proj
443
- vpm.encoder.layers.5.self_attn.q_proj
444
- vpm.encoder.layers.5.self_attn.out_proj
445
- vpm.encoder.layers.5.layer_norm1
446
- vpm.encoder.layers.5.mlp
447
- vpm.encoder.layers.5.mlp.activation_fn
448
- vpm.encoder.layers.5.mlp.fc1
449
- vpm.encoder.layers.5.mlp.fc2
450
- vpm.encoder.layers.5.layer_norm2
451
- vpm.encoder.layers.6
452
- vpm.encoder.layers.6.self_attn
453
- vpm.encoder.layers.6.self_attn.k_proj
454
- vpm.encoder.layers.6.self_attn.v_proj
455
- vpm.encoder.layers.6.self_attn.q_proj
456
- vpm.encoder.layers.6.self_attn.out_proj
457
- vpm.encoder.layers.6.layer_norm1
458
- vpm.encoder.layers.6.mlp
459
- vpm.encoder.layers.6.mlp.activation_fn
460
- vpm.encoder.layers.6.mlp.fc1
461
- vpm.encoder.layers.6.mlp.fc2
462
- vpm.encoder.layers.6.layer_norm2
463
- vpm.encoder.layers.7
464
- vpm.encoder.layers.7.self_attn
465
- vpm.encoder.layers.7.self_attn.k_proj
466
- vpm.encoder.layers.7.self_attn.v_proj
467
- vpm.encoder.layers.7.self_attn.q_proj
468
- vpm.encoder.layers.7.self_attn.out_proj
469
- vpm.encoder.layers.7.layer_norm1
470
- vpm.encoder.layers.7.mlp
471
- vpm.encoder.layers.7.mlp.activation_fn
472
- vpm.encoder.layers.7.mlp.fc1
473
- vpm.encoder.layers.7.mlp.fc2
474
- vpm.encoder.layers.7.layer_norm2
475
- vpm.encoder.layers.8
476
- vpm.encoder.layers.8.self_attn
477
- vpm.encoder.layers.8.self_attn.k_proj
478
- vpm.encoder.layers.8.self_attn.v_proj
479
- vpm.encoder.layers.8.self_attn.q_proj
480
- vpm.encoder.layers.8.self_attn.out_proj
481
- vpm.encoder.layers.8.layer_norm1
482
- vpm.encoder.layers.8.mlp
483
- vpm.encoder.layers.8.mlp.activation_fn
484
- vpm.encoder.layers.8.mlp.fc1
485
- vpm.encoder.layers.8.mlp.fc2
486
- vpm.encoder.layers.8.layer_norm2
487
- vpm.encoder.layers.9
488
- vpm.encoder.layers.9.self_attn
489
- vpm.encoder.layers.9.self_attn.k_proj
490
- vpm.encoder.layers.9.self_attn.v_proj
491
- vpm.encoder.layers.9.self_attn.q_proj
492
- vpm.encoder.layers.9.self_attn.out_proj
493
- vpm.encoder.layers.9.layer_norm1
494
- vpm.encoder.layers.9.mlp
495
- vpm.encoder.layers.9.mlp.activation_fn
496
- vpm.encoder.layers.9.mlp.fc1
497
- vpm.encoder.layers.9.mlp.fc2
498
- vpm.encoder.layers.9.layer_norm2
499
- vpm.encoder.layers.10
500
- vpm.encoder.layers.10.self_attn
501
- vpm.encoder.layers.10.self_attn.k_proj
502
- vpm.encoder.layers.10.self_attn.v_proj
503
- vpm.encoder.layers.10.self_attn.q_proj
504
- vpm.encoder.layers.10.self_attn.out_proj
505
- vpm.encoder.layers.10.layer_norm1
506
- vpm.encoder.layers.10.mlp
507
- vpm.encoder.layers.10.mlp.activation_fn
508
- vpm.encoder.layers.10.mlp.fc1
509
- vpm.encoder.layers.10.mlp.fc2
510
- vpm.encoder.layers.10.layer_norm2
511
- vpm.encoder.layers.11
512
- vpm.encoder.layers.11.self_attn
513
- vpm.encoder.layers.11.self_attn.k_proj
514
- vpm.encoder.layers.11.self_attn.v_proj
515
- vpm.encoder.layers.11.self_attn.q_proj
516
- vpm.encoder.layers.11.self_attn.out_proj
517
- vpm.encoder.layers.11.layer_norm1
518
- vpm.encoder.layers.11.mlp
519
- vpm.encoder.layers.11.mlp.activation_fn
520
- vpm.encoder.layers.11.mlp.fc1
521
- vpm.encoder.layers.11.mlp.fc2
522
- vpm.encoder.layers.11.layer_norm2
523
- vpm.encoder.layers.12
524
- vpm.encoder.layers.12.self_attn
525
- vpm.encoder.layers.12.self_attn.k_proj
526
- vpm.encoder.layers.12.self_attn.v_proj
527
- vpm.encoder.layers.12.self_attn.q_proj
528
- vpm.encoder.layers.12.self_attn.out_proj
529
- vpm.encoder.layers.12.layer_norm1
530
- vpm.encoder.layers.12.mlp
531
- vpm.encoder.layers.12.mlp.activation_fn
532
- vpm.encoder.layers.12.mlp.fc1
533
- vpm.encoder.layers.12.mlp.fc2
534
- vpm.encoder.layers.12.layer_norm2
535
- vpm.encoder.layers.13
536
- vpm.encoder.layers.13.self_attn
537
- vpm.encoder.layers.13.self_attn.k_proj
538
- vpm.encoder.layers.13.self_attn.v_proj
539
- vpm.encoder.layers.13.self_attn.q_proj
540
- vpm.encoder.layers.13.self_attn.out_proj
541
- vpm.encoder.layers.13.layer_norm1
542
- vpm.encoder.layers.13.mlp
543
- vpm.encoder.layers.13.mlp.activation_fn
544
- vpm.encoder.layers.13.mlp.fc1
545
- vpm.encoder.layers.13.mlp.fc2
546
- vpm.encoder.layers.13.layer_norm2
547
- vpm.encoder.layers.14
548
- vpm.encoder.layers.14.self_attn
549
- vpm.encoder.layers.14.self_attn.k_proj
550
- vpm.encoder.layers.14.self_attn.v_proj
551
- vpm.encoder.layers.14.self_attn.q_proj
552
- vpm.encoder.layers.14.self_attn.out_proj
553
- vpm.encoder.layers.14.layer_norm1
554
- vpm.encoder.layers.14.mlp
555
- vpm.encoder.layers.14.mlp.activation_fn
556
- vpm.encoder.layers.14.mlp.fc1
557
- vpm.encoder.layers.14.mlp.fc2
558
- vpm.encoder.layers.14.layer_norm2
559
- vpm.encoder.layers.15
560
- vpm.encoder.layers.15.self_attn
561
- vpm.encoder.layers.15.self_attn.k_proj
562
- vpm.encoder.layers.15.self_attn.v_proj
563
- vpm.encoder.layers.15.self_attn.q_proj
564
- vpm.encoder.layers.15.self_attn.out_proj
565
- vpm.encoder.layers.15.layer_norm1
566
- vpm.encoder.layers.15.mlp
567
- vpm.encoder.layers.15.mlp.activation_fn
568
- vpm.encoder.layers.15.mlp.fc1
569
- vpm.encoder.layers.15.mlp.fc2
570
- vpm.encoder.layers.15.layer_norm2
571
- vpm.encoder.layers.16
572
- vpm.encoder.layers.16.self_attn
573
- vpm.encoder.layers.16.self_attn.k_proj
574
- vpm.encoder.layers.16.self_attn.v_proj
575
- vpm.encoder.layers.16.self_attn.q_proj
576
- vpm.encoder.layers.16.self_attn.out_proj
577
- vpm.encoder.layers.16.layer_norm1
578
- vpm.encoder.layers.16.mlp
579
- vpm.encoder.layers.16.mlp.activation_fn
580
- vpm.encoder.layers.16.mlp.fc1
581
- vpm.encoder.layers.16.mlp.fc2
582
- vpm.encoder.layers.16.layer_norm2
583
- vpm.encoder.layers.17
584
- vpm.encoder.layers.17.self_attn
585
- vpm.encoder.layers.17.self_attn.k_proj
586
- vpm.encoder.layers.17.self_attn.v_proj
587
- vpm.encoder.layers.17.self_attn.q_proj
588
- vpm.encoder.layers.17.self_attn.out_proj
589
- vpm.encoder.layers.17.layer_norm1
590
- vpm.encoder.layers.17.mlp
591
- vpm.encoder.layers.17.mlp.activation_fn
592
- vpm.encoder.layers.17.mlp.fc1
593
- vpm.encoder.layers.17.mlp.fc2
594
- vpm.encoder.layers.17.layer_norm2
595
- vpm.encoder.layers.18
596
- vpm.encoder.layers.18.self_attn
597
- vpm.encoder.layers.18.self_attn.k_proj
598
- vpm.encoder.layers.18.self_attn.v_proj
599
- vpm.encoder.layers.18.self_attn.q_proj
600
- vpm.encoder.layers.18.self_attn.out_proj
601
- vpm.encoder.layers.18.layer_norm1
602
- vpm.encoder.layers.18.mlp
603
- vpm.encoder.layers.18.mlp.activation_fn
604
- vpm.encoder.layers.18.mlp.fc1
605
- vpm.encoder.layers.18.mlp.fc2
606
- vpm.encoder.layers.18.layer_norm2
607
- vpm.encoder.layers.19
608
- vpm.encoder.layers.19.self_attn
609
- vpm.encoder.layers.19.self_attn.k_proj
610
- vpm.encoder.layers.19.self_attn.v_proj
611
- vpm.encoder.layers.19.self_attn.q_proj
612
- vpm.encoder.layers.19.self_attn.out_proj
613
- vpm.encoder.layers.19.layer_norm1
614
- vpm.encoder.layers.19.mlp
615
- vpm.encoder.layers.19.mlp.activation_fn
616
- vpm.encoder.layers.19.mlp.fc1
617
- vpm.encoder.layers.19.mlp.fc2
618
- vpm.encoder.layers.19.layer_norm2
619
- vpm.encoder.layers.20
620
- vpm.encoder.layers.20.self_attn
621
- vpm.encoder.layers.20.self_attn.k_proj
622
- vpm.encoder.layers.20.self_attn.v_proj
623
- vpm.encoder.layers.20.self_attn.q_proj
624
- vpm.encoder.layers.20.self_attn.out_proj
625
- vpm.encoder.layers.20.layer_norm1
626
- vpm.encoder.layers.20.mlp
627
- vpm.encoder.layers.20.mlp.activation_fn
628
- vpm.encoder.layers.20.mlp.fc1
629
- vpm.encoder.layers.20.mlp.fc2
630
- vpm.encoder.layers.20.layer_norm2
631
- vpm.encoder.layers.21
632
- vpm.encoder.layers.21.self_attn
633
- vpm.encoder.layers.21.self_attn.k_proj
634
- vpm.encoder.layers.21.self_attn.v_proj
635
- vpm.encoder.layers.21.self_attn.q_proj
636
- vpm.encoder.layers.21.self_attn.out_proj
637
- vpm.encoder.layers.21.layer_norm1
638
- vpm.encoder.layers.21.mlp
639
- vpm.encoder.layers.21.mlp.activation_fn
640
- vpm.encoder.layers.21.mlp.fc1
641
- vpm.encoder.layers.21.mlp.fc2
642
- vpm.encoder.layers.21.layer_norm2
643
- vpm.encoder.layers.22
644
- vpm.encoder.layers.22.self_attn
645
- vpm.encoder.layers.22.self_attn.k_proj
646
- vpm.encoder.layers.22.self_attn.v_proj
647
- vpm.encoder.layers.22.self_attn.q_proj
648
- vpm.encoder.layers.22.self_attn.out_proj
649
- vpm.encoder.layers.22.layer_norm1
650
- vpm.encoder.layers.22.mlp
651
- vpm.encoder.layers.22.mlp.activation_fn
652
- vpm.encoder.layers.22.mlp.fc1
653
- vpm.encoder.layers.22.mlp.fc2
654
- vpm.encoder.layers.22.layer_norm2
655
- vpm.encoder.layers.23
656
- vpm.encoder.layers.23.self_attn
657
- vpm.encoder.layers.23.self_attn.k_proj
658
- vpm.encoder.layers.23.self_attn.v_proj
659
- vpm.encoder.layers.23.self_attn.q_proj
660
- vpm.encoder.layers.23.self_attn.out_proj
661
- vpm.encoder.layers.23.layer_norm1
662
- vpm.encoder.layers.23.mlp
663
- vpm.encoder.layers.23.mlp.activation_fn
664
- vpm.encoder.layers.23.mlp.fc1
665
- vpm.encoder.layers.23.mlp.fc2
666
- vpm.encoder.layers.23.layer_norm2
667
- vpm.encoder.layers.24
668
- vpm.encoder.layers.24.self_attn
669
- vpm.encoder.layers.24.self_attn.k_proj
670
- vpm.encoder.layers.24.self_attn.v_proj
671
- vpm.encoder.layers.24.self_attn.q_proj
672
- vpm.encoder.layers.24.self_attn.out_proj
673
- vpm.encoder.layers.24.layer_norm1
674
- vpm.encoder.layers.24.mlp
675
- vpm.encoder.layers.24.mlp.activation_fn
676
- vpm.encoder.layers.24.mlp.fc1
677
- vpm.encoder.layers.24.mlp.fc2
678
- vpm.encoder.layers.24.layer_norm2
679
- vpm.encoder.layers.25
680
- vpm.encoder.layers.25.self_attn
681
- vpm.encoder.layers.25.self_attn.k_proj
682
- vpm.encoder.layers.25.self_attn.v_proj
683
- vpm.encoder.layers.25.self_attn.q_proj
684
- vpm.encoder.layers.25.self_attn.out_proj
685
- vpm.encoder.layers.25.layer_norm1
686
- vpm.encoder.layers.25.mlp
687
- vpm.encoder.layers.25.mlp.activation_fn
688
- vpm.encoder.layers.25.mlp.fc1
689
- vpm.encoder.layers.25.mlp.fc2
690
- vpm.encoder.layers.25.layer_norm2
691
- vpm.encoder.layers.26
692
- vpm.encoder.layers.26.self_attn
693
- vpm.encoder.layers.26.self_attn.k_proj
694
- vpm.encoder.layers.26.self_attn.v_proj
695
- vpm.encoder.layers.26.self_attn.q_proj
696
- vpm.encoder.layers.26.self_attn.out_proj
697
- vpm.encoder.layers.26.layer_norm1
698
- vpm.encoder.layers.26.mlp
699
- vpm.encoder.layers.26.mlp.activation_fn
700
- vpm.encoder.layers.26.mlp.fc1
701
- vpm.encoder.layers.26.mlp.fc2
702
- vpm.encoder.layers.26.layer_norm2
703
- vpm.post_layernorm
704
- resampler
705
- resampler.kv_proj
706
- resampler.attn
707
- resampler.attn.out_proj
708
- resampler.ln_q
709
- resampler.ln_kv
710
- resampler.ln_post
711
- apm
712
- apm.conv1
713
- apm.conv2
714
- apm.embed_positions
715
- apm.layers
716
- apm.layers.0
717
- apm.layers.0.self_attn
718
- apm.layers.0.self_attn.k_proj
719
- apm.layers.0.self_attn.v_proj
720
- apm.layers.0.self_attn.q_proj
721
- apm.layers.0.self_attn.out_proj
722
- apm.layers.0.self_attn_layer_norm
723
- apm.layers.0.activation_fn
724
- apm.layers.0.fc1
725
- apm.layers.0.fc2
726
- apm.layers.0.final_layer_norm
727
- apm.layers.1
728
- apm.layers.1.self_attn
729
- apm.layers.1.self_attn.k_proj
730
- apm.layers.1.self_attn.v_proj
731
- apm.layers.1.self_attn.q_proj
732
- apm.layers.1.self_attn.out_proj
733
- apm.layers.1.self_attn_layer_norm
734
- apm.layers.1.activation_fn
735
- apm.layers.1.fc1
736
- apm.layers.1.fc2
737
- apm.layers.1.final_layer_norm
738
- apm.layers.2
739
- apm.layers.2.self_attn
740
- apm.layers.2.self_attn.k_proj
741
- apm.layers.2.self_attn.v_proj
742
- apm.layers.2.self_attn.q_proj
743
- apm.layers.2.self_attn.out_proj
744
- apm.layers.2.self_attn_layer_norm
745
- apm.layers.2.activation_fn
746
- apm.layers.2.fc1
747
- apm.layers.2.fc2
748
- apm.layers.2.final_layer_norm
749
- apm.layers.3
750
- apm.layers.3.self_attn
751
- apm.layers.3.self_attn.k_proj
752
- apm.layers.3.self_attn.v_proj
753
- apm.layers.3.self_attn.q_proj
754
- apm.layers.3.self_attn.out_proj
755
- apm.layers.3.self_attn_layer_norm
756
- apm.layers.3.activation_fn
757
- apm.layers.3.fc1
758
- apm.layers.3.fc2
759
- apm.layers.3.final_layer_norm
760
- apm.layers.4
761
- apm.layers.4.self_attn
762
- apm.layers.4.self_attn.k_proj
763
- apm.layers.4.self_attn.v_proj
764
- apm.layers.4.self_attn.q_proj
765
- apm.layers.4.self_attn.out_proj
766
- apm.layers.4.self_attn_layer_norm
767
- apm.layers.4.activation_fn
768
- apm.layers.4.fc1
769
- apm.layers.4.fc2
770
- apm.layers.4.final_layer_norm
771
- apm.layers.5
772
- apm.layers.5.self_attn
773
- apm.layers.5.self_attn.k_proj
774
- apm.layers.5.self_attn.v_proj
775
- apm.layers.5.self_attn.q_proj
776
- apm.layers.5.self_attn.out_proj
777
- apm.layers.5.self_attn_layer_norm
778
- apm.layers.5.activation_fn
779
- apm.layers.5.fc1
780
- apm.layers.5.fc2
781
- apm.layers.5.final_layer_norm
782
- apm.layers.6
783
- apm.layers.6.self_attn
784
- apm.layers.6.self_attn.k_proj
785
- apm.layers.6.self_attn.v_proj
786
- apm.layers.6.self_attn.q_proj
787
- apm.layers.6.self_attn.out_proj
788
- apm.layers.6.self_attn_layer_norm
789
- apm.layers.6.activation_fn
790
- apm.layers.6.fc1
791
- apm.layers.6.fc2
792
- apm.layers.6.final_layer_norm
793
- apm.layers.7
794
- apm.layers.7.self_attn
795
- apm.layers.7.self_attn.k_proj
796
- apm.layers.7.self_attn.v_proj
797
- apm.layers.7.self_attn.q_proj
798
- apm.layers.7.self_attn.out_proj
799
- apm.layers.7.self_attn_layer_norm
800
- apm.layers.7.activation_fn
801
- apm.layers.7.fc1
802
- apm.layers.7.fc2
803
- apm.layers.7.final_layer_norm
804
- apm.layers.8
805
- apm.layers.8.self_attn
806
- apm.layers.8.self_attn.k_proj
807
- apm.layers.8.self_attn.v_proj
808
- apm.layers.8.self_attn.q_proj
809
- apm.layers.8.self_attn.out_proj
810
- apm.layers.8.self_attn_layer_norm
811
- apm.layers.8.activation_fn
812
- apm.layers.8.fc1
813
- apm.layers.8.fc2
814
- apm.layers.8.final_layer_norm
815
- apm.layers.9
816
- apm.layers.9.self_attn
817
- apm.layers.9.self_attn.k_proj
818
- apm.layers.9.self_attn.v_proj
819
- apm.layers.9.self_attn.q_proj
820
- apm.layers.9.self_attn.out_proj
821
- apm.layers.9.self_attn_layer_norm
822
- apm.layers.9.activation_fn
823
- apm.layers.9.fc1
824
- apm.layers.9.fc2
825
- apm.layers.9.final_layer_norm
826
- apm.layers.10
827
- apm.layers.10.self_attn
828
- apm.layers.10.self_attn.k_proj
829
- apm.layers.10.self_attn.v_proj
830
- apm.layers.10.self_attn.q_proj
831
- apm.layers.10.self_attn.out_proj
832
- apm.layers.10.self_attn_layer_norm
833
- apm.layers.10.activation_fn
834
- apm.layers.10.fc1
835
- apm.layers.10.fc2
836
- apm.layers.10.final_layer_norm
837
- apm.layers.11
838
- apm.layers.11.self_attn
839
- apm.layers.11.self_attn.k_proj
840
- apm.layers.11.self_attn.v_proj
841
- apm.layers.11.self_attn.q_proj
842
- apm.layers.11.self_attn.out_proj
843
- apm.layers.11.self_attn_layer_norm
844
- apm.layers.11.activation_fn
845
- apm.layers.11.fc1
846
- apm.layers.11.fc2
847
- apm.layers.11.final_layer_norm
848
- apm.layers.12
849
- apm.layers.12.self_attn
850
- apm.layers.12.self_attn.k_proj
851
- apm.layers.12.self_attn.v_proj
852
- apm.layers.12.self_attn.q_proj
853
- apm.layers.12.self_attn.out_proj
854
- apm.layers.12.self_attn_layer_norm
855
- apm.layers.12.activation_fn
856
- apm.layers.12.fc1
857
- apm.layers.12.fc2
858
- apm.layers.12.final_layer_norm
859
- apm.layers.13
860
- apm.layers.13.self_attn
861
- apm.layers.13.self_attn.k_proj
862
- apm.layers.13.self_attn.v_proj
863
- apm.layers.13.self_attn.q_proj
864
- apm.layers.13.self_attn.out_proj
865
- apm.layers.13.self_attn_layer_norm
866
- apm.layers.13.activation_fn
867
- apm.layers.13.fc1
868
- apm.layers.13.fc2
869
- apm.layers.13.final_layer_norm
870
- apm.layers.14
871
- apm.layers.14.self_attn
872
- apm.layers.14.self_attn.k_proj
873
- apm.layers.14.self_attn.v_proj
874
- apm.layers.14.self_attn.q_proj
875
- apm.layers.14.self_attn.out_proj
876
- apm.layers.14.self_attn_layer_norm
877
- apm.layers.14.activation_fn
878
- apm.layers.14.fc1
879
- apm.layers.14.fc2
880
- apm.layers.14.final_layer_norm
881
- apm.layers.15
882
- apm.layers.15.self_attn
883
- apm.layers.15.self_attn.k_proj
884
- apm.layers.15.self_attn.v_proj
885
- apm.layers.15.self_attn.q_proj
886
- apm.layers.15.self_attn.out_proj
887
- apm.layers.15.self_attn_layer_norm
888
- apm.layers.15.activation_fn
889
- apm.layers.15.fc1
890
- apm.layers.15.fc2
891
- apm.layers.15.final_layer_norm
892
- apm.layers.16
893
- apm.layers.16.self_attn
894
- apm.layers.16.self_attn.k_proj
895
- apm.layers.16.self_attn.v_proj
896
- apm.layers.16.self_attn.q_proj
897
- apm.layers.16.self_attn.out_proj
898
- apm.layers.16.self_attn_layer_norm
899
- apm.layers.16.activation_fn
900
- apm.layers.16.fc1
901
- apm.layers.16.fc2
902
- apm.layers.16.final_layer_norm
903
- apm.layers.17
904
- apm.layers.17.self_attn
905
- apm.layers.17.self_attn.k_proj
906
- apm.layers.17.self_attn.v_proj
907
- apm.layers.17.self_attn.q_proj
908
- apm.layers.17.self_attn.out_proj
909
- apm.layers.17.self_attn_layer_norm
910
- apm.layers.17.activation_fn
911
- apm.layers.17.fc1
912
- apm.layers.17.fc2
913
- apm.layers.17.final_layer_norm
914
- apm.layers.18
915
- apm.layers.18.self_attn
916
- apm.layers.18.self_attn.k_proj
917
- apm.layers.18.self_attn.v_proj
918
- apm.layers.18.self_attn.q_proj
919
- apm.layers.18.self_attn.out_proj
920
- apm.layers.18.self_attn_layer_norm
921
- apm.layers.18.activation_fn
922
- apm.layers.18.fc1
923
- apm.layers.18.fc2
924
- apm.layers.18.final_layer_norm
925
- apm.layers.19
926
- apm.layers.19.self_attn
927
- apm.layers.19.self_attn.k_proj
928
- apm.layers.19.self_attn.v_proj
929
- apm.layers.19.self_attn.q_proj
930
- apm.layers.19.self_attn.out_proj
931
- apm.layers.19.self_attn_layer_norm
932
- apm.layers.19.activation_fn
933
- apm.layers.19.fc1
934
- apm.layers.19.fc2
935
- apm.layers.19.final_layer_norm
936
- apm.layers.20
937
- apm.layers.20.self_attn
938
- apm.layers.20.self_attn.k_proj
939
- apm.layers.20.self_attn.v_proj
940
- apm.layers.20.self_attn.q_proj
941
- apm.layers.20.self_attn.out_proj
942
- apm.layers.20.self_attn_layer_norm
943
- apm.layers.20.activation_fn
944
- apm.layers.20.fc1
945
- apm.layers.20.fc2
946
- apm.layers.20.final_layer_norm
947
- apm.layers.21
948
- apm.layers.21.self_attn
949
- apm.layers.21.self_attn.k_proj
950
- apm.layers.21.self_attn.v_proj
951
- apm.layers.21.self_attn.q_proj
952
- apm.layers.21.self_attn.out_proj
953
- apm.layers.21.self_attn_layer_norm
954
- apm.layers.21.activation_fn
955
- apm.layers.21.fc1
956
- apm.layers.21.fc2
957
- apm.layers.21.final_layer_norm
958
- apm.layers.22
959
- apm.layers.22.self_attn
960
- apm.layers.22.self_attn.k_proj
961
- apm.layers.22.self_attn.v_proj
962
- apm.layers.22.self_attn.q_proj
963
- apm.layers.22.self_attn.out_proj
964
- apm.layers.22.self_attn_layer_norm
965
- apm.layers.22.activation_fn
966
- apm.layers.22.fc1
967
- apm.layers.22.fc2
968
- apm.layers.22.final_layer_norm
969
- apm.layers.23
970
- apm.layers.23.self_attn
971
- apm.layers.23.self_attn.k_proj
972
- apm.layers.23.self_attn.v_proj
973
- apm.layers.23.self_attn.q_proj
974
- apm.layers.23.self_attn.out_proj
975
- apm.layers.23.self_attn_layer_norm
976
- apm.layers.23.activation_fn
977
- apm.layers.23.fc1
978
- apm.layers.23.fc2
979
- apm.layers.23.final_layer_norm
980
- apm.layer_norm
981
- audio_avg_pooler
982
- audio_projection_layer
983
- audio_projection_layer.linear1
984
- audio_projection_layer.relu
985
- audio_projection_layer.linear2
986
- tts
987
- tts.projector
988
- tts.projector.linear1
989
- tts.projector.relu
990
- tts.projector.linear2
991
- tts.emb_code
992
- tts.emb_code.0
993
- tts.emb_code.1
994
- tts.emb_code.2
995
- tts.emb_code.3
996
- tts.emb_text
997
- tts.head_code
998
- tts.head_code.0
999
- tts.head_code.0.parametrizations
1000
- tts.head_code.0.parametrizations.weight
1001
- tts.head_code.0.parametrizations.weight.0
1002
- tts.head_code.1
1003
- tts.head_code.1.parametrizations
1004
- tts.head_code.1.parametrizations.weight
1005
- tts.head_code.1.parametrizations.weight.0
1006
- tts.head_code.2
1007
- tts.head_code.2.parametrizations
1008
- tts.head_code.2.parametrizations.weight
1009
- tts.head_code.2.parametrizations.weight.0
1010
- tts.head_code.3
1011
- tts.head_code.3.parametrizations
1012
- tts.head_code.3.parametrizations.weight
1013
- tts.head_code.3.parametrizations.weight.0
1014
- tts.dvae
1015
- tts.dvae.downsample_conv
1016
- tts.dvae.downsample_conv.0
1017
- tts.dvae.downsample_conv.1
1018
- tts.dvae.downsample_conv.2
1019
- tts.dvae.downsample_conv.3
1020
- tts.dvae.encoder
1021
- tts.dvae.encoder.conv_in
1022
- tts.dvae.encoder.conv_in.0
1023
- tts.dvae.encoder.conv_in.1
1024
- tts.dvae.encoder.conv_in.2
1025
- tts.dvae.encoder.decoder_block
1026
- tts.dvae.encoder.decoder_block.0
1027
- tts.dvae.encoder.decoder_block.0.dwconv
1028
- tts.dvae.encoder.decoder_block.0.norm
1029
- tts.dvae.encoder.decoder_block.0.pwconv1
1030
- tts.dvae.encoder.decoder_block.0.act
1031
- tts.dvae.encoder.decoder_block.0.pwconv2
1032
- tts.dvae.encoder.decoder_block.1
1033
- tts.dvae.encoder.decoder_block.1.dwconv
1034
- tts.dvae.encoder.decoder_block.1.norm
1035
- tts.dvae.encoder.decoder_block.1.pwconv1
1036
- tts.dvae.encoder.decoder_block.1.act
1037
- tts.dvae.encoder.decoder_block.1.pwconv2
1038
- tts.dvae.encoder.decoder_block.2
1039
- tts.dvae.encoder.decoder_block.2.dwconv
1040
- tts.dvae.encoder.decoder_block.2.norm
1041
- tts.dvae.encoder.decoder_block.2.pwconv1
1042
- tts.dvae.encoder.decoder_block.2.act
1043
- tts.dvae.encoder.decoder_block.2.pwconv2
1044
- tts.dvae.encoder.decoder_block.3
1045
- tts.dvae.encoder.decoder_block.3.dwconv
1046
- tts.dvae.encoder.decoder_block.3.norm
1047
- tts.dvae.encoder.decoder_block.3.pwconv1
1048
- tts.dvae.encoder.decoder_block.3.act
1049
- tts.dvae.encoder.decoder_block.3.pwconv2
1050
- tts.dvae.encoder.decoder_block.4
1051
- tts.dvae.encoder.decoder_block.4.dwconv
1052
- tts.dvae.encoder.decoder_block.4.norm
1053
- tts.dvae.encoder.decoder_block.4.pwconv1
1054
- tts.dvae.encoder.decoder_block.4.act
1055
- tts.dvae.encoder.decoder_block.4.pwconv2
1056
- tts.dvae.encoder.decoder_block.5
1057
- tts.dvae.encoder.decoder_block.5.dwconv
1058
- tts.dvae.encoder.decoder_block.5.norm
1059
- tts.dvae.encoder.decoder_block.5.pwconv1
1060
- tts.dvae.encoder.decoder_block.5.act
1061
- tts.dvae.encoder.decoder_block.5.pwconv2
1062
- tts.dvae.encoder.decoder_block.6
1063
- tts.dvae.encoder.decoder_block.6.dwconv
1064
- tts.dvae.encoder.decoder_block.6.norm
1065
- tts.dvae.encoder.decoder_block.6.pwconv1
1066
- tts.dvae.encoder.decoder_block.6.act
1067
- tts.dvae.encoder.decoder_block.6.pwconv2
1068
- tts.dvae.encoder.decoder_block.7
1069
- tts.dvae.encoder.decoder_block.7.dwconv
1070
- tts.dvae.encoder.decoder_block.7.norm
1071
- tts.dvae.encoder.decoder_block.7.pwconv1
1072
- tts.dvae.encoder.decoder_block.7.act
1073
- tts.dvae.encoder.decoder_block.7.pwconv2
1074
- tts.dvae.encoder.decoder_block.8
1075
- tts.dvae.encoder.decoder_block.8.dwconv
1076
- tts.dvae.encoder.decoder_block.8.norm
1077
- tts.dvae.encoder.decoder_block.8.pwconv1
1078
- tts.dvae.encoder.decoder_block.8.act
1079
- tts.dvae.encoder.decoder_block.8.pwconv2
1080
- tts.dvae.encoder.decoder_block.9
1081
- tts.dvae.encoder.decoder_block.9.dwconv
1082
- tts.dvae.encoder.decoder_block.9.norm
1083
- tts.dvae.encoder.decoder_block.9.pwconv1
1084
- tts.dvae.encoder.decoder_block.9.act
1085
- tts.dvae.encoder.decoder_block.9.pwconv2
1086
- tts.dvae.encoder.decoder_block.10
1087
- tts.dvae.encoder.decoder_block.10.dwconv
1088
- tts.dvae.encoder.decoder_block.10.norm
1089
- tts.dvae.encoder.decoder_block.10.pwconv1
1090
- tts.dvae.encoder.decoder_block.10.act
1091
- tts.dvae.encoder.decoder_block.10.pwconv2
1092
- tts.dvae.encoder.decoder_block.11
1093
- tts.dvae.encoder.decoder_block.11.dwconv
1094
- tts.dvae.encoder.decoder_block.11.norm
1095
- tts.dvae.encoder.decoder_block.11.pwconv1
1096
- tts.dvae.encoder.decoder_block.11.act
1097
- tts.dvae.encoder.decoder_block.11.pwconv2
1098
- tts.dvae.encoder.conv_out
1099
- tts.dvae.decoder
1100
- tts.dvae.decoder.conv_in
1101
- tts.dvae.decoder.conv_in.0
1102
- tts.dvae.decoder.conv_in.1
1103
- tts.dvae.decoder.conv_in.2
1104
- tts.dvae.decoder.decoder_block
1105
- tts.dvae.decoder.decoder_block.0
1106
- tts.dvae.decoder.decoder_block.0.dwconv
1107
- tts.dvae.decoder.decoder_block.0.norm
1108
- tts.dvae.decoder.decoder_block.0.pwconv1
1109
- tts.dvae.decoder.decoder_block.0.act
1110
- tts.dvae.decoder.decoder_block.0.pwconv2
1111
- tts.dvae.decoder.decoder_block.1
1112
- tts.dvae.decoder.decoder_block.1.dwconv
1113
- tts.dvae.decoder.decoder_block.1.norm
1114
- tts.dvae.decoder.decoder_block.1.pwconv1
1115
- tts.dvae.decoder.decoder_block.1.act
1116
- tts.dvae.decoder.decoder_block.1.pwconv2
1117
- tts.dvae.decoder.decoder_block.2
1118
- tts.dvae.decoder.decoder_block.2.dwconv
1119
- tts.dvae.decoder.decoder_block.2.norm
1120
- tts.dvae.decoder.decoder_block.2.pwconv1
1121
- tts.dvae.decoder.decoder_block.2.act
1122
- tts.dvae.decoder.decoder_block.2.pwconv2
1123
- tts.dvae.decoder.decoder_block.3
1124
- tts.dvae.decoder.decoder_block.3.dwconv
1125
- tts.dvae.decoder.decoder_block.3.norm
1126
- tts.dvae.decoder.decoder_block.3.pwconv1
1127
- tts.dvae.decoder.decoder_block.3.act
1128
- tts.dvae.decoder.decoder_block.3.pwconv2
1129
- tts.dvae.decoder.decoder_block.4
1130
- tts.dvae.decoder.decoder_block.4.dwconv
1131
- tts.dvae.decoder.decoder_block.4.norm
1132
- tts.dvae.decoder.decoder_block.4.pwconv1
1133
- tts.dvae.decoder.decoder_block.4.act
1134
- tts.dvae.decoder.decoder_block.4.pwconv2
1135
- tts.dvae.decoder.decoder_block.5
1136
- tts.dvae.decoder.decoder_block.5.dwconv
1137
- tts.dvae.decoder.decoder_block.5.norm
1138
- tts.dvae.decoder.decoder_block.5.pwconv1
1139
- tts.dvae.decoder.decoder_block.5.act
1140
- tts.dvae.decoder.decoder_block.5.pwconv2
1141
- tts.dvae.decoder.decoder_block.6
1142
- tts.dvae.decoder.decoder_block.6.dwconv
1143
- tts.dvae.decoder.decoder_block.6.norm
1144
- tts.dvae.decoder.decoder_block.6.pwconv1
1145
- tts.dvae.decoder.decoder_block.6.act
1146
- tts.dvae.decoder.decoder_block.6.pwconv2
1147
- tts.dvae.decoder.decoder_block.7
1148
- tts.dvae.decoder.decoder_block.7.dwconv
1149
- tts.dvae.decoder.decoder_block.7.norm
1150
- tts.dvae.decoder.decoder_block.7.pwconv1
1151
- tts.dvae.decoder.decoder_block.7.act
1152
- tts.dvae.decoder.decoder_block.7.pwconv2
1153
- tts.dvae.decoder.decoder_block.8
1154
- tts.dvae.decoder.decoder_block.8.dwconv
1155
- tts.dvae.decoder.decoder_block.8.norm
1156
- tts.dvae.decoder.decoder_block.8.pwconv1
1157
- tts.dvae.decoder.decoder_block.8.act
1158
- tts.dvae.decoder.decoder_block.8.pwconv2
1159
- tts.dvae.decoder.decoder_block.9
1160
- tts.dvae.decoder.decoder_block.9.dwconv
1161
- tts.dvae.decoder.decoder_block.9.norm
1162
- tts.dvae.decoder.decoder_block.9.pwconv1
1163
- tts.dvae.decoder.decoder_block.9.act
1164
- tts.dvae.decoder.decoder_block.9.pwconv2
1165
- tts.dvae.decoder.decoder_block.10
1166
- tts.dvae.decoder.decoder_block.10.dwconv
1167
- tts.dvae.decoder.decoder_block.10.norm
1168
- tts.dvae.decoder.decoder_block.10.pwconv1
1169
- tts.dvae.decoder.decoder_block.10.act
1170
- tts.dvae.decoder.decoder_block.10.pwconv2
1171
- tts.dvae.decoder.decoder_block.11
1172
- tts.dvae.decoder.decoder_block.11.dwconv
1173
- tts.dvae.decoder.decoder_block.11.norm
1174
- tts.dvae.decoder.decoder_block.11.pwconv1
1175
- tts.dvae.decoder.decoder_block.11.act
1176
- tts.dvae.decoder.decoder_block.11.pwconv2
1177
- tts.dvae.decoder.conv_out
1178
- tts.dvae.out_conv
1179
- tts.dvae.vq_layer
1180
- tts.dvae.vq_layer.quantizer
1181
- tts.dvae.vq_layer.quantizer.rvqs
1182
- tts.dvae.vq_layer.quantizer.rvqs.0
1183
- tts.dvae.vq_layer.quantizer.rvqs.0.project_in
1184
- tts.dvae.vq_layer.quantizer.rvqs.0.project_out
1185
- tts.dvae.vq_layer.quantizer.rvqs.0.layers
1186
- tts.dvae.vq_layer.quantizer.rvqs.0.layers.0
1187
- tts.dvae.vq_layer.quantizer.rvqs.0.layers.0.project_in
1188
- tts.dvae.vq_layer.quantizer.rvqs.0.layers.0.project_out
1189
- tts.dvae.vq_layer.quantizer.rvqs.0.layers.1
1190
- tts.dvae.vq_layer.quantizer.rvqs.0.layers.1.project_in
1191
- tts.dvae.vq_layer.quantizer.rvqs.0.layers.1.project_out
1192
- tts.dvae.vq_layer.quantizer.rvqs.1
1193
- tts.dvae.vq_layer.quantizer.rvqs.1.project_in
1194
- tts.dvae.vq_layer.quantizer.rvqs.1.project_out
1195
- tts.dvae.vq_layer.quantizer.rvqs.1.layers
1196
- tts.dvae.vq_layer.quantizer.rvqs.1.layers.0
1197
- tts.dvae.vq_layer.quantizer.rvqs.1.layers.0.project_in
1198
- tts.dvae.vq_layer.quantizer.rvqs.1.layers.0.project_out
1199
- tts.dvae.vq_layer.quantizer.rvqs.1.layers.1
1200
- tts.dvae.vq_layer.quantizer.rvqs.1.layers.1.project_in
1201
- tts.dvae.vq_layer.quantizer.rvqs.1.layers.1.project_out
1202
- tts.model
1203
- tts.model.embed_tokens
1204
- tts.model.layers
1205
- tts.model.layers.0
1206
- tts.model.layers.0.self_attn
1207
- tts.model.layers.0.self_attn.q_proj
1208
- tts.model.layers.0.self_attn.k_proj
1209
- tts.model.layers.0.self_attn.v_proj
1210
- tts.model.layers.0.self_attn.o_proj
1211
- tts.model.layers.0.mlp
1212
- tts.model.layers.0.mlp.gate_proj
1213
- tts.model.layers.0.mlp.up_proj
1214
- tts.model.layers.0.mlp.down_proj
1215
- tts.model.layers.0.mlp.act_fn
1216
- tts.model.layers.0.input_layernorm
1217
- tts.model.layers.0.post_attention_layernorm
1218
- tts.model.layers.1
1219
- tts.model.layers.1.self_attn
1220
- tts.model.layers.1.self_attn.q_proj
1221
- tts.model.layers.1.self_attn.k_proj
1222
- tts.model.layers.1.self_attn.v_proj
1223
- tts.model.layers.1.self_attn.o_proj
1224
- tts.model.layers.1.mlp
1225
- tts.model.layers.1.mlp.gate_proj
1226
- tts.model.layers.1.mlp.up_proj
1227
- tts.model.layers.1.mlp.down_proj
1228
- tts.model.layers.1.mlp.act_fn
1229
- tts.model.layers.1.input_layernorm
1230
- tts.model.layers.1.post_attention_layernorm
1231
- tts.model.layers.2
1232
- tts.model.layers.2.self_attn
1233
- tts.model.layers.2.self_attn.q_proj
1234
- tts.model.layers.2.self_attn.k_proj
1235
- tts.model.layers.2.self_attn.v_proj
1236
- tts.model.layers.2.self_attn.o_proj
1237
- tts.model.layers.2.mlp
1238
- tts.model.layers.2.mlp.gate_proj
1239
- tts.model.layers.2.mlp.up_proj
1240
- tts.model.layers.2.mlp.down_proj
1241
- tts.model.layers.2.mlp.act_fn
1242
- tts.model.layers.2.input_layernorm
1243
- tts.model.layers.2.post_attention_layernorm
1244
- tts.model.layers.3
1245
- tts.model.layers.3.self_attn
1246
- tts.model.layers.3.self_attn.q_proj
1247
- tts.model.layers.3.self_attn.k_proj
1248
- tts.model.layers.3.self_attn.v_proj
1249
- tts.model.layers.3.self_attn.o_proj
1250
- tts.model.layers.3.mlp
1251
- tts.model.layers.3.mlp.gate_proj
1252
- tts.model.layers.3.mlp.up_proj
1253
- tts.model.layers.3.mlp.down_proj
1254
- tts.model.layers.3.mlp.act_fn
1255
- tts.model.layers.3.input_layernorm
1256
- tts.model.layers.3.post_attention_layernorm
1257
- tts.model.layers.4
1258
- tts.model.layers.4.self_attn
1259
- tts.model.layers.4.self_attn.q_proj
1260
- tts.model.layers.4.self_attn.k_proj
1261
- tts.model.layers.4.self_attn.v_proj
1262
- tts.model.layers.4.self_attn.o_proj
1263
- tts.model.layers.4.mlp
1264
- tts.model.layers.4.mlp.gate_proj
1265
- tts.model.layers.4.mlp.up_proj
1266
- tts.model.layers.4.mlp.down_proj
1267
- tts.model.layers.4.mlp.act_fn
1268
- tts.model.layers.4.input_layernorm
1269
- tts.model.layers.4.post_attention_layernorm
1270
- tts.model.layers.5
1271
- tts.model.layers.5.self_attn
1272
- tts.model.layers.5.self_attn.q_proj
1273
- tts.model.layers.5.self_attn.k_proj
1274
- tts.model.layers.5.self_attn.v_proj
1275
- tts.model.layers.5.self_attn.o_proj
1276
- tts.model.layers.5.mlp
1277
- tts.model.layers.5.mlp.gate_proj
1278
- tts.model.layers.5.mlp.up_proj
1279
- tts.model.layers.5.mlp.down_proj
1280
- tts.model.layers.5.mlp.act_fn
1281
- tts.model.layers.5.input_layernorm
1282
- tts.model.layers.5.post_attention_layernorm
1283
- tts.model.layers.6
1284
- tts.model.layers.6.self_attn
1285
- tts.model.layers.6.self_attn.q_proj
1286
- tts.model.layers.6.self_attn.k_proj
1287
- tts.model.layers.6.self_attn.v_proj
1288
- tts.model.layers.6.self_attn.o_proj
1289
- tts.model.layers.6.mlp
1290
- tts.model.layers.6.mlp.gate_proj
1291
- tts.model.layers.6.mlp.up_proj
1292
- tts.model.layers.6.mlp.down_proj
1293
- tts.model.layers.6.mlp.act_fn
1294
- tts.model.layers.6.input_layernorm
1295
- tts.model.layers.6.post_attention_layernorm
1296
- tts.model.layers.7
1297
- tts.model.layers.7.self_attn
1298
- tts.model.layers.7.self_attn.q_proj
1299
- tts.model.layers.7.self_attn.k_proj
1300
- tts.model.layers.7.self_attn.v_proj
1301
- tts.model.layers.7.self_attn.o_proj
1302
- tts.model.layers.7.mlp
1303
- tts.model.layers.7.mlp.gate_proj
1304
- tts.model.layers.7.mlp.up_proj
1305
- tts.model.layers.7.mlp.down_proj
1306
- tts.model.layers.7.mlp.act_fn
1307
- tts.model.layers.7.input_layernorm
1308
- tts.model.layers.7.post_attention_layernorm
1309
- tts.model.layers.8
1310
- tts.model.layers.8.self_attn
1311
- tts.model.layers.8.self_attn.q_proj
1312
- tts.model.layers.8.self_attn.k_proj
1313
- tts.model.layers.8.self_attn.v_proj
1314
- tts.model.layers.8.self_attn.o_proj
1315
- tts.model.layers.8.mlp
1316
- tts.model.layers.8.mlp.gate_proj
1317
- tts.model.layers.8.mlp.up_proj
1318
- tts.model.layers.8.mlp.down_proj
1319
- tts.model.layers.8.mlp.act_fn
1320
- tts.model.layers.8.input_layernorm
1321
- tts.model.layers.8.post_attention_layernorm
1322
- tts.model.layers.9
1323
- tts.model.layers.9.self_attn
1324
- tts.model.layers.9.self_attn.q_proj
1325
- tts.model.layers.9.self_attn.k_proj
1326
- tts.model.layers.9.self_attn.v_proj
1327
- tts.model.layers.9.self_attn.o_proj
1328
- tts.model.layers.9.mlp
1329
- tts.model.layers.9.mlp.gate_proj
1330
- tts.model.layers.9.mlp.up_proj
1331
- tts.model.layers.9.mlp.down_proj
1332
- tts.model.layers.9.mlp.act_fn
1333
- tts.model.layers.9.input_layernorm
1334
- tts.model.layers.9.post_attention_layernorm
1335
- tts.model.layers.10
1336
- tts.model.layers.10.self_attn
1337
- tts.model.layers.10.self_attn.q_proj
1338
- tts.model.layers.10.self_attn.k_proj
1339
- tts.model.layers.10.self_attn.v_proj
1340
- tts.model.layers.10.self_attn.o_proj
1341
- tts.model.layers.10.mlp
1342
- tts.model.layers.10.mlp.gate_proj
1343
- tts.model.layers.10.mlp.up_proj
1344
- tts.model.layers.10.mlp.down_proj
1345
- tts.model.layers.10.mlp.act_fn
1346
- tts.model.layers.10.input_layernorm
1347
- tts.model.layers.10.post_attention_layernorm
1348
- tts.model.layers.11
1349
- tts.model.layers.11.self_attn
1350
- tts.model.layers.11.self_attn.q_proj
1351
- tts.model.layers.11.self_attn.k_proj
1352
- tts.model.layers.11.self_attn.v_proj
1353
- tts.model.layers.11.self_attn.o_proj
1354
- tts.model.layers.11.mlp
1355
- tts.model.layers.11.mlp.gate_proj
1356
- tts.model.layers.11.mlp.up_proj
1357
- tts.model.layers.11.mlp.down_proj
1358
- tts.model.layers.11.mlp.act_fn
1359
- tts.model.layers.11.input_layernorm
1360
- tts.model.layers.11.post_attention_layernorm
1361
- tts.model.layers.12
1362
- tts.model.layers.12.self_attn
1363
- tts.model.layers.12.self_attn.q_proj
1364
- tts.model.layers.12.self_attn.k_proj
1365
- tts.model.layers.12.self_attn.v_proj
1366
- tts.model.layers.12.self_attn.o_proj
1367
- tts.model.layers.12.mlp
1368
- tts.model.layers.12.mlp.gate_proj
1369
- tts.model.layers.12.mlp.up_proj
1370
- tts.model.layers.12.mlp.down_proj
1371
- tts.model.layers.12.mlp.act_fn
1372
- tts.model.layers.12.input_layernorm
1373
- tts.model.layers.12.post_attention_layernorm
1374
- tts.model.layers.13
1375
- tts.model.layers.13.self_attn
1376
- tts.model.layers.13.self_attn.q_proj
1377
- tts.model.layers.13.self_attn.k_proj
1378
- tts.model.layers.13.self_attn.v_proj
1379
- tts.model.layers.13.self_attn.o_proj
1380
- tts.model.layers.13.mlp
1381
- tts.model.layers.13.mlp.gate_proj
1382
- tts.model.layers.13.mlp.up_proj
1383
- tts.model.layers.13.mlp.down_proj
1384
- tts.model.layers.13.mlp.act_fn
1385
- tts.model.layers.13.input_layernorm
1386
- tts.model.layers.13.post_attention_layernorm
1387
- tts.model.layers.14
1388
- tts.model.layers.14.self_attn
1389
- tts.model.layers.14.self_attn.q_proj
1390
- tts.model.layers.14.self_attn.k_proj
1391
- tts.model.layers.14.self_attn.v_proj
1392
- tts.model.layers.14.self_attn.o_proj
1393
- tts.model.layers.14.mlp
1394
- tts.model.layers.14.mlp.gate_proj
1395
- tts.model.layers.14.mlp.up_proj
1396
- tts.model.layers.14.mlp.down_proj
1397
- tts.model.layers.14.mlp.act_fn
1398
- tts.model.layers.14.input_layernorm
1399
- tts.model.layers.14.post_attention_layernorm
1400
- tts.model.layers.15
1401
- tts.model.layers.15.self_attn
1402
- tts.model.layers.15.self_attn.q_proj
1403
- tts.model.layers.15.self_attn.k_proj
1404
- tts.model.layers.15.self_attn.v_proj
1405
- tts.model.layers.15.self_attn.o_proj
1406
- tts.model.layers.15.mlp
1407
- tts.model.layers.15.mlp.gate_proj
1408
- tts.model.layers.15.mlp.up_proj
1409
- tts.model.layers.15.mlp.down_proj
1410
- tts.model.layers.15.mlp.act_fn
1411
- tts.model.layers.15.input_layernorm
1412
- tts.model.layers.15.post_attention_layernorm
1413
- tts.model.layers.16
1414
- tts.model.layers.16.self_attn
1415
- tts.model.layers.16.self_attn.q_proj
1416
- tts.model.layers.16.self_attn.k_proj
1417
- tts.model.layers.16.self_attn.v_proj
1418
- tts.model.layers.16.self_attn.o_proj
1419
- tts.model.layers.16.mlp
1420
- tts.model.layers.16.mlp.gate_proj
1421
- tts.model.layers.16.mlp.up_proj
1422
- tts.model.layers.16.mlp.down_proj
1423
- tts.model.layers.16.mlp.act_fn
1424
- tts.model.layers.16.input_layernorm
1425
- tts.model.layers.16.post_attention_layernorm
1426
- tts.model.layers.17
1427
- tts.model.layers.17.self_attn
1428
- tts.model.layers.17.self_attn.q_proj
1429
- tts.model.layers.17.self_attn.k_proj
1430
- tts.model.layers.17.self_attn.v_proj
1431
- tts.model.layers.17.self_attn.o_proj
1432
- tts.model.layers.17.mlp
1433
- tts.model.layers.17.mlp.gate_proj
1434
- tts.model.layers.17.mlp.up_proj
1435
- tts.model.layers.17.mlp.down_proj
1436
- tts.model.layers.17.mlp.act_fn
1437
- tts.model.layers.17.input_layernorm
1438
- tts.model.layers.17.post_attention_layernorm
1439
- tts.model.layers.18
1440
- tts.model.layers.18.self_attn
1441
- tts.model.layers.18.self_attn.q_proj
1442
- tts.model.layers.18.self_attn.k_proj
1443
- tts.model.layers.18.self_attn.v_proj
1444
- tts.model.layers.18.self_attn.o_proj
1445
- tts.model.layers.18.mlp
1446
- tts.model.layers.18.mlp.gate_proj
1447
- tts.model.layers.18.mlp.up_proj
1448
- tts.model.layers.18.mlp.down_proj
1449
- tts.model.layers.18.mlp.act_fn
1450
- tts.model.layers.18.input_layernorm
1451
- tts.model.layers.18.post_attention_layernorm
1452
- tts.model.layers.19
1453
- tts.model.layers.19.self_attn
1454
- tts.model.layers.19.self_attn.q_proj
1455
- tts.model.layers.19.self_attn.k_proj
1456
- tts.model.layers.19.self_attn.v_proj
1457
- tts.model.layers.19.self_attn.o_proj
1458
- tts.model.layers.19.mlp
1459
- tts.model.layers.19.mlp.gate_proj
1460
- tts.model.layers.19.mlp.up_proj
1461
- tts.model.layers.19.mlp.down_proj
1462
- tts.model.layers.19.mlp.act_fn
1463
- tts.model.layers.19.input_layernorm
1464
- tts.model.layers.19.post_attention_layernorm
1465
- tts.model.norm
1466
- tts.model.rotary_emb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/paligemma/paligemma-3b.txt DELETED
@@ -1,575 +0,0 @@
1
- vision_tower
2
- vision_tower.vision_model
3
- vision_tower.vision_model.embeddings
4
- vision_tower.vision_model.embeddings.patch_embedding
5
- vision_tower.vision_model.embeddings.position_embedding
6
- vision_tower.vision_model.encoder
7
- vision_tower.vision_model.encoder.layers
8
- vision_tower.vision_model.encoder.layers.0
9
- vision_tower.vision_model.encoder.layers.0.layer_norm1
10
- vision_tower.vision_model.encoder.layers.0.self_attn
11
- vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
12
- vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
13
- vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
14
- vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
15
- vision_tower.vision_model.encoder.layers.0.layer_norm2
16
- vision_tower.vision_model.encoder.layers.0.mlp
17
- vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
18
- vision_tower.vision_model.encoder.layers.0.mlp.fc1
19
- vision_tower.vision_model.encoder.layers.0.mlp.fc2
20
- vision_tower.vision_model.encoder.layers.1
21
- vision_tower.vision_model.encoder.layers.1.layer_norm1
22
- vision_tower.vision_model.encoder.layers.1.self_attn
23
- vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
24
- vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
25
- vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
26
- vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
27
- vision_tower.vision_model.encoder.layers.1.layer_norm2
28
- vision_tower.vision_model.encoder.layers.1.mlp
29
- vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
30
- vision_tower.vision_model.encoder.layers.1.mlp.fc1
31
- vision_tower.vision_model.encoder.layers.1.mlp.fc2
32
- vision_tower.vision_model.encoder.layers.2
33
- vision_tower.vision_model.encoder.layers.2.layer_norm1
34
- vision_tower.vision_model.encoder.layers.2.self_attn
35
- vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
36
- vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
37
- vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
38
- vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
39
- vision_tower.vision_model.encoder.layers.2.layer_norm2
40
- vision_tower.vision_model.encoder.layers.2.mlp
41
- vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
42
- vision_tower.vision_model.encoder.layers.2.mlp.fc1
43
- vision_tower.vision_model.encoder.layers.2.mlp.fc2
44
- vision_tower.vision_model.encoder.layers.3
45
- vision_tower.vision_model.encoder.layers.3.layer_norm1
46
- vision_tower.vision_model.encoder.layers.3.self_attn
47
- vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
48
- vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
49
- vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
50
- vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
51
- vision_tower.vision_model.encoder.layers.3.layer_norm2
52
- vision_tower.vision_model.encoder.layers.3.mlp
53
- vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
54
- vision_tower.vision_model.encoder.layers.3.mlp.fc1
55
- vision_tower.vision_model.encoder.layers.3.mlp.fc2
56
- vision_tower.vision_model.encoder.layers.4
57
- vision_tower.vision_model.encoder.layers.4.layer_norm1
58
- vision_tower.vision_model.encoder.layers.4.self_attn
59
- vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
60
- vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
61
- vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
62
- vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
63
- vision_tower.vision_model.encoder.layers.4.layer_norm2
64
- vision_tower.vision_model.encoder.layers.4.mlp
65
- vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
66
- vision_tower.vision_model.encoder.layers.4.mlp.fc1
67
- vision_tower.vision_model.encoder.layers.4.mlp.fc2
68
- vision_tower.vision_model.encoder.layers.5
69
- vision_tower.vision_model.encoder.layers.5.layer_norm1
70
- vision_tower.vision_model.encoder.layers.5.self_attn
71
- vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
72
- vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
73
- vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
74
- vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
75
- vision_tower.vision_model.encoder.layers.5.layer_norm2
76
- vision_tower.vision_model.encoder.layers.5.mlp
77
- vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
78
- vision_tower.vision_model.encoder.layers.5.mlp.fc1
79
- vision_tower.vision_model.encoder.layers.5.mlp.fc2
80
- vision_tower.vision_model.encoder.layers.6
81
- vision_tower.vision_model.encoder.layers.6.layer_norm1
82
- vision_tower.vision_model.encoder.layers.6.self_attn
83
- vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
84
- vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
85
- vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
86
- vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
87
- vision_tower.vision_model.encoder.layers.6.layer_norm2
88
- vision_tower.vision_model.encoder.layers.6.mlp
89
- vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
90
- vision_tower.vision_model.encoder.layers.6.mlp.fc1
91
- vision_tower.vision_model.encoder.layers.6.mlp.fc2
92
- vision_tower.vision_model.encoder.layers.7
93
- vision_tower.vision_model.encoder.layers.7.layer_norm1
94
- vision_tower.vision_model.encoder.layers.7.self_attn
95
- vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
96
- vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
97
- vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
98
- vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
99
- vision_tower.vision_model.encoder.layers.7.layer_norm2
100
- vision_tower.vision_model.encoder.layers.7.mlp
101
- vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
102
- vision_tower.vision_model.encoder.layers.7.mlp.fc1
103
- vision_tower.vision_model.encoder.layers.7.mlp.fc2
104
- vision_tower.vision_model.encoder.layers.8
105
- vision_tower.vision_model.encoder.layers.8.layer_norm1
106
- vision_tower.vision_model.encoder.layers.8.self_attn
107
- vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
108
- vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
109
- vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
110
- vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
111
- vision_tower.vision_model.encoder.layers.8.layer_norm2
112
- vision_tower.vision_model.encoder.layers.8.mlp
113
- vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
114
- vision_tower.vision_model.encoder.layers.8.mlp.fc1
115
- vision_tower.vision_model.encoder.layers.8.mlp.fc2
116
- vision_tower.vision_model.encoder.layers.9
117
- vision_tower.vision_model.encoder.layers.9.layer_norm1
118
- vision_tower.vision_model.encoder.layers.9.self_attn
119
- vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
120
- vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
121
- vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
122
- vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
123
- vision_tower.vision_model.encoder.layers.9.layer_norm2
124
- vision_tower.vision_model.encoder.layers.9.mlp
125
- vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
126
- vision_tower.vision_model.encoder.layers.9.mlp.fc1
127
- vision_tower.vision_model.encoder.layers.9.mlp.fc2
128
- vision_tower.vision_model.encoder.layers.10
129
- vision_tower.vision_model.encoder.layers.10.layer_norm1
130
- vision_tower.vision_model.encoder.layers.10.self_attn
131
- vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
132
- vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
133
- vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
134
- vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
135
- vision_tower.vision_model.encoder.layers.10.layer_norm2
136
- vision_tower.vision_model.encoder.layers.10.mlp
137
- vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
138
- vision_tower.vision_model.encoder.layers.10.mlp.fc1
139
- vision_tower.vision_model.encoder.layers.10.mlp.fc2
140
- vision_tower.vision_model.encoder.layers.11
141
- vision_tower.vision_model.encoder.layers.11.layer_norm1
142
- vision_tower.vision_model.encoder.layers.11.self_attn
143
- vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
144
- vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
145
- vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
146
- vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
147
- vision_tower.vision_model.encoder.layers.11.layer_norm2
148
- vision_tower.vision_model.encoder.layers.11.mlp
149
- vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
150
- vision_tower.vision_model.encoder.layers.11.mlp.fc1
151
- vision_tower.vision_model.encoder.layers.11.mlp.fc2
152
- vision_tower.vision_model.encoder.layers.12
153
- vision_tower.vision_model.encoder.layers.12.layer_norm1
154
- vision_tower.vision_model.encoder.layers.12.self_attn
155
- vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
156
- vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
157
- vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
158
- vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
159
- vision_tower.vision_model.encoder.layers.12.layer_norm2
160
- vision_tower.vision_model.encoder.layers.12.mlp
161
- vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
162
- vision_tower.vision_model.encoder.layers.12.mlp.fc1
163
- vision_tower.vision_model.encoder.layers.12.mlp.fc2
164
- vision_tower.vision_model.encoder.layers.13
165
- vision_tower.vision_model.encoder.layers.13.layer_norm1
166
- vision_tower.vision_model.encoder.layers.13.self_attn
167
- vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
168
- vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
169
- vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
170
- vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
171
- vision_tower.vision_model.encoder.layers.13.layer_norm2
172
- vision_tower.vision_model.encoder.layers.13.mlp
173
- vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
174
- vision_tower.vision_model.encoder.layers.13.mlp.fc1
175
- vision_tower.vision_model.encoder.layers.13.mlp.fc2
176
- vision_tower.vision_model.encoder.layers.14
177
- vision_tower.vision_model.encoder.layers.14.layer_norm1
178
- vision_tower.vision_model.encoder.layers.14.self_attn
179
- vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
180
- vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
181
- vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
182
- vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
183
- vision_tower.vision_model.encoder.layers.14.layer_norm2
184
- vision_tower.vision_model.encoder.layers.14.mlp
185
- vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
186
- vision_tower.vision_model.encoder.layers.14.mlp.fc1
187
- vision_tower.vision_model.encoder.layers.14.mlp.fc2
188
- vision_tower.vision_model.encoder.layers.15
189
- vision_tower.vision_model.encoder.layers.15.layer_norm1
190
- vision_tower.vision_model.encoder.layers.15.self_attn
191
- vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
192
- vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
193
- vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
194
- vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
195
- vision_tower.vision_model.encoder.layers.15.layer_norm2
196
- vision_tower.vision_model.encoder.layers.15.mlp
197
- vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
198
- vision_tower.vision_model.encoder.layers.15.mlp.fc1
199
- vision_tower.vision_model.encoder.layers.15.mlp.fc2
200
- vision_tower.vision_model.encoder.layers.16
201
- vision_tower.vision_model.encoder.layers.16.layer_norm1
202
- vision_tower.vision_model.encoder.layers.16.self_attn
203
- vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
204
- vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
205
- vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
206
- vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
207
- vision_tower.vision_model.encoder.layers.16.layer_norm2
208
- vision_tower.vision_model.encoder.layers.16.mlp
209
- vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
210
- vision_tower.vision_model.encoder.layers.16.mlp.fc1
211
- vision_tower.vision_model.encoder.layers.16.mlp.fc2
212
- vision_tower.vision_model.encoder.layers.17
213
- vision_tower.vision_model.encoder.layers.17.layer_norm1
214
- vision_tower.vision_model.encoder.layers.17.self_attn
215
- vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
216
- vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
217
- vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
218
- vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
219
- vision_tower.vision_model.encoder.layers.17.layer_norm2
220
- vision_tower.vision_model.encoder.layers.17.mlp
221
- vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
222
- vision_tower.vision_model.encoder.layers.17.mlp.fc1
223
- vision_tower.vision_model.encoder.layers.17.mlp.fc2
224
- vision_tower.vision_model.encoder.layers.18
225
- vision_tower.vision_model.encoder.layers.18.layer_norm1
226
- vision_tower.vision_model.encoder.layers.18.self_attn
227
- vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
228
- vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
229
- vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
230
- vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
231
- vision_tower.vision_model.encoder.layers.18.layer_norm2
232
- vision_tower.vision_model.encoder.layers.18.mlp
233
- vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
234
- vision_tower.vision_model.encoder.layers.18.mlp.fc1
235
- vision_tower.vision_model.encoder.layers.18.mlp.fc2
236
- vision_tower.vision_model.encoder.layers.19
237
- vision_tower.vision_model.encoder.layers.19.layer_norm1
238
- vision_tower.vision_model.encoder.layers.19.self_attn
239
- vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
240
- vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
241
- vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
242
- vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
243
- vision_tower.vision_model.encoder.layers.19.layer_norm2
244
- vision_tower.vision_model.encoder.layers.19.mlp
245
- vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
246
- vision_tower.vision_model.encoder.layers.19.mlp.fc1
247
- vision_tower.vision_model.encoder.layers.19.mlp.fc2
248
- vision_tower.vision_model.encoder.layers.20
249
- vision_tower.vision_model.encoder.layers.20.layer_norm1
250
- vision_tower.vision_model.encoder.layers.20.self_attn
251
- vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
252
- vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
253
- vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
254
- vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
255
- vision_tower.vision_model.encoder.layers.20.layer_norm2
256
- vision_tower.vision_model.encoder.layers.20.mlp
257
- vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
258
- vision_tower.vision_model.encoder.layers.20.mlp.fc1
259
- vision_tower.vision_model.encoder.layers.20.mlp.fc2
260
- vision_tower.vision_model.encoder.layers.21
261
- vision_tower.vision_model.encoder.layers.21.layer_norm1
262
- vision_tower.vision_model.encoder.layers.21.self_attn
263
- vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
264
- vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
265
- vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
266
- vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
267
- vision_tower.vision_model.encoder.layers.21.layer_norm2
268
- vision_tower.vision_model.encoder.layers.21.mlp
269
- vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
270
- vision_tower.vision_model.encoder.layers.21.mlp.fc1
271
- vision_tower.vision_model.encoder.layers.21.mlp.fc2
272
- vision_tower.vision_model.encoder.layers.22
273
- vision_tower.vision_model.encoder.layers.22.layer_norm1
274
- vision_tower.vision_model.encoder.layers.22.self_attn
275
- vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
276
- vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
277
- vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
278
- vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
279
- vision_tower.vision_model.encoder.layers.22.layer_norm2
280
- vision_tower.vision_model.encoder.layers.22.mlp
281
- vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
282
- vision_tower.vision_model.encoder.layers.22.mlp.fc1
283
- vision_tower.vision_model.encoder.layers.22.mlp.fc2
284
- vision_tower.vision_model.encoder.layers.23
285
- vision_tower.vision_model.encoder.layers.23.layer_norm1
286
- vision_tower.vision_model.encoder.layers.23.self_attn
287
- vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
288
- vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
289
- vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
290
- vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
291
- vision_tower.vision_model.encoder.layers.23.layer_norm2
292
- vision_tower.vision_model.encoder.layers.23.mlp
293
- vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
294
- vision_tower.vision_model.encoder.layers.23.mlp.fc1
295
- vision_tower.vision_model.encoder.layers.23.mlp.fc2
296
- vision_tower.vision_model.encoder.layers.24
297
- vision_tower.vision_model.encoder.layers.24.layer_norm1
298
- vision_tower.vision_model.encoder.layers.24.self_attn
299
- vision_tower.vision_model.encoder.layers.24.self_attn.k_proj
300
- vision_tower.vision_model.encoder.layers.24.self_attn.v_proj
301
- vision_tower.vision_model.encoder.layers.24.self_attn.q_proj
302
- vision_tower.vision_model.encoder.layers.24.self_attn.out_proj
303
- vision_tower.vision_model.encoder.layers.24.layer_norm2
304
- vision_tower.vision_model.encoder.layers.24.mlp
305
- vision_tower.vision_model.encoder.layers.24.mlp.activation_fn
306
- vision_tower.vision_model.encoder.layers.24.mlp.fc1
307
- vision_tower.vision_model.encoder.layers.24.mlp.fc2
308
- vision_tower.vision_model.encoder.layers.25
309
- vision_tower.vision_model.encoder.layers.25.layer_norm1
310
- vision_tower.vision_model.encoder.layers.25.self_attn
311
- vision_tower.vision_model.encoder.layers.25.self_attn.k_proj
312
- vision_tower.vision_model.encoder.layers.25.self_attn.v_proj
313
- vision_tower.vision_model.encoder.layers.25.self_attn.q_proj
314
- vision_tower.vision_model.encoder.layers.25.self_attn.out_proj
315
- vision_tower.vision_model.encoder.layers.25.layer_norm2
316
- vision_tower.vision_model.encoder.layers.25.mlp
317
- vision_tower.vision_model.encoder.layers.25.mlp.activation_fn
318
- vision_tower.vision_model.encoder.layers.25.mlp.fc1
319
- vision_tower.vision_model.encoder.layers.25.mlp.fc2
320
- vision_tower.vision_model.encoder.layers.26
321
- vision_tower.vision_model.encoder.layers.26.layer_norm1
322
- vision_tower.vision_model.encoder.layers.26.self_attn
323
- vision_tower.vision_model.encoder.layers.26.self_attn.k_proj
324
- vision_tower.vision_model.encoder.layers.26.self_attn.v_proj
325
- vision_tower.vision_model.encoder.layers.26.self_attn.q_proj
326
- vision_tower.vision_model.encoder.layers.26.self_attn.out_proj
327
- vision_tower.vision_model.encoder.layers.26.layer_norm2
328
- vision_tower.vision_model.encoder.layers.26.mlp
329
- vision_tower.vision_model.encoder.layers.26.mlp.activation_fn
330
- vision_tower.vision_model.encoder.layers.26.mlp.fc1
331
- vision_tower.vision_model.encoder.layers.26.mlp.fc2
332
- vision_tower.vision_model.post_layernorm
333
- multi_modal_projector
334
- multi_modal_projector.linear
335
- language_model
336
- language_model.model
337
- language_model.model.embed_tokens
338
- language_model.model.layers
339
- language_model.model.layers.0
340
- language_model.model.layers.0.self_attn
341
- language_model.model.layers.0.self_attn.q_proj
342
- language_model.model.layers.0.self_attn.k_proj
343
- language_model.model.layers.0.self_attn.v_proj
344
- language_model.model.layers.0.self_attn.o_proj
345
- language_model.model.layers.0.mlp
346
- language_model.model.layers.0.mlp.gate_proj
347
- language_model.model.layers.0.mlp.up_proj
348
- language_model.model.layers.0.mlp.down_proj
349
- language_model.model.layers.0.mlp.act_fn
350
- language_model.model.layers.0.input_layernorm
351
- language_model.model.layers.0.post_attention_layernorm
352
- language_model.model.layers.1
353
- language_model.model.layers.1.self_attn
354
- language_model.model.layers.1.self_attn.q_proj
355
- language_model.model.layers.1.self_attn.k_proj
356
- language_model.model.layers.1.self_attn.v_proj
357
- language_model.model.layers.1.self_attn.o_proj
358
- language_model.model.layers.1.mlp
359
- language_model.model.layers.1.mlp.gate_proj
360
- language_model.model.layers.1.mlp.up_proj
361
- language_model.model.layers.1.mlp.down_proj
362
- language_model.model.layers.1.mlp.act_fn
363
- language_model.model.layers.1.input_layernorm
364
- language_model.model.layers.1.post_attention_layernorm
365
- language_model.model.layers.2
366
- language_model.model.layers.2.self_attn
367
- language_model.model.layers.2.self_attn.q_proj
368
- language_model.model.layers.2.self_attn.k_proj
369
- language_model.model.layers.2.self_attn.v_proj
370
- language_model.model.layers.2.self_attn.o_proj
371
- language_model.model.layers.2.mlp
372
- language_model.model.layers.2.mlp.gate_proj
373
- language_model.model.layers.2.mlp.up_proj
374
- language_model.model.layers.2.mlp.down_proj
375
- language_model.model.layers.2.mlp.act_fn
376
- language_model.model.layers.2.input_layernorm
377
- language_model.model.layers.2.post_attention_layernorm
378
- language_model.model.layers.3
379
- language_model.model.layers.3.self_attn
380
- language_model.model.layers.3.self_attn.q_proj
381
- language_model.model.layers.3.self_attn.k_proj
382
- language_model.model.layers.3.self_attn.v_proj
383
- language_model.model.layers.3.self_attn.o_proj
384
- language_model.model.layers.3.mlp
385
- language_model.model.layers.3.mlp.gate_proj
386
- language_model.model.layers.3.mlp.up_proj
387
- language_model.model.layers.3.mlp.down_proj
388
- language_model.model.layers.3.mlp.act_fn
389
- language_model.model.layers.3.input_layernorm
390
- language_model.model.layers.3.post_attention_layernorm
391
- language_model.model.layers.4
392
- language_model.model.layers.4.self_attn
393
- language_model.model.layers.4.self_attn.q_proj
394
- language_model.model.layers.4.self_attn.k_proj
395
- language_model.model.layers.4.self_attn.v_proj
396
- language_model.model.layers.4.self_attn.o_proj
397
- language_model.model.layers.4.mlp
398
- language_model.model.layers.4.mlp.gate_proj
399
- language_model.model.layers.4.mlp.up_proj
400
- language_model.model.layers.4.mlp.down_proj
401
- language_model.model.layers.4.mlp.act_fn
402
- language_model.model.layers.4.input_layernorm
403
- language_model.model.layers.4.post_attention_layernorm
404
- language_model.model.layers.5
405
- language_model.model.layers.5.self_attn
406
- language_model.model.layers.5.self_attn.q_proj
407
- language_model.model.layers.5.self_attn.k_proj
408
- language_model.model.layers.5.self_attn.v_proj
409
- language_model.model.layers.5.self_attn.o_proj
410
- language_model.model.layers.5.mlp
411
- language_model.model.layers.5.mlp.gate_proj
412
- language_model.model.layers.5.mlp.up_proj
413
- language_model.model.layers.5.mlp.down_proj
414
- language_model.model.layers.5.mlp.act_fn
415
- language_model.model.layers.5.input_layernorm
416
- language_model.model.layers.5.post_attention_layernorm
417
- language_model.model.layers.6
418
- language_model.model.layers.6.self_attn
419
- language_model.model.layers.6.self_attn.q_proj
420
- language_model.model.layers.6.self_attn.k_proj
421
- language_model.model.layers.6.self_attn.v_proj
422
- language_model.model.layers.6.self_attn.o_proj
423
- language_model.model.layers.6.mlp
424
- language_model.model.layers.6.mlp.gate_proj
425
- language_model.model.layers.6.mlp.up_proj
426
- language_model.model.layers.6.mlp.down_proj
427
- language_model.model.layers.6.mlp.act_fn
428
- language_model.model.layers.6.input_layernorm
429
- language_model.model.layers.6.post_attention_layernorm
430
- language_model.model.layers.7
431
- language_model.model.layers.7.self_attn
432
- language_model.model.layers.7.self_attn.q_proj
433
- language_model.model.layers.7.self_attn.k_proj
434
- language_model.model.layers.7.self_attn.v_proj
435
- language_model.model.layers.7.self_attn.o_proj
436
- language_model.model.layers.7.mlp
437
- language_model.model.layers.7.mlp.gate_proj
438
- language_model.model.layers.7.mlp.up_proj
439
- language_model.model.layers.7.mlp.down_proj
440
- language_model.model.layers.7.mlp.act_fn
441
- language_model.model.layers.7.input_layernorm
442
- language_model.model.layers.7.post_attention_layernorm
443
- language_model.model.layers.8
444
- language_model.model.layers.8.self_attn
445
- language_model.model.layers.8.self_attn.q_proj
446
- language_model.model.layers.8.self_attn.k_proj
447
- language_model.model.layers.8.self_attn.v_proj
448
- language_model.model.layers.8.self_attn.o_proj
449
- language_model.model.layers.8.mlp
450
- language_model.model.layers.8.mlp.gate_proj
451
- language_model.model.layers.8.mlp.up_proj
452
- language_model.model.layers.8.mlp.down_proj
453
- language_model.model.layers.8.mlp.act_fn
454
- language_model.model.layers.8.input_layernorm
455
- language_model.model.layers.8.post_attention_layernorm
456
- language_model.model.layers.9
457
- language_model.model.layers.9.self_attn
458
- language_model.model.layers.9.self_attn.q_proj
459
- language_model.model.layers.9.self_attn.k_proj
460
- language_model.model.layers.9.self_attn.v_proj
461
- language_model.model.layers.9.self_attn.o_proj
462
- language_model.model.layers.9.mlp
463
- language_model.model.layers.9.mlp.gate_proj
464
- language_model.model.layers.9.mlp.up_proj
465
- language_model.model.layers.9.mlp.down_proj
466
- language_model.model.layers.9.mlp.act_fn
467
- language_model.model.layers.9.input_layernorm
468
- language_model.model.layers.9.post_attention_layernorm
469
- language_model.model.layers.10
470
- language_model.model.layers.10.self_attn
471
- language_model.model.layers.10.self_attn.q_proj
472
- language_model.model.layers.10.self_attn.k_proj
473
- language_model.model.layers.10.self_attn.v_proj
474
- language_model.model.layers.10.self_attn.o_proj
475
- language_model.model.layers.10.mlp
476
- language_model.model.layers.10.mlp.gate_proj
477
- language_model.model.layers.10.mlp.up_proj
478
- language_model.model.layers.10.mlp.down_proj
479
- language_model.model.layers.10.mlp.act_fn
480
- language_model.model.layers.10.input_layernorm
481
- language_model.model.layers.10.post_attention_layernorm
482
- language_model.model.layers.11
483
- language_model.model.layers.11.self_attn
484
- language_model.model.layers.11.self_attn.q_proj
485
- language_model.model.layers.11.self_attn.k_proj
486
- language_model.model.layers.11.self_attn.v_proj
487
- language_model.model.layers.11.self_attn.o_proj
488
- language_model.model.layers.11.mlp
489
- language_model.model.layers.11.mlp.gate_proj
490
- language_model.model.layers.11.mlp.up_proj
491
- language_model.model.layers.11.mlp.down_proj
492
- language_model.model.layers.11.mlp.act_fn
493
- language_model.model.layers.11.input_layernorm
494
- language_model.model.layers.11.post_attention_layernorm
495
- language_model.model.layers.12
496
- language_model.model.layers.12.self_attn
497
- language_model.model.layers.12.self_attn.q_proj
498
- language_model.model.layers.12.self_attn.k_proj
499
- language_model.model.layers.12.self_attn.v_proj
500
- language_model.model.layers.12.self_attn.o_proj
501
- language_model.model.layers.12.mlp
502
- language_model.model.layers.12.mlp.gate_proj
503
- language_model.model.layers.12.mlp.up_proj
504
- language_model.model.layers.12.mlp.down_proj
505
- language_model.model.layers.12.mlp.act_fn
506
- language_model.model.layers.12.input_layernorm
507
- language_model.model.layers.12.post_attention_layernorm
508
- language_model.model.layers.13
509
- language_model.model.layers.13.self_attn
510
- language_model.model.layers.13.self_attn.q_proj
511
- language_model.model.layers.13.self_attn.k_proj
512
- language_model.model.layers.13.self_attn.v_proj
513
- language_model.model.layers.13.self_attn.o_proj
514
- language_model.model.layers.13.mlp
515
- language_model.model.layers.13.mlp.gate_proj
516
- language_model.model.layers.13.mlp.up_proj
517
- language_model.model.layers.13.mlp.down_proj
518
- language_model.model.layers.13.mlp.act_fn
519
- language_model.model.layers.13.input_layernorm
520
- language_model.model.layers.13.post_attention_layernorm
521
- language_model.model.layers.14
522
- language_model.model.layers.14.self_attn
523
- language_model.model.layers.14.self_attn.q_proj
524
- language_model.model.layers.14.self_attn.k_proj
525
- language_model.model.layers.14.self_attn.v_proj
526
- language_model.model.layers.14.self_attn.o_proj
527
- language_model.model.layers.14.mlp
528
- language_model.model.layers.14.mlp.gate_proj
529
- language_model.model.layers.14.mlp.up_proj
530
- language_model.model.layers.14.mlp.down_proj
531
- language_model.model.layers.14.mlp.act_fn
532
- language_model.model.layers.14.input_layernorm
533
- language_model.model.layers.14.post_attention_layernorm
534
- language_model.model.layers.15
535
- language_model.model.layers.15.self_attn
536
- language_model.model.layers.15.self_attn.q_proj
537
- language_model.model.layers.15.self_attn.k_proj
538
- language_model.model.layers.15.self_attn.v_proj
539
- language_model.model.layers.15.self_attn.o_proj
540
- language_model.model.layers.15.mlp
541
- language_model.model.layers.15.mlp.gate_proj
542
- language_model.model.layers.15.mlp.up_proj
543
- language_model.model.layers.15.mlp.down_proj
544
- language_model.model.layers.15.mlp.act_fn
545
- language_model.model.layers.15.input_layernorm
546
- language_model.model.layers.15.post_attention_layernorm
547
- language_model.model.layers.16
548
- language_model.model.layers.16.self_attn
549
- language_model.model.layers.16.self_attn.q_proj
550
- language_model.model.layers.16.self_attn.k_proj
551
- language_model.model.layers.16.self_attn.v_proj
552
- language_model.model.layers.16.self_attn.o_proj
553
- language_model.model.layers.16.mlp
554
- language_model.model.layers.16.mlp.gate_proj
555
- language_model.model.layers.16.mlp.up_proj
556
- language_model.model.layers.16.mlp.down_proj
557
- language_model.model.layers.16.mlp.act_fn
558
- language_model.model.layers.16.input_layernorm
559
- language_model.model.layers.16.post_attention_layernorm
560
- language_model.model.layers.17
561
- language_model.model.layers.17.self_attn
562
- language_model.model.layers.17.self_attn.q_proj
563
- language_model.model.layers.17.self_attn.k_proj
564
- language_model.model.layers.17.self_attn.v_proj
565
- language_model.model.layers.17.self_attn.o_proj
566
- language_model.model.layers.17.mlp
567
- language_model.model.layers.17.mlp.gate_proj
568
- language_model.model.layers.17.mlp.up_proj
569
- language_model.model.layers.17.mlp.down_proj
570
- language_model.model.layers.17.mlp.act_fn
571
- language_model.model.layers.17.input_layernorm
572
- language_model.model.layers.17.post_attention_layernorm
573
- language_model.model.norm
574
- language_model.model.rotary_emb
575
- language_model.lm_head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/wonderwind271/MiniCPM-V-2.txt DELETED
@@ -1,1133 +0,0 @@
1
-
2
- llm
3
- llm.model
4
- llm.model.embed_tokens
5
- llm.model.layers
6
- llm.model.layers.0
7
- llm.model.layers.0.self_attn
8
- llm.model.layers.0.self_attn.q_proj
9
- llm.model.layers.0.self_attn.k_proj
10
- llm.model.layers.0.self_attn.v_proj
11
- llm.model.layers.0.self_attn.o_proj
12
- llm.model.layers.0.self_attn.rotary_emb
13
- llm.model.layers.0.mlp
14
- llm.model.layers.0.mlp.gate_proj
15
- llm.model.layers.0.mlp.up_proj
16
- llm.model.layers.0.mlp.down_proj
17
- llm.model.layers.0.mlp.act_fn
18
- llm.model.layers.0.input_layernorm
19
- llm.model.layers.0.post_attention_layernorm
20
- llm.model.layers.1
21
- llm.model.layers.1.self_attn
22
- llm.model.layers.1.self_attn.q_proj
23
- llm.model.layers.1.self_attn.k_proj
24
- llm.model.layers.1.self_attn.v_proj
25
- llm.model.layers.1.self_attn.o_proj
26
- llm.model.layers.1.self_attn.rotary_emb
27
- llm.model.layers.1.mlp
28
- llm.model.layers.1.mlp.gate_proj
29
- llm.model.layers.1.mlp.up_proj
30
- llm.model.layers.1.mlp.down_proj
31
- llm.model.layers.1.mlp.act_fn
32
- llm.model.layers.1.input_layernorm
33
- llm.model.layers.1.post_attention_layernorm
34
- llm.model.layers.2
35
- llm.model.layers.2.self_attn
36
- llm.model.layers.2.self_attn.q_proj
37
- llm.model.layers.2.self_attn.k_proj
38
- llm.model.layers.2.self_attn.v_proj
39
- llm.model.layers.2.self_attn.o_proj
40
- llm.model.layers.2.self_attn.rotary_emb
41
- llm.model.layers.2.mlp
42
- llm.model.layers.2.mlp.gate_proj
43
- llm.model.layers.2.mlp.up_proj
44
- llm.model.layers.2.mlp.down_proj
45
- llm.model.layers.2.mlp.act_fn
46
- llm.model.layers.2.input_layernorm
47
- llm.model.layers.2.post_attention_layernorm
48
- llm.model.layers.3
49
- llm.model.layers.3.self_attn
50
- llm.model.layers.3.self_attn.q_proj
51
- llm.model.layers.3.self_attn.k_proj
52
- llm.model.layers.3.self_attn.v_proj
53
- llm.model.layers.3.self_attn.o_proj
54
- llm.model.layers.3.self_attn.rotary_emb
55
- llm.model.layers.3.mlp
56
- llm.model.layers.3.mlp.gate_proj
57
- llm.model.layers.3.mlp.up_proj
58
- llm.model.layers.3.mlp.down_proj
59
- llm.model.layers.3.mlp.act_fn
60
- llm.model.layers.3.input_layernorm
61
- llm.model.layers.3.post_attention_layernorm
62
- llm.model.layers.4
63
- llm.model.layers.4.self_attn
64
- llm.model.layers.4.self_attn.q_proj
65
- llm.model.layers.4.self_attn.k_proj
66
- llm.model.layers.4.self_attn.v_proj
67
- llm.model.layers.4.self_attn.o_proj
68
- llm.model.layers.4.self_attn.rotary_emb
69
- llm.model.layers.4.mlp
70
- llm.model.layers.4.mlp.gate_proj
71
- llm.model.layers.4.mlp.up_proj
72
- llm.model.layers.4.mlp.down_proj
73
- llm.model.layers.4.mlp.act_fn
74
- llm.model.layers.4.input_layernorm
75
- llm.model.layers.4.post_attention_layernorm
76
- llm.model.layers.5
77
- llm.model.layers.5.self_attn
78
- llm.model.layers.5.self_attn.q_proj
79
- llm.model.layers.5.self_attn.k_proj
80
- llm.model.layers.5.self_attn.v_proj
81
- llm.model.layers.5.self_attn.o_proj
82
- llm.model.layers.5.self_attn.rotary_emb
83
- llm.model.layers.5.mlp
84
- llm.model.layers.5.mlp.gate_proj
85
- llm.model.layers.5.mlp.up_proj
86
- llm.model.layers.5.mlp.down_proj
87
- llm.model.layers.5.mlp.act_fn
88
- llm.model.layers.5.input_layernorm
89
- llm.model.layers.5.post_attention_layernorm
90
- llm.model.layers.6
91
- llm.model.layers.6.self_attn
92
- llm.model.layers.6.self_attn.q_proj
93
- llm.model.layers.6.self_attn.k_proj
94
- llm.model.layers.6.self_attn.v_proj
95
- llm.model.layers.6.self_attn.o_proj
96
- llm.model.layers.6.self_attn.rotary_emb
97
- llm.model.layers.6.mlp
98
- llm.model.layers.6.mlp.gate_proj
99
- llm.model.layers.6.mlp.up_proj
100
- llm.model.layers.6.mlp.down_proj
101
- llm.model.layers.6.mlp.act_fn
102
- llm.model.layers.6.input_layernorm
103
- llm.model.layers.6.post_attention_layernorm
104
- llm.model.layers.7
105
- llm.model.layers.7.self_attn
106
- llm.model.layers.7.self_attn.q_proj
107
- llm.model.layers.7.self_attn.k_proj
108
- llm.model.layers.7.self_attn.v_proj
109
- llm.model.layers.7.self_attn.o_proj
110
- llm.model.layers.7.self_attn.rotary_emb
111
- llm.model.layers.7.mlp
112
- llm.model.layers.7.mlp.gate_proj
113
- llm.model.layers.7.mlp.up_proj
114
- llm.model.layers.7.mlp.down_proj
115
- llm.model.layers.7.mlp.act_fn
116
- llm.model.layers.7.input_layernorm
117
- llm.model.layers.7.post_attention_layernorm
118
- llm.model.layers.8
119
- llm.model.layers.8.self_attn
120
- llm.model.layers.8.self_attn.q_proj
121
- llm.model.layers.8.self_attn.k_proj
122
- llm.model.layers.8.self_attn.v_proj
123
- llm.model.layers.8.self_attn.o_proj
124
- llm.model.layers.8.self_attn.rotary_emb
125
- llm.model.layers.8.mlp
126
- llm.model.layers.8.mlp.gate_proj
127
- llm.model.layers.8.mlp.up_proj
128
- llm.model.layers.8.mlp.down_proj
129
- llm.model.layers.8.mlp.act_fn
130
- llm.model.layers.8.input_layernorm
131
- llm.model.layers.8.post_attention_layernorm
132
- llm.model.layers.9
133
- llm.model.layers.9.self_attn
134
- llm.model.layers.9.self_attn.q_proj
135
- llm.model.layers.9.self_attn.k_proj
136
- llm.model.layers.9.self_attn.v_proj
137
- llm.model.layers.9.self_attn.o_proj
138
- llm.model.layers.9.self_attn.rotary_emb
139
- llm.model.layers.9.mlp
140
- llm.model.layers.9.mlp.gate_proj
141
- llm.model.layers.9.mlp.up_proj
142
- llm.model.layers.9.mlp.down_proj
143
- llm.model.layers.9.mlp.act_fn
144
- llm.model.layers.9.input_layernorm
145
- llm.model.layers.9.post_attention_layernorm
146
- llm.model.layers.10
147
- llm.model.layers.10.self_attn
148
- llm.model.layers.10.self_attn.q_proj
149
- llm.model.layers.10.self_attn.k_proj
150
- llm.model.layers.10.self_attn.v_proj
151
- llm.model.layers.10.self_attn.o_proj
152
- llm.model.layers.10.self_attn.rotary_emb
153
- llm.model.layers.10.mlp
154
- llm.model.layers.10.mlp.gate_proj
155
- llm.model.layers.10.mlp.up_proj
156
- llm.model.layers.10.mlp.down_proj
157
- llm.model.layers.10.mlp.act_fn
158
- llm.model.layers.10.input_layernorm
159
- llm.model.layers.10.post_attention_layernorm
160
- llm.model.layers.11
161
- llm.model.layers.11.self_attn
162
- llm.model.layers.11.self_attn.q_proj
163
- llm.model.layers.11.self_attn.k_proj
164
- llm.model.layers.11.self_attn.v_proj
165
- llm.model.layers.11.self_attn.o_proj
166
- llm.model.layers.11.self_attn.rotary_emb
167
- llm.model.layers.11.mlp
168
- llm.model.layers.11.mlp.gate_proj
169
- llm.model.layers.11.mlp.up_proj
170
- llm.model.layers.11.mlp.down_proj
171
- llm.model.layers.11.mlp.act_fn
172
- llm.model.layers.11.input_layernorm
173
- llm.model.layers.11.post_attention_layernorm
174
- llm.model.layers.12
175
- llm.model.layers.12.self_attn
176
- llm.model.layers.12.self_attn.q_proj
177
- llm.model.layers.12.self_attn.k_proj
178
- llm.model.layers.12.self_attn.v_proj
179
- llm.model.layers.12.self_attn.o_proj
180
- llm.model.layers.12.self_attn.rotary_emb
181
- llm.model.layers.12.mlp
182
- llm.model.layers.12.mlp.gate_proj
183
- llm.model.layers.12.mlp.up_proj
184
- llm.model.layers.12.mlp.down_proj
185
- llm.model.layers.12.mlp.act_fn
186
- llm.model.layers.12.input_layernorm
187
- llm.model.layers.12.post_attention_layernorm
188
- llm.model.layers.13
189
- llm.model.layers.13.self_attn
190
- llm.model.layers.13.self_attn.q_proj
191
- llm.model.layers.13.self_attn.k_proj
192
- llm.model.layers.13.self_attn.v_proj
193
- llm.model.layers.13.self_attn.o_proj
194
- llm.model.layers.13.self_attn.rotary_emb
195
- llm.model.layers.13.mlp
196
- llm.model.layers.13.mlp.gate_proj
197
- llm.model.layers.13.mlp.up_proj
198
- llm.model.layers.13.mlp.down_proj
199
- llm.model.layers.13.mlp.act_fn
200
- llm.model.layers.13.input_layernorm
201
- llm.model.layers.13.post_attention_layernorm
202
- llm.model.layers.14
203
- llm.model.layers.14.self_attn
204
- llm.model.layers.14.self_attn.q_proj
205
- llm.model.layers.14.self_attn.k_proj
206
- llm.model.layers.14.self_attn.v_proj
207
- llm.model.layers.14.self_attn.o_proj
208
- llm.model.layers.14.self_attn.rotary_emb
209
- llm.model.layers.14.mlp
210
- llm.model.layers.14.mlp.gate_proj
211
- llm.model.layers.14.mlp.up_proj
212
- llm.model.layers.14.mlp.down_proj
213
- llm.model.layers.14.mlp.act_fn
214
- llm.model.layers.14.input_layernorm
215
- llm.model.layers.14.post_attention_layernorm
216
- llm.model.layers.15
217
- llm.model.layers.15.self_attn
218
- llm.model.layers.15.self_attn.q_proj
219
- llm.model.layers.15.self_attn.k_proj
220
- llm.model.layers.15.self_attn.v_proj
221
- llm.model.layers.15.self_attn.o_proj
222
- llm.model.layers.15.self_attn.rotary_emb
223
- llm.model.layers.15.mlp
224
- llm.model.layers.15.mlp.gate_proj
225
- llm.model.layers.15.mlp.up_proj
226
- llm.model.layers.15.mlp.down_proj
227
- llm.model.layers.15.mlp.act_fn
228
- llm.model.layers.15.input_layernorm
229
- llm.model.layers.15.post_attention_layernorm
230
- llm.model.layers.16
231
- llm.model.layers.16.self_attn
232
- llm.model.layers.16.self_attn.q_proj
233
- llm.model.layers.16.self_attn.k_proj
234
- llm.model.layers.16.self_attn.v_proj
235
- llm.model.layers.16.self_attn.o_proj
236
- llm.model.layers.16.self_attn.rotary_emb
237
- llm.model.layers.16.mlp
238
- llm.model.layers.16.mlp.gate_proj
239
- llm.model.layers.16.mlp.up_proj
240
- llm.model.layers.16.mlp.down_proj
241
- llm.model.layers.16.mlp.act_fn
242
- llm.model.layers.16.input_layernorm
243
- llm.model.layers.16.post_attention_layernorm
244
- llm.model.layers.17
245
- llm.model.layers.17.self_attn
246
- llm.model.layers.17.self_attn.q_proj
247
- llm.model.layers.17.self_attn.k_proj
248
- llm.model.layers.17.self_attn.v_proj
249
- llm.model.layers.17.self_attn.o_proj
250
- llm.model.layers.17.self_attn.rotary_emb
251
- llm.model.layers.17.mlp
252
- llm.model.layers.17.mlp.gate_proj
253
- llm.model.layers.17.mlp.up_proj
254
- llm.model.layers.17.mlp.down_proj
255
- llm.model.layers.17.mlp.act_fn
256
- llm.model.layers.17.input_layernorm
257
- llm.model.layers.17.post_attention_layernorm
258
- llm.model.layers.18
259
- llm.model.layers.18.self_attn
260
- llm.model.layers.18.self_attn.q_proj
261
- llm.model.layers.18.self_attn.k_proj
262
- llm.model.layers.18.self_attn.v_proj
263
- llm.model.layers.18.self_attn.o_proj
264
- llm.model.layers.18.self_attn.rotary_emb
265
- llm.model.layers.18.mlp
266
- llm.model.layers.18.mlp.gate_proj
267
- llm.model.layers.18.mlp.up_proj
268
- llm.model.layers.18.mlp.down_proj
269
- llm.model.layers.18.mlp.act_fn
270
- llm.model.layers.18.input_layernorm
271
- llm.model.layers.18.post_attention_layernorm
272
- llm.model.layers.19
273
- llm.model.layers.19.self_attn
274
- llm.model.layers.19.self_attn.q_proj
275
- llm.model.layers.19.self_attn.k_proj
276
- llm.model.layers.19.self_attn.v_proj
277
- llm.model.layers.19.self_attn.o_proj
278
- llm.model.layers.19.self_attn.rotary_emb
279
- llm.model.layers.19.mlp
280
- llm.model.layers.19.mlp.gate_proj
281
- llm.model.layers.19.mlp.up_proj
282
- llm.model.layers.19.mlp.down_proj
283
- llm.model.layers.19.mlp.act_fn
284
- llm.model.layers.19.input_layernorm
285
- llm.model.layers.19.post_attention_layernorm
286
- llm.model.layers.20
287
- llm.model.layers.20.self_attn
288
- llm.model.layers.20.self_attn.q_proj
289
- llm.model.layers.20.self_attn.k_proj
290
- llm.model.layers.20.self_attn.v_proj
291
- llm.model.layers.20.self_attn.o_proj
292
- llm.model.layers.20.self_attn.rotary_emb
293
- llm.model.layers.20.mlp
294
- llm.model.layers.20.mlp.gate_proj
295
- llm.model.layers.20.mlp.up_proj
296
- llm.model.layers.20.mlp.down_proj
297
- llm.model.layers.20.mlp.act_fn
298
- llm.model.layers.20.input_layernorm
299
- llm.model.layers.20.post_attention_layernorm
300
- llm.model.layers.21
301
- llm.model.layers.21.self_attn
302
- llm.model.layers.21.self_attn.q_proj
303
- llm.model.layers.21.self_attn.k_proj
304
- llm.model.layers.21.self_attn.v_proj
305
- llm.model.layers.21.self_attn.o_proj
306
- llm.model.layers.21.self_attn.rotary_emb
307
- llm.model.layers.21.mlp
308
- llm.model.layers.21.mlp.gate_proj
309
- llm.model.layers.21.mlp.up_proj
310
- llm.model.layers.21.mlp.down_proj
311
- llm.model.layers.21.mlp.act_fn
312
- llm.model.layers.21.input_layernorm
313
- llm.model.layers.21.post_attention_layernorm
314
- llm.model.layers.22
315
- llm.model.layers.22.self_attn
316
- llm.model.layers.22.self_attn.q_proj
317
- llm.model.layers.22.self_attn.k_proj
318
- llm.model.layers.22.self_attn.v_proj
319
- llm.model.layers.22.self_attn.o_proj
320
- llm.model.layers.22.self_attn.rotary_emb
321
- llm.model.layers.22.mlp
322
- llm.model.layers.22.mlp.gate_proj
323
- llm.model.layers.22.mlp.up_proj
324
- llm.model.layers.22.mlp.down_proj
325
- llm.model.layers.22.mlp.act_fn
326
- llm.model.layers.22.input_layernorm
327
- llm.model.layers.22.post_attention_layernorm
328
- llm.model.layers.23
329
- llm.model.layers.23.self_attn
330
- llm.model.layers.23.self_attn.q_proj
331
- llm.model.layers.23.self_attn.k_proj
332
- llm.model.layers.23.self_attn.v_proj
333
- llm.model.layers.23.self_attn.o_proj
334
- llm.model.layers.23.self_attn.rotary_emb
335
- llm.model.layers.23.mlp
336
- llm.model.layers.23.mlp.gate_proj
337
- llm.model.layers.23.mlp.up_proj
338
- llm.model.layers.23.mlp.down_proj
339
- llm.model.layers.23.mlp.act_fn
340
- llm.model.layers.23.input_layernorm
341
- llm.model.layers.23.post_attention_layernorm
342
- llm.model.layers.24
343
- llm.model.layers.24.self_attn
344
- llm.model.layers.24.self_attn.q_proj
345
- llm.model.layers.24.self_attn.k_proj
346
- llm.model.layers.24.self_attn.v_proj
347
- llm.model.layers.24.self_attn.o_proj
348
- llm.model.layers.24.self_attn.rotary_emb
349
- llm.model.layers.24.mlp
350
- llm.model.layers.24.mlp.gate_proj
351
- llm.model.layers.24.mlp.up_proj
352
- llm.model.layers.24.mlp.down_proj
353
- llm.model.layers.24.mlp.act_fn
354
- llm.model.layers.24.input_layernorm
355
- llm.model.layers.24.post_attention_layernorm
356
- llm.model.layers.25
357
- llm.model.layers.25.self_attn
358
- llm.model.layers.25.self_attn.q_proj
359
- llm.model.layers.25.self_attn.k_proj
360
- llm.model.layers.25.self_attn.v_proj
361
- llm.model.layers.25.self_attn.o_proj
362
- llm.model.layers.25.self_attn.rotary_emb
363
- llm.model.layers.25.mlp
364
- llm.model.layers.25.mlp.gate_proj
365
- llm.model.layers.25.mlp.up_proj
366
- llm.model.layers.25.mlp.down_proj
367
- llm.model.layers.25.mlp.act_fn
368
- llm.model.layers.25.input_layernorm
369
- llm.model.layers.25.post_attention_layernorm
370
- llm.model.layers.26
371
- llm.model.layers.26.self_attn
372
- llm.model.layers.26.self_attn.q_proj
373
- llm.model.layers.26.self_attn.k_proj
374
- llm.model.layers.26.self_attn.v_proj
375
- llm.model.layers.26.self_attn.o_proj
376
- llm.model.layers.26.self_attn.rotary_emb
377
- llm.model.layers.26.mlp
378
- llm.model.layers.26.mlp.gate_proj
379
- llm.model.layers.26.mlp.up_proj
380
- llm.model.layers.26.mlp.down_proj
381
- llm.model.layers.26.mlp.act_fn
382
- llm.model.layers.26.input_layernorm
383
- llm.model.layers.26.post_attention_layernorm
384
- llm.model.layers.27
385
- llm.model.layers.27.self_attn
386
- llm.model.layers.27.self_attn.q_proj
387
- llm.model.layers.27.self_attn.k_proj
388
- llm.model.layers.27.self_attn.v_proj
389
- llm.model.layers.27.self_attn.o_proj
390
- llm.model.layers.27.self_attn.rotary_emb
391
- llm.model.layers.27.mlp
392
- llm.model.layers.27.mlp.gate_proj
393
- llm.model.layers.27.mlp.up_proj
394
- llm.model.layers.27.mlp.down_proj
395
- llm.model.layers.27.mlp.act_fn
396
- llm.model.layers.27.input_layernorm
397
- llm.model.layers.27.post_attention_layernorm
398
- llm.model.layers.28
399
- llm.model.layers.28.self_attn
400
- llm.model.layers.28.self_attn.q_proj
401
- llm.model.layers.28.self_attn.k_proj
402
- llm.model.layers.28.self_attn.v_proj
403
- llm.model.layers.28.self_attn.o_proj
404
- llm.model.layers.28.self_attn.rotary_emb
405
- llm.model.layers.28.mlp
406
- llm.model.layers.28.mlp.gate_proj
407
- llm.model.layers.28.mlp.up_proj
408
- llm.model.layers.28.mlp.down_proj
409
- llm.model.layers.28.mlp.act_fn
410
- llm.model.layers.28.input_layernorm
411
- llm.model.layers.28.post_attention_layernorm
412
- llm.model.layers.29
413
- llm.model.layers.29.self_attn
414
- llm.model.layers.29.self_attn.q_proj
415
- llm.model.layers.29.self_attn.k_proj
416
- llm.model.layers.29.self_attn.v_proj
417
- llm.model.layers.29.self_attn.o_proj
418
- llm.model.layers.29.self_attn.rotary_emb
419
- llm.model.layers.29.mlp
420
- llm.model.layers.29.mlp.gate_proj
421
- llm.model.layers.29.mlp.up_proj
422
- llm.model.layers.29.mlp.down_proj
423
- llm.model.layers.29.mlp.act_fn
424
- llm.model.layers.29.input_layernorm
425
- llm.model.layers.29.post_attention_layernorm
426
- llm.model.layers.30
427
- llm.model.layers.30.self_attn
428
- llm.model.layers.30.self_attn.q_proj
429
- llm.model.layers.30.self_attn.k_proj
430
- llm.model.layers.30.self_attn.v_proj
431
- llm.model.layers.30.self_attn.o_proj
432
- llm.model.layers.30.self_attn.rotary_emb
433
- llm.model.layers.30.mlp
434
- llm.model.layers.30.mlp.gate_proj
435
- llm.model.layers.30.mlp.up_proj
436
- llm.model.layers.30.mlp.down_proj
437
- llm.model.layers.30.mlp.act_fn
438
- llm.model.layers.30.input_layernorm
439
- llm.model.layers.30.post_attention_layernorm
440
- llm.model.layers.31
441
- llm.model.layers.31.self_attn
442
- llm.model.layers.31.self_attn.q_proj
443
- llm.model.layers.31.self_attn.k_proj
444
- llm.model.layers.31.self_attn.v_proj
445
- llm.model.layers.31.self_attn.o_proj
446
- llm.model.layers.31.self_attn.rotary_emb
447
- llm.model.layers.31.mlp
448
- llm.model.layers.31.mlp.gate_proj
449
- llm.model.layers.31.mlp.up_proj
450
- llm.model.layers.31.mlp.down_proj
451
- llm.model.layers.31.mlp.act_fn
452
- llm.model.layers.31.input_layernorm
453
- llm.model.layers.31.post_attention_layernorm
454
- llm.model.layers.32
455
- llm.model.layers.32.self_attn
456
- llm.model.layers.32.self_attn.q_proj
457
- llm.model.layers.32.self_attn.k_proj
458
- llm.model.layers.32.self_attn.v_proj
459
- llm.model.layers.32.self_attn.o_proj
460
- llm.model.layers.32.self_attn.rotary_emb
461
- llm.model.layers.32.mlp
462
- llm.model.layers.32.mlp.gate_proj
463
- llm.model.layers.32.mlp.up_proj
464
- llm.model.layers.32.mlp.down_proj
465
- llm.model.layers.32.mlp.act_fn
466
- llm.model.layers.32.input_layernorm
467
- llm.model.layers.32.post_attention_layernorm
468
- llm.model.layers.33
469
- llm.model.layers.33.self_attn
470
- llm.model.layers.33.self_attn.q_proj
471
- llm.model.layers.33.self_attn.k_proj
472
- llm.model.layers.33.self_attn.v_proj
473
- llm.model.layers.33.self_attn.o_proj
474
- llm.model.layers.33.self_attn.rotary_emb
475
- llm.model.layers.33.mlp
476
- llm.model.layers.33.mlp.gate_proj
477
- llm.model.layers.33.mlp.up_proj
478
- llm.model.layers.33.mlp.down_proj
479
- llm.model.layers.33.mlp.act_fn
480
- llm.model.layers.33.input_layernorm
481
- llm.model.layers.33.post_attention_layernorm
482
- llm.model.layers.34
483
- llm.model.layers.34.self_attn
484
- llm.model.layers.34.self_attn.q_proj
485
- llm.model.layers.34.self_attn.k_proj
486
- llm.model.layers.34.self_attn.v_proj
487
- llm.model.layers.34.self_attn.o_proj
488
- llm.model.layers.34.self_attn.rotary_emb
489
- llm.model.layers.34.mlp
490
- llm.model.layers.34.mlp.gate_proj
491
- llm.model.layers.34.mlp.up_proj
492
- llm.model.layers.34.mlp.down_proj
493
- llm.model.layers.34.mlp.act_fn
494
- llm.model.layers.34.input_layernorm
495
- llm.model.layers.34.post_attention_layernorm
496
- llm.model.layers.35
497
- llm.model.layers.35.self_attn
498
- llm.model.layers.35.self_attn.q_proj
499
- llm.model.layers.35.self_attn.k_proj
500
- llm.model.layers.35.self_attn.v_proj
501
- llm.model.layers.35.self_attn.o_proj
502
- llm.model.layers.35.self_attn.rotary_emb
503
- llm.model.layers.35.mlp
504
- llm.model.layers.35.mlp.gate_proj
505
- llm.model.layers.35.mlp.up_proj
506
- llm.model.layers.35.mlp.down_proj
507
- llm.model.layers.35.mlp.act_fn
508
- llm.model.layers.35.input_layernorm
509
- llm.model.layers.35.post_attention_layernorm
510
- llm.model.layers.36
511
- llm.model.layers.36.self_attn
512
- llm.model.layers.36.self_attn.q_proj
513
- llm.model.layers.36.self_attn.k_proj
514
- llm.model.layers.36.self_attn.v_proj
515
- llm.model.layers.36.self_attn.o_proj
516
- llm.model.layers.36.self_attn.rotary_emb
517
- llm.model.layers.36.mlp
518
- llm.model.layers.36.mlp.gate_proj
519
- llm.model.layers.36.mlp.up_proj
520
- llm.model.layers.36.mlp.down_proj
521
- llm.model.layers.36.mlp.act_fn
522
- llm.model.layers.36.input_layernorm
523
- llm.model.layers.36.post_attention_layernorm
524
- llm.model.layers.37
525
- llm.model.layers.37.self_attn
526
- llm.model.layers.37.self_attn.q_proj
527
- llm.model.layers.37.self_attn.k_proj
528
- llm.model.layers.37.self_attn.v_proj
529
- llm.model.layers.37.self_attn.o_proj
530
- llm.model.layers.37.self_attn.rotary_emb
531
- llm.model.layers.37.mlp
532
- llm.model.layers.37.mlp.gate_proj
533
- llm.model.layers.37.mlp.up_proj
534
- llm.model.layers.37.mlp.down_proj
535
- llm.model.layers.37.mlp.act_fn
536
- llm.model.layers.37.input_layernorm
537
- llm.model.layers.37.post_attention_layernorm
538
- llm.model.layers.38
539
- llm.model.layers.38.self_attn
540
- llm.model.layers.38.self_attn.q_proj
541
- llm.model.layers.38.self_attn.k_proj
542
- llm.model.layers.38.self_attn.v_proj
543
- llm.model.layers.38.self_attn.o_proj
544
- llm.model.layers.38.self_attn.rotary_emb
545
- llm.model.layers.38.mlp
546
- llm.model.layers.38.mlp.gate_proj
547
- llm.model.layers.38.mlp.up_proj
548
- llm.model.layers.38.mlp.down_proj
549
- llm.model.layers.38.mlp.act_fn
550
- llm.model.layers.38.input_layernorm
551
- llm.model.layers.38.post_attention_layernorm
552
- llm.model.layers.39
553
- llm.model.layers.39.self_attn
554
- llm.model.layers.39.self_attn.q_proj
555
- llm.model.layers.39.self_attn.k_proj
556
- llm.model.layers.39.self_attn.v_proj
557
- llm.model.layers.39.self_attn.o_proj
558
- llm.model.layers.39.self_attn.rotary_emb
559
- llm.model.layers.39.mlp
560
- llm.model.layers.39.mlp.gate_proj
561
- llm.model.layers.39.mlp.up_proj
562
- llm.model.layers.39.mlp.down_proj
563
- llm.model.layers.39.mlp.act_fn
564
- llm.model.layers.39.input_layernorm
565
- llm.model.layers.39.post_attention_layernorm
566
- llm.model.norm
567
- llm.lm_head
568
- vpm
569
- vpm.patch_embed
570
- vpm.patch_embed.proj
571
- vpm.patch_embed.norm
572
- vpm.pos_drop
573
- vpm.patch_drop
574
- vpm.norm_pre
575
- vpm.blocks
576
- vpm.blocks.0
577
- vpm.blocks.0.norm1
578
- vpm.blocks.0.attn
579
- vpm.blocks.0.attn.qkv
580
- vpm.blocks.0.attn.q_norm
581
- vpm.blocks.0.attn.k_norm
582
- vpm.blocks.0.attn.attn_drop
583
- vpm.blocks.0.attn.proj
584
- vpm.blocks.0.attn.proj_drop
585
- vpm.blocks.0.ls1
586
- vpm.blocks.0.drop_path1
587
- vpm.blocks.0.norm2
588
- vpm.blocks.0.mlp
589
- vpm.blocks.0.mlp.fc1
590
- vpm.blocks.0.mlp.act
591
- vpm.blocks.0.mlp.drop1
592
- vpm.blocks.0.mlp.norm
593
- vpm.blocks.0.mlp.fc2
594
- vpm.blocks.0.mlp.drop2
595
- vpm.blocks.0.ls2
596
- vpm.blocks.0.drop_path2
597
- vpm.blocks.1
598
- vpm.blocks.1.norm1
599
- vpm.blocks.1.attn
600
- vpm.blocks.1.attn.qkv
601
- vpm.blocks.1.attn.q_norm
602
- vpm.blocks.1.attn.k_norm
603
- vpm.blocks.1.attn.attn_drop
604
- vpm.blocks.1.attn.proj
605
- vpm.blocks.1.attn.proj_drop
606
- vpm.blocks.1.ls1
607
- vpm.blocks.1.drop_path1
608
- vpm.blocks.1.norm2
609
- vpm.blocks.1.mlp
610
- vpm.blocks.1.mlp.fc1
611
- vpm.blocks.1.mlp.act
612
- vpm.blocks.1.mlp.drop1
613
- vpm.blocks.1.mlp.norm
614
- vpm.blocks.1.mlp.fc2
615
- vpm.blocks.1.mlp.drop2
616
- vpm.blocks.1.ls2
617
- vpm.blocks.1.drop_path2
618
- vpm.blocks.2
619
- vpm.blocks.2.norm1
620
- vpm.blocks.2.attn
621
- vpm.blocks.2.attn.qkv
622
- vpm.blocks.2.attn.q_norm
623
- vpm.blocks.2.attn.k_norm
624
- vpm.blocks.2.attn.attn_drop
625
- vpm.blocks.2.attn.proj
626
- vpm.blocks.2.attn.proj_drop
627
- vpm.blocks.2.ls1
628
- vpm.blocks.2.drop_path1
629
- vpm.blocks.2.norm2
630
- vpm.blocks.2.mlp
631
- vpm.blocks.2.mlp.fc1
632
- vpm.blocks.2.mlp.act
633
- vpm.blocks.2.mlp.drop1
634
- vpm.blocks.2.mlp.norm
635
- vpm.blocks.2.mlp.fc2
636
- vpm.blocks.2.mlp.drop2
637
- vpm.blocks.2.ls2
638
- vpm.blocks.2.drop_path2
639
- vpm.blocks.3
640
- vpm.blocks.3.norm1
641
- vpm.blocks.3.attn
642
- vpm.blocks.3.attn.qkv
643
- vpm.blocks.3.attn.q_norm
644
- vpm.blocks.3.attn.k_norm
645
- vpm.blocks.3.attn.attn_drop
646
- vpm.blocks.3.attn.proj
647
- vpm.blocks.3.attn.proj_drop
648
- vpm.blocks.3.ls1
649
- vpm.blocks.3.drop_path1
650
- vpm.blocks.3.norm2
651
- vpm.blocks.3.mlp
652
- vpm.blocks.3.mlp.fc1
653
- vpm.blocks.3.mlp.act
654
- vpm.blocks.3.mlp.drop1
655
- vpm.blocks.3.mlp.norm
656
- vpm.blocks.3.mlp.fc2
657
- vpm.blocks.3.mlp.drop2
658
- vpm.blocks.3.ls2
659
- vpm.blocks.3.drop_path2
660
- vpm.blocks.4
661
- vpm.blocks.4.norm1
662
- vpm.blocks.4.attn
663
- vpm.blocks.4.attn.qkv
664
- vpm.blocks.4.attn.q_norm
665
- vpm.blocks.4.attn.k_norm
666
- vpm.blocks.4.attn.attn_drop
667
- vpm.blocks.4.attn.proj
668
- vpm.blocks.4.attn.proj_drop
669
- vpm.blocks.4.ls1
670
- vpm.blocks.4.drop_path1
671
- vpm.blocks.4.norm2
672
- vpm.blocks.4.mlp
673
- vpm.blocks.4.mlp.fc1
674
- vpm.blocks.4.mlp.act
675
- vpm.blocks.4.mlp.drop1
676
- vpm.blocks.4.mlp.norm
677
- vpm.blocks.4.mlp.fc2
678
- vpm.blocks.4.mlp.drop2
679
- vpm.blocks.4.ls2
680
- vpm.blocks.4.drop_path2
681
- vpm.blocks.5
682
- vpm.blocks.5.norm1
683
- vpm.blocks.5.attn
684
- vpm.blocks.5.attn.qkv
685
- vpm.blocks.5.attn.q_norm
686
- vpm.blocks.5.attn.k_norm
687
- vpm.blocks.5.attn.attn_drop
688
- vpm.blocks.5.attn.proj
689
- vpm.blocks.5.attn.proj_drop
690
- vpm.blocks.5.ls1
691
- vpm.blocks.5.drop_path1
692
- vpm.blocks.5.norm2
693
- vpm.blocks.5.mlp
694
- vpm.blocks.5.mlp.fc1
695
- vpm.blocks.5.mlp.act
696
- vpm.blocks.5.mlp.drop1
697
- vpm.blocks.5.mlp.norm
698
- vpm.blocks.5.mlp.fc2
699
- vpm.blocks.5.mlp.drop2
700
- vpm.blocks.5.ls2
701
- vpm.blocks.5.drop_path2
702
- vpm.blocks.6
703
- vpm.blocks.6.norm1
704
- vpm.blocks.6.attn
705
- vpm.blocks.6.attn.qkv
706
- vpm.blocks.6.attn.q_norm
707
- vpm.blocks.6.attn.k_norm
708
- vpm.blocks.6.attn.attn_drop
709
- vpm.blocks.6.attn.proj
710
- vpm.blocks.6.attn.proj_drop
711
- vpm.blocks.6.ls1
712
- vpm.blocks.6.drop_path1
713
- vpm.blocks.6.norm2
714
- vpm.blocks.6.mlp
715
- vpm.blocks.6.mlp.fc1
716
- vpm.blocks.6.mlp.act
717
- vpm.blocks.6.mlp.drop1
718
- vpm.blocks.6.mlp.norm
719
- vpm.blocks.6.mlp.fc2
720
- vpm.blocks.6.mlp.drop2
721
- vpm.blocks.6.ls2
722
- vpm.blocks.6.drop_path2
723
- vpm.blocks.7
724
- vpm.blocks.7.norm1
725
- vpm.blocks.7.attn
726
- vpm.blocks.7.attn.qkv
727
- vpm.blocks.7.attn.q_norm
728
- vpm.blocks.7.attn.k_norm
729
- vpm.blocks.7.attn.attn_drop
730
- vpm.blocks.7.attn.proj
731
- vpm.blocks.7.attn.proj_drop
732
- vpm.blocks.7.ls1
733
- vpm.blocks.7.drop_path1
734
- vpm.blocks.7.norm2
735
- vpm.blocks.7.mlp
736
- vpm.blocks.7.mlp.fc1
737
- vpm.blocks.7.mlp.act
738
- vpm.blocks.7.mlp.drop1
739
- vpm.blocks.7.mlp.norm
740
- vpm.blocks.7.mlp.fc2
741
- vpm.blocks.7.mlp.drop2
742
- vpm.blocks.7.ls2
743
- vpm.blocks.7.drop_path2
744
- vpm.blocks.8
745
- vpm.blocks.8.norm1
746
- vpm.blocks.8.attn
747
- vpm.blocks.8.attn.qkv
748
- vpm.blocks.8.attn.q_norm
749
- vpm.blocks.8.attn.k_norm
750
- vpm.blocks.8.attn.attn_drop
751
- vpm.blocks.8.attn.proj
752
- vpm.blocks.8.attn.proj_drop
753
- vpm.blocks.8.ls1
754
- vpm.blocks.8.drop_path1
755
- vpm.blocks.8.norm2
756
- vpm.blocks.8.mlp
757
- vpm.blocks.8.mlp.fc1
758
- vpm.blocks.8.mlp.act
759
- vpm.blocks.8.mlp.drop1
760
- vpm.blocks.8.mlp.norm
761
- vpm.blocks.8.mlp.fc2
762
- vpm.blocks.8.mlp.drop2
763
- vpm.blocks.8.ls2
764
- vpm.blocks.8.drop_path2
765
- vpm.blocks.9
766
- vpm.blocks.9.norm1
767
- vpm.blocks.9.attn
768
- vpm.blocks.9.attn.qkv
769
- vpm.blocks.9.attn.q_norm
770
- vpm.blocks.9.attn.k_norm
771
- vpm.blocks.9.attn.attn_drop
772
- vpm.blocks.9.attn.proj
773
- vpm.blocks.9.attn.proj_drop
774
- vpm.blocks.9.ls1
775
- vpm.blocks.9.drop_path1
776
- vpm.blocks.9.norm2
777
- vpm.blocks.9.mlp
778
- vpm.blocks.9.mlp.fc1
779
- vpm.blocks.9.mlp.act
780
- vpm.blocks.9.mlp.drop1
781
- vpm.blocks.9.mlp.norm
782
- vpm.blocks.9.mlp.fc2
783
- vpm.blocks.9.mlp.drop2
784
- vpm.blocks.9.ls2
785
- vpm.blocks.9.drop_path2
786
- vpm.blocks.10
787
- vpm.blocks.10.norm1
788
- vpm.blocks.10.attn
789
- vpm.blocks.10.attn.qkv
790
- vpm.blocks.10.attn.q_norm
791
- vpm.blocks.10.attn.k_norm
792
- vpm.blocks.10.attn.attn_drop
793
- vpm.blocks.10.attn.proj
794
- vpm.blocks.10.attn.proj_drop
795
- vpm.blocks.10.ls1
796
- vpm.blocks.10.drop_path1
797
- vpm.blocks.10.norm2
798
- vpm.blocks.10.mlp
799
- vpm.blocks.10.mlp.fc1
800
- vpm.blocks.10.mlp.act
801
- vpm.blocks.10.mlp.drop1
802
- vpm.blocks.10.mlp.norm
803
- vpm.blocks.10.mlp.fc2
804
- vpm.blocks.10.mlp.drop2
805
- vpm.blocks.10.ls2
806
- vpm.blocks.10.drop_path2
807
- vpm.blocks.11
808
- vpm.blocks.11.norm1
809
- vpm.blocks.11.attn
810
- vpm.blocks.11.attn.qkv
811
- vpm.blocks.11.attn.q_norm
812
- vpm.blocks.11.attn.k_norm
813
- vpm.blocks.11.attn.attn_drop
814
- vpm.blocks.11.attn.proj
815
- vpm.blocks.11.attn.proj_drop
816
- vpm.blocks.11.ls1
817
- vpm.blocks.11.drop_path1
818
- vpm.blocks.11.norm2
819
- vpm.blocks.11.mlp
820
- vpm.blocks.11.mlp.fc1
821
- vpm.blocks.11.mlp.act
822
- vpm.blocks.11.mlp.drop1
823
- vpm.blocks.11.mlp.norm
824
- vpm.blocks.11.mlp.fc2
825
- vpm.blocks.11.mlp.drop2
826
- vpm.blocks.11.ls2
827
- vpm.blocks.11.drop_path2
828
- vpm.blocks.12
829
- vpm.blocks.12.norm1
830
- vpm.blocks.12.attn
831
- vpm.blocks.12.attn.qkv
832
- vpm.blocks.12.attn.q_norm
833
- vpm.blocks.12.attn.k_norm
834
- vpm.blocks.12.attn.attn_drop
835
- vpm.blocks.12.attn.proj
836
- vpm.blocks.12.attn.proj_drop
837
- vpm.blocks.12.ls1
838
- vpm.blocks.12.drop_path1
839
- vpm.blocks.12.norm2
840
- vpm.blocks.12.mlp
841
- vpm.blocks.12.mlp.fc1
842
- vpm.blocks.12.mlp.act
843
- vpm.blocks.12.mlp.drop1
844
- vpm.blocks.12.mlp.norm
845
- vpm.blocks.12.mlp.fc2
846
- vpm.blocks.12.mlp.drop2
847
- vpm.blocks.12.ls2
848
- vpm.blocks.12.drop_path2
849
- vpm.blocks.13
850
- vpm.blocks.13.norm1
851
- vpm.blocks.13.attn
852
- vpm.blocks.13.attn.qkv
853
- vpm.blocks.13.attn.q_norm
854
- vpm.blocks.13.attn.k_norm
855
- vpm.blocks.13.attn.attn_drop
856
- vpm.blocks.13.attn.proj
857
- vpm.blocks.13.attn.proj_drop
858
- vpm.blocks.13.ls1
859
- vpm.blocks.13.drop_path1
860
- vpm.blocks.13.norm2
861
- vpm.blocks.13.mlp
862
- vpm.blocks.13.mlp.fc1
863
- vpm.blocks.13.mlp.act
864
- vpm.blocks.13.mlp.drop1
865
- vpm.blocks.13.mlp.norm
866
- vpm.blocks.13.mlp.fc2
867
- vpm.blocks.13.mlp.drop2
868
- vpm.blocks.13.ls2
869
- vpm.blocks.13.drop_path2
870
- vpm.blocks.14
871
- vpm.blocks.14.norm1
872
- vpm.blocks.14.attn
873
- vpm.blocks.14.attn.qkv
874
- vpm.blocks.14.attn.q_norm
875
- vpm.blocks.14.attn.k_norm
876
- vpm.blocks.14.attn.attn_drop
877
- vpm.blocks.14.attn.proj
878
- vpm.blocks.14.attn.proj_drop
879
- vpm.blocks.14.ls1
880
- vpm.blocks.14.drop_path1
881
- vpm.blocks.14.norm2
882
- vpm.blocks.14.mlp
883
- vpm.blocks.14.mlp.fc1
884
- vpm.blocks.14.mlp.act
885
- vpm.blocks.14.mlp.drop1
886
- vpm.blocks.14.mlp.norm
887
- vpm.blocks.14.mlp.fc2
888
- vpm.blocks.14.mlp.drop2
889
- vpm.blocks.14.ls2
890
- vpm.blocks.14.drop_path2
891
- vpm.blocks.15
892
- vpm.blocks.15.norm1
893
- vpm.blocks.15.attn
894
- vpm.blocks.15.attn.qkv
895
- vpm.blocks.15.attn.q_norm
896
- vpm.blocks.15.attn.k_norm
897
- vpm.blocks.15.attn.attn_drop
898
- vpm.blocks.15.attn.proj
899
- vpm.blocks.15.attn.proj_drop
900
- vpm.blocks.15.ls1
901
- vpm.blocks.15.drop_path1
902
- vpm.blocks.15.norm2
903
- vpm.blocks.15.mlp
904
- vpm.blocks.15.mlp.fc1
905
- vpm.blocks.15.mlp.act
906
- vpm.blocks.15.mlp.drop1
907
- vpm.blocks.15.mlp.norm
908
- vpm.blocks.15.mlp.fc2
909
- vpm.blocks.15.mlp.drop2
910
- vpm.blocks.15.ls2
911
- vpm.blocks.15.drop_path2
912
- vpm.blocks.16
913
- vpm.blocks.16.norm1
914
- vpm.blocks.16.attn
915
- vpm.blocks.16.attn.qkv
916
- vpm.blocks.16.attn.q_norm
917
- vpm.blocks.16.attn.k_norm
918
- vpm.blocks.16.attn.attn_drop
919
- vpm.blocks.16.attn.proj
920
- vpm.blocks.16.attn.proj_drop
921
- vpm.blocks.16.ls1
922
- vpm.blocks.16.drop_path1
923
- vpm.blocks.16.norm2
924
- vpm.blocks.16.mlp
925
- vpm.blocks.16.mlp.fc1
926
- vpm.blocks.16.mlp.act
927
- vpm.blocks.16.mlp.drop1
928
- vpm.blocks.16.mlp.norm
929
- vpm.blocks.16.mlp.fc2
930
- vpm.blocks.16.mlp.drop2
931
- vpm.blocks.16.ls2
932
- vpm.blocks.16.drop_path2
933
- vpm.blocks.17
934
- vpm.blocks.17.norm1
935
- vpm.blocks.17.attn
936
- vpm.blocks.17.attn.qkv
937
- vpm.blocks.17.attn.q_norm
938
- vpm.blocks.17.attn.k_norm
939
- vpm.blocks.17.attn.attn_drop
940
- vpm.blocks.17.attn.proj
941
- vpm.blocks.17.attn.proj_drop
942
- vpm.blocks.17.ls1
943
- vpm.blocks.17.drop_path1
944
- vpm.blocks.17.norm2
945
- vpm.blocks.17.mlp
946
- vpm.blocks.17.mlp.fc1
947
- vpm.blocks.17.mlp.act
948
- vpm.blocks.17.mlp.drop1
949
- vpm.blocks.17.mlp.norm
950
- vpm.blocks.17.mlp.fc2
951
- vpm.blocks.17.mlp.drop2
952
- vpm.blocks.17.ls2
953
- vpm.blocks.17.drop_path2
954
- vpm.blocks.18
955
- vpm.blocks.18.norm1
956
- vpm.blocks.18.attn
957
- vpm.blocks.18.attn.qkv
958
- vpm.blocks.18.attn.q_norm
959
- vpm.blocks.18.attn.k_norm
960
- vpm.blocks.18.attn.attn_drop
961
- vpm.blocks.18.attn.proj
962
- vpm.blocks.18.attn.proj_drop
963
- vpm.blocks.18.ls1
964
- vpm.blocks.18.drop_path1
965
- vpm.blocks.18.norm2
966
- vpm.blocks.18.mlp
967
- vpm.blocks.18.mlp.fc1
968
- vpm.blocks.18.mlp.act
969
- vpm.blocks.18.mlp.drop1
970
- vpm.blocks.18.mlp.norm
971
- vpm.blocks.18.mlp.fc2
972
- vpm.blocks.18.mlp.drop2
973
- vpm.blocks.18.ls2
974
- vpm.blocks.18.drop_path2
975
- vpm.blocks.19
976
- vpm.blocks.19.norm1
977
- vpm.blocks.19.attn
978
- vpm.blocks.19.attn.qkv
979
- vpm.blocks.19.attn.q_norm
980
- vpm.blocks.19.attn.k_norm
981
- vpm.blocks.19.attn.attn_drop
982
- vpm.blocks.19.attn.proj
983
- vpm.blocks.19.attn.proj_drop
984
- vpm.blocks.19.ls1
985
- vpm.blocks.19.drop_path1
986
- vpm.blocks.19.norm2
987
- vpm.blocks.19.mlp
988
- vpm.blocks.19.mlp.fc1
989
- vpm.blocks.19.mlp.act
990
- vpm.blocks.19.mlp.drop1
991
- vpm.blocks.19.mlp.norm
992
- vpm.blocks.19.mlp.fc2
993
- vpm.blocks.19.mlp.drop2
994
- vpm.blocks.19.ls2
995
- vpm.blocks.19.drop_path2
996
- vpm.blocks.20
997
- vpm.blocks.20.norm1
998
- vpm.blocks.20.attn
999
- vpm.blocks.20.attn.qkv
1000
- vpm.blocks.20.attn.q_norm
1001
- vpm.blocks.20.attn.k_norm
1002
- vpm.blocks.20.attn.attn_drop
1003
- vpm.blocks.20.attn.proj
1004
- vpm.blocks.20.attn.proj_drop
1005
- vpm.blocks.20.ls1
1006
- vpm.blocks.20.drop_path1
1007
- vpm.blocks.20.norm2
1008
- vpm.blocks.20.mlp
1009
- vpm.blocks.20.mlp.fc1
1010
- vpm.blocks.20.mlp.act
1011
- vpm.blocks.20.mlp.drop1
1012
- vpm.blocks.20.mlp.norm
1013
- vpm.blocks.20.mlp.fc2
1014
- vpm.blocks.20.mlp.drop2
1015
- vpm.blocks.20.ls2
1016
- vpm.blocks.20.drop_path2
1017
- vpm.blocks.21
1018
- vpm.blocks.21.norm1
1019
- vpm.blocks.21.attn
1020
- vpm.blocks.21.attn.qkv
1021
- vpm.blocks.21.attn.q_norm
1022
- vpm.blocks.21.attn.k_norm
1023
- vpm.blocks.21.attn.attn_drop
1024
- vpm.blocks.21.attn.proj
1025
- vpm.blocks.21.attn.proj_drop
1026
- vpm.blocks.21.ls1
1027
- vpm.blocks.21.drop_path1
1028
- vpm.blocks.21.norm2
1029
- vpm.blocks.21.mlp
1030
- vpm.blocks.21.mlp.fc1
1031
- vpm.blocks.21.mlp.act
1032
- vpm.blocks.21.mlp.drop1
1033
- vpm.blocks.21.mlp.norm
1034
- vpm.blocks.21.mlp.fc2
1035
- vpm.blocks.21.mlp.drop2
1036
- vpm.blocks.21.ls2
1037
- vpm.blocks.21.drop_path2
1038
- vpm.blocks.22
1039
- vpm.blocks.22.norm1
1040
- vpm.blocks.22.attn
1041
- vpm.blocks.22.attn.qkv
1042
- vpm.blocks.22.attn.q_norm
1043
- vpm.blocks.22.attn.k_norm
1044
- vpm.blocks.22.attn.attn_drop
1045
- vpm.blocks.22.attn.proj
1046
- vpm.blocks.22.attn.proj_drop
1047
- vpm.blocks.22.ls1
1048
- vpm.blocks.22.drop_path1
1049
- vpm.blocks.22.norm2
1050
- vpm.blocks.22.mlp
1051
- vpm.blocks.22.mlp.fc1
1052
- vpm.blocks.22.mlp.act
1053
- vpm.blocks.22.mlp.drop1
1054
- vpm.blocks.22.mlp.norm
1055
- vpm.blocks.22.mlp.fc2
1056
- vpm.blocks.22.mlp.drop2
1057
- vpm.blocks.22.ls2
1058
- vpm.blocks.22.drop_path2
1059
- vpm.blocks.23
1060
- vpm.blocks.23.norm1
1061
- vpm.blocks.23.attn
1062
- vpm.blocks.23.attn.qkv
1063
- vpm.blocks.23.attn.q_norm
1064
- vpm.blocks.23.attn.k_norm
1065
- vpm.blocks.23.attn.attn_drop
1066
- vpm.blocks.23.attn.proj
1067
- vpm.blocks.23.attn.proj_drop
1068
- vpm.blocks.23.ls1
1069
- vpm.blocks.23.drop_path1
1070
- vpm.blocks.23.norm2
1071
- vpm.blocks.23.mlp
1072
- vpm.blocks.23.mlp.fc1
1073
- vpm.blocks.23.mlp.act
1074
- vpm.blocks.23.mlp.drop1
1075
- vpm.blocks.23.mlp.norm
1076
- vpm.blocks.23.mlp.fc2
1077
- vpm.blocks.23.mlp.drop2
1078
- vpm.blocks.23.ls2
1079
- vpm.blocks.23.drop_path2
1080
- vpm.blocks.24
1081
- vpm.blocks.24.norm1
1082
- vpm.blocks.24.attn
1083
- vpm.blocks.24.attn.qkv
1084
- vpm.blocks.24.attn.q_norm
1085
- vpm.blocks.24.attn.k_norm
1086
- vpm.blocks.24.attn.attn_drop
1087
- vpm.blocks.24.attn.proj
1088
- vpm.blocks.24.attn.proj_drop
1089
- vpm.blocks.24.ls1
1090
- vpm.blocks.24.drop_path1
1091
- vpm.blocks.24.norm2
1092
- vpm.blocks.24.mlp
1093
- vpm.blocks.24.mlp.fc1
1094
- vpm.blocks.24.mlp.act
1095
- vpm.blocks.24.mlp.drop1
1096
- vpm.blocks.24.mlp.norm
1097
- vpm.blocks.24.mlp.fc2
1098
- vpm.blocks.24.mlp.drop2
1099
- vpm.blocks.24.ls2
1100
- vpm.blocks.24.drop_path2
1101
- vpm.blocks.25
1102
- vpm.blocks.25.norm1
1103
- vpm.blocks.25.attn
1104
- vpm.blocks.25.attn.qkv
1105
- vpm.blocks.25.attn.q_norm
1106
- vpm.blocks.25.attn.k_norm
1107
- vpm.blocks.25.attn.attn_drop
1108
- vpm.blocks.25.attn.proj
1109
- vpm.blocks.25.attn.proj_drop
1110
- vpm.blocks.25.ls1
1111
- vpm.blocks.25.drop_path1
1112
- vpm.blocks.25.norm2
1113
- vpm.blocks.25.mlp
1114
- vpm.blocks.25.mlp.fc1
1115
- vpm.blocks.25.mlp.act
1116
- vpm.blocks.25.mlp.drop1
1117
- vpm.blocks.25.mlp.norm
1118
- vpm.blocks.25.mlp.fc2
1119
- vpm.blocks.25.mlp.drop2
1120
- vpm.blocks.25.ls2
1121
- vpm.blocks.25.drop_path2
1122
- vpm.norm
1123
- vpm.attn_pool
1124
- vpm.fc_norm
1125
- vpm.head_drop
1126
- vpm.head
1127
- resampler
1128
- resampler.kv_proj
1129
- resampler.attn
1130
- resampler.attn.out_proj
1131
- resampler.ln_q
1132
- resampler.ln_kv
1133
- resampler.ln_post