Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,8 @@ import re
|
|
| 7 |
import config
|
| 8 |
import plotly.graph_objects as go
|
| 9 |
from typing import Dict
|
|
|
|
|
|
|
| 10 |
from leaderboard import (
|
| 11 |
get_current_leaderboard,
|
| 12 |
update_leaderboard,
|
|
@@ -419,6 +421,90 @@ def continue_conversation(prompt, left_chat, right_chat, left_model, right_model
|
|
| 419 |
tie_count
|
| 420 |
)
|
| 421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
# Initialize Gradio Blocks
|
| 423 |
with gr.Blocks(css="""
|
| 424 |
#dice-button {
|
|
@@ -561,6 +647,32 @@ with gr.Blocks(css="""
|
|
| 561 |
outputs=[release_notes]
|
| 562 |
)
|
| 563 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
# Define interactions
|
| 565 |
submit_btn.click(
|
| 566 |
battle_arena,
|
|
|
|
| 7 |
import config
|
| 8 |
import plotly.graph_objects as go
|
| 9 |
from typing import Dict
|
| 10 |
+
import json
|
| 11 |
+
import os
|
| 12 |
from leaderboard import (
|
| 13 |
get_current_leaderboard,
|
| 14 |
update_leaderboard,
|
|
|
|
| 421 |
tie_count
|
| 422 |
)
|
| 423 |
|
| 424 |
+
def normalize_parameter_size(param_size: str) -> str:
    """Convert a model parameter-size string to a canonical billions (B) format.

    Accepts values such as "760M", "7B", or a raw count like "8000000000"
    (case-insensitive, spaces and thousands separators ignored) and returns
    a string like "0.76B" / "7.00B" / "8.00B".

    Args:
        param_size: Raw parameter-size string, e.g. "760M" or "8B".

    Returns:
        The size formatted as "<value>.2fB", or the caller's original input
        unchanged if it cannot be parsed.
    """
    # Keep the untouched input so parse failures really do return the
    # original value (the previous version returned the uppercased copy).
    original = param_size
    try:
        # Remove spaces and uppercase for consistency ("7 b" == "7B").
        cleaned = param_size.replace(" ", "").upper()

        if 'M' in cleaned:
            # Millions -> billions.
            number = float(cleaned.replace('M', '').replace(',', ''))
            return f"{number/1000:.2f}B"
        if 'B' in cleaned:
            # Already in billions; just format consistently.
            number = float(cleaned.replace('B', '').replace(',', ''))
            return f"{number:.2f}B"
        # No unit: treat the value as a raw parameter count and convert to
        # billions. (The original if/elif/else here had three identical
        # branches, all dividing by 1e9 — collapsed to one.)
        number = float(cleaned.replace(',', ''))
        return f"{number/1000000000:.2f}B"
    except (AttributeError, TypeError, ValueError):
        # Narrowed from a bare `except:`; covers non-string input and
        # unparsable numbers.
        return original  # Return original if conversion fails
|
| 450 |
+
|
| 451 |
+
def load_latest_model_stats():
    """Load model benchmark stats from model_stats.json and shape them for display.

    Reads ./model_stats.json, skips entries whose benchmark did not succeed,
    rounds numeric metrics to two decimals, normalizes parameter sizes to a
    billions ("B") suffix, and sorts rows by tokens/sec descending.

    Returns:
        (headers, rows) on success, where `headers` is the list of column
        names and `rows` is a list of row lists suitable for gr.Dataframe;
        (None, error_message) on failure or when no valid entries exist.
    """
    try:
        # Read directly from model_stats.json in the root directory.
        # Explicit encoding so decoding does not depend on the platform locale.
        with open('model_stats.json', 'r', encoding='utf-8') as f:
            stats = json.load(f)

        # Convert stats to table format.
        table_data = []
        headers = ["Model", "VRAM (GB)", "Size", "Parameters", "Quantization", "Tokens/sec", "Gen Tokens/sec", "Total Tokens", "Response Time (s)"]

        for model in stats:
            if not model.get("success", False):  # Skip failed tests
                continue

            perf = model.get("performance", {})
            info = model.get("model_info", {})

            try:
                # Format numeric values with 2 decimal places.
                model_size = float(info.get("size", 0))              # raw size in bytes
                vram_gb = round(model_size / 1024 / 1024 / 1024, 2)  # bytes -> GB
                tokens_per_sec = round(float(perf.get("tokens_per_second", 0)), 2)
                gen_tokens_per_sec = round(float(perf.get("generation_tokens_per_second", 0)), 2)
                total_tokens = perf.get("total_tokens", 0)
                response_time = round(float(perf.get("response_time", 0)), 2)

                # Normalize parameter size to billions format.
                param_size = normalize_parameter_size(info.get("parameter_size", "Unknown"))

                row = [
                    model.get("model_name", "Unknown"),        # String
                    vram_gb,                                   # Number (2 decimals)
                    model_size,                                # Number (bytes)
                    param_size,                                # String (normalized to B)
                    info.get("quantization_level", "Unknown"), # String
                    tokens_per_sec,                            # Number (2 decimals)
                    gen_tokens_per_sec,                        # Number (2 decimals)
                    total_tokens,                              # Number (integer)
                    response_time                              # Number (2 decimals)
                ]
                table_data.append(row)
            except Exception as row_error:
                # One malformed entry must not take down the whole table.
                logger.warning(f"Skipping model {model.get('model_name', 'Unknown')}: {str(row_error)}")
                continue

        if not table_data:
            return None, "No valid model stats found"

        # Sort by tokens per second (numerically), fastest first.
        table_data.sort(key=lambda x: float(x[5]) if isinstance(x[5], (int, float)) else 0, reverse=True)

        return headers, table_data
    except Exception as e:
        # Boundary handler: log and surface a message instead of crashing UI build.
        logger.error(f"Error in load_latest_model_stats: {str(e)}")
        return None, f"Error loading model stats: {str(e)}"
|
| 507 |
+
|
| 508 |
# Initialize Gradio Blocks
|
| 509 |
with gr.Blocks(css="""
|
| 510 |
#dice-button {
|
|
|
|
| 647 |
outputs=[release_notes]
|
| 648 |
)
|
| 649 |
|
| 650 |
+
# Model Stats Tab
# NOTE(review): this section sits inside the enclosing `with gr.Blocks(...)`
# context (header not visible here); indentation reconstructed — confirm
# nesting level against the surrounding tabs.
with gr.Tab("Model Stats"):
    gr.Markdown("""
### Model Performance Statistics

This tab shows detailed performance metrics for each model, tested using a creative writing prompt.
The tests were performed on an **AMD Radeon RX 7600 XT 16GB GPU**.

For detailed information about the testing methodology, parameters, and hardware setup, please refer to the
[README_model_stats.md](https://huggingface.co/spaces/k-mktr/gpu-poor-llm-arena/blob/main/README_model_stats.md).

""")

    # Table is built once at UI construction time from model_stats.json;
    # load_latest_model_stats() returns (headers, rows) or (None, error_msg).
    headers, table_data = load_latest_model_stats()
    if headers:
        model_stats_table = gr.Dataframe(
            headers=headers,
            value=table_data,
            row_count=len(table_data),
            col_count=len(headers),
            interactive=False,  # read-only display
            label="Model Performance Statistics"
        )
    else:
        gr.Markdown(f"⚠️ {table_data}")  # Show error message if loading failed
|
| 675 |
+
|
| 676 |
# Define interactions
|
| 677 |
submit_btn.click(
|
| 678 |
battle_arena,
|