Update app.py
app.py CHANGED
@@ -444,63 +444,6 @@ def normalize_parameter_size(param_size: str) -> str:
     except:
         return param_size  # Return original if conversion fails

-def load_latest_model_stats():
-    """Load model stats from the model_stats.json file."""
-    try:
-        # Read directly from model_stats.json in root directory
-        with open('model_stats.json', 'r') as f:
-            stats = json.load(f)
-
-        # Convert stats to table format
-        table_data = []
-        headers = ["Model", "VRAM (GB)", "Size", "Parameters", "Quantization", "Tokens/sec", "Gen Tokens/sec", "Total Tokens", "Response Time (s)"]
-
-        for model in stats:
-            if not model.get("success", False):  # Skip failed tests
-                continue
-
-            perf = model.get("performance", {})
-            info = model.get("model_info", {})
-
-            try:
-                # Format numeric values with 2 decimal places
-                model_size = float(info.get("size", 0))  # Get raw size
-                vram_gb = round(model_size/1024/1024/1024, 2)  # Convert to GB
-                tokens_per_sec = round(float(perf.get("tokens_per_second", 0)), 2)
-                gen_tokens_per_sec = round(float(perf.get("generation_tokens_per_second", 0)), 2)
-                total_tokens = perf.get("total_tokens", 0)
-                response_time = round(float(perf.get("response_time", 0)), 2)
-
-                # Normalize parameter size to billions format
-                param_size = normalize_parameter_size(info.get("parameter_size", "Unknown"))
-
-                row = [
-                    model.get("model_name", "Unknown"),  # String
-                    vram_gb,  # Number (2 decimals)
-                    model_size,  # Number (bytes)
-                    param_size,  # String (normalized to B)
-                    info.get("quantization_level", "Unknown"),  # String
-                    tokens_per_sec,  # Number (2 decimals)
-                    gen_tokens_per_sec,  # Number (2 decimals)
-                    total_tokens,  # Number (integer)
-                    response_time  # Number (2 decimals)
-                ]
-                table_data.append(row)
-            except Exception as row_error:
-                logger.warning(f"Skipping model {model.get('model_name', 'Unknown')}: {str(row_error)}")
-                continue
-
-        if not table_data:
-            return None, "No valid model stats found"
-
-        # Sort by tokens per second (numerically)
-        table_data.sort(key=lambda x: float(x[5]) if isinstance(x[5], (int, float)) else 0, reverse=True)
-
-        return headers, table_data
-    except Exception as e:
-        logger.error(f"Error in load_latest_model_stats: {str(e)}")
-        return None, f"Error loading model stats: {str(e)}"
-
 # Initialize Gradio Blocks
 with gr.Blocks(css="""
     #dice-button {
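For context, the removed load_latest_model_stats() expects model_stats.json to contain a list of per-model records. The sketch below shows one such record with only the keys the deleted code actually reads; all values are illustrative, not taken from the real file.

# Hypothetical model_stats.json record (written as a Python literal); values are made up.
example_entry = {
    "model_name": "example-model:latest",
    "success": True,                      # records with success == False are skipped
    "model_info": {
        "size": 4_500_000_000,            # bytes; divided by 1024**3 for the "VRAM (GB)" column
        "parameter_size": "7B",           # passed through normalize_parameter_size()
        "quantization_level": "Q4_K_M",
    },
    "performance": {
        "tokens_per_second": 42.0,        # the table is sorted by this value, descending
        "generation_tokens_per_second": 40.0,
        "total_tokens": 512,
        "response_time": 12.3,            # seconds
    },
}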
@@ -575,7 +518,7 @@ with gr.Blocks(css="""
     with gr.Tab("Battle Arena"):
         with gr.Row():
             prompt_input = gr.Textbox(
-                label="Enter your prompt",
+                label="Enter your prompt",
                 placeholder="Type your prompt here...",
                 scale=20
             )
@@ -601,8 +544,8 @@ with gr.Blocks(css="""
             right_vote_btn = gr.Button(f"Vote for {right_output.label}", interactive=False)

         result = gr.Textbox(
-            label="Status",
-            interactive=False,
+            label="Status",
+            interactive=False,
             value="Generate responses to start the battle! 🚀",
             visible=True  # Always visible
         )
@@ -642,38 +585,12 @@ with gr.Blocks(css="""
                 outputs=[release_notes]
             )

-    # Model Stats Tab
-    with gr.Tab("Model Stats"):
-        gr.Markdown("""
-        ### Model Performance Statistics
-
-        This tab shows detailed performance metrics for each model, tested using a creative writing prompt.
-        The tests were performed on an **AMD Radeon RX 7600 XT 16GB GPU**.
-
-        For detailed information about the testing methodology, parameters, and hardware setup, please refer to the
-        [README_model_stats.md](https://huggingface.co/spaces/k-mktr/gpu-poor-llm-arena/blob/main/README_model_stats.md).
-
-        """)
-
-        headers, table_data = load_latest_model_stats()
-        if headers:
-            model_stats_table = gr.Dataframe(
-                headers=headers,
-                value=table_data,
-                row_count=len(table_data),
-                col_count=len(headers),
-                interactive=True,
-                label="Model Performance Statistics"
-            )
-        else:
-            gr.Markdown(f"⚠️ {table_data}")  # Show error message if loading failed
-
     # Define interactions
     submit_btn.click(
         battle_arena,
         inputs=prompt_input,
         outputs=[
-            left_output, right_output, left_model, right_model,
+            left_output, right_output, left_model, right_model,
             left_output, right_output, left_vote_btn, right_vote_btn,
             tie_btn, previous_prompt, tie_count, model_names_row, result
         ]
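The removed Model Stats tab simply paired load_latest_model_stats() with a gr.Dataframe. Below is a minimal standalone sketch of that wiring, assuming gradio is installed and the loader is still available to import; the "from app import ..." path is an assumption for illustration only.

import gradio as gr

# Assumption: load_latest_model_stats() remains importable from app.py
# (this commit removes it, so the import path is hypothetical).
from app import load_latest_model_stats

headers, table_data = load_latest_model_stats()

with gr.Blocks() as demo:
    if headers:
        gr.Dataframe(
            headers=headers,
            value=table_data,
            interactive=True,
            label="Model Performance Statistics",
        )
    else:
        # On failure the loader returns (None, error_message), so table_data holds the message here.
        gr.Markdown(f"⚠️ {table_data}")

demo.launch()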
@@ -682,14 +599,14 @@ with gr.Blocks(css="""
     left_vote_btn.click(
         lambda *args: record_vote(*args, "Left is better"),
         inputs=[prompt_input, left_output, right_output, left_model, right_model],
-        outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
+        outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
                  right_vote_btn, tie_btn, model_names_row]
     )

     right_vote_btn.click(
         lambda *args: record_vote(*args, "Right is better"),
         inputs=[prompt_input, left_output, right_output, left_model, right_model],
-        outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
+        outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
                  right_vote_btn, tie_btn, model_names_row]
     )

@@ -701,7 +618,7 @@ with gr.Blocks(css="""

     new_battle_btn.click(
         new_battle,
-        outputs=[prompt_input, left_output, right_output, left_model,
+        outputs=[prompt_input, left_output, right_output, left_model,
                  right_model, left_vote_btn, right_vote_btn, tie_btn,
                  result, leaderboard, model_names_row, elo_leaderboard, tie_count]
     )