Spaces:
Running
Running
burtenshaw
committed on
Commit
·
1c7c01e
1
Parent(s):
b5eec3d
update app to use lighteval format
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from datasets import load_dataset
|
|
| 9 |
abs_path = Path(__file__).parent
|
| 10 |
submissions = json.load(open(abs_path / "submissions.json"))
|
| 11 |
|
| 12 |
-
TASKS = ["
|
| 13 |
TYPES = [
|
| 14 |
"markdown",
|
| 15 |
"markdown",
|
|
@@ -21,14 +21,45 @@ COLUMNS = ["User", "Model Name", "MMLU", "Average ⬆️", "Results"]
|
|
| 21 |
WIDTHS = ["25%", "25%", "15%", "15%", "10%"]
|
| 22 |
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def load_submissions():
|
| 25 |
leaderboard = []
|
| 26 |
-
|
| 27 |
for submission in submissions["submissions"]:
|
| 28 |
ds = load_dataset(submission["results-dataset"], "results")
|
| 29 |
-
ds = ds.filter(lambda x: x["task"] in TASKS)
|
| 30 |
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
leaderboard_row = {}
|
| 34 |
|
|
@@ -40,11 +71,12 @@ def load_submissions():
|
|
| 40 |
f"[{submission['model_name']}](https://huggingface.co/{submission['model_name']})"
|
| 41 |
)
|
| 42 |
|
| 43 |
-
for result in
|
| 44 |
-
leaderboard_row[
|
| 45 |
-
all_accuracy.append(result["accuracy"])
|
| 46 |
|
| 47 |
-
leaderboard_row["Average ⬆️"] = sum(
|
|
|
|
|
|
|
| 48 |
|
| 49 |
leaderboard_row["results-dataset"] = (
|
| 50 |
f"[🔗](https://huggingface.co/datasets/{submission['results-dataset']})"
|
|
|
|
| 9 |
abs_path = Path(__file__).parent
|
| 10 |
submissions = json.load(open(abs_path / "submissions.json"))
|
| 11 |
|
| 12 |
+
TASKS = [("gsm8k", "lighteval|gsm8k|0", "extractive_match")]
|
| 13 |
TYPES = [
|
| 14 |
"markdown",
|
| 15 |
"markdown",
|
|
|
|
| 21 |
WIDTHS = ["25%", "25%", "15%", "15%", "10%"]
|
| 22 |
|
| 23 |
|
| 24 |
+
def load_results(dataset, tasks=None):
    """Extract one score per configured task from a results dataset.

    The newest entry of ``dataset["latest"]["results"]`` is parsed as JSON
    and, for every ``(name, task, metric)`` triple, the value stored at
    ``parsed[task][metric]`` is collected.

    Args:
        dataset: Mapping-like object indexable as
            ``dataset["latest"]["results"]``; the last element of that
            sequence must be a JSON document (string).
        tasks: Optional iterable of ``(name, task, metric)`` triples.
            Defaults to the module-level ``TASKS`` when omitted.

    Returns:
        A list of ``(name, value)`` tuples, in the order of ``tasks``.

    Raises:
        ValueError: If the ``latest``/``results`` entry is missing or empty,
            the payload is not valid JSON, or a task or metric key is
            absent from the parsed payload.
    """
    if tasks is None:
        # Resolved lazily so callers that pass ``tasks`` do not need TASKS.
        tasks = TASKS

    try:
        latest = dataset["latest"]
    except KeyError as err:
        raise ValueError("Cannot find 'latest' key in the dataset") from err

    try:
        # Only the most recent results entry is used.
        raw = latest["results"][-1]
    except (KeyError, IndexError) as err:
        raise ValueError(
            "Cannot find any 'results' entry under 'latest' in the dataset"
        ) from err

    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError) as err:
        # json.JSONDecodeError subclasses ValueError; TypeError covers a
        # payload that is not str/bytes.
        raise ValueError("Cannot parse the output as JSON") from err

    results = []
    for name, task, metric in tasks:
        # Look up each task in the parsed payload itself; the payload must
        # not be overwritten, or later tasks could never be found.
        try:
            task_scores = parsed[task]
        except KeyError as err:
            raise ValueError(f"Cannot find '{task}' key in the dataset") from err

        try:
            value = task_scores[metric]
        except KeyError as err:
            raise ValueError(f"Cannot find '{metric}' key in the dataset") from err

        results.append((name, value))

    return results
|
| 52 |
+
|
| 53 |
+
|
| 54 |
def load_submissions():
|
| 55 |
leaderboard = []
|
|
|
|
| 56 |
for submission in submissions["submissions"]:
|
| 57 |
ds = load_dataset(submission["results-dataset"], "results")
|
|
|
|
| 58 |
|
| 59 |
+
try:
|
| 60 |
+
results = load_results(ds)
|
| 61 |
+
except ValueError as e:
|
| 62 |
+
raise ValueError(f"Cannot load results for {ds['results-dataset']}") from e
|
| 63 |
|
| 64 |
leaderboard_row = {}
|
| 65 |
|
|
|
|
| 71 |
f"[{submission['model_name']}](https://huggingface.co/{submission['model_name']})"
|
| 72 |
)
|
| 73 |
|
| 74 |
+
for name, result in results:
|
| 75 |
+
leaderboard_row[name] = result
|
|
|
|
| 76 |
|
| 77 |
+
leaderboard_row["Average ⬆️"] = sum(result for _, result in results) / len(
|
| 78 |
+
results
|
| 79 |
+
)
|
| 80 |
|
| 81 |
leaderboard_row["results-dataset"] = (
|
| 82 |
f"[🔗](https://huggingface.co/datasets/{submission['results-dataset']})"
|
docs.md
CHANGED
|
@@ -65,7 +65,14 @@ Open a pull request on the [leaderboard space](https://huggingface.co/spaces/smo
|
|
| 65 |
```json
|
| 66 |
{
|
| 67 |
"submissions": [
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
... # existing submissions
|
| 70 |
|
| 71 |
{
|
|
|
|
| 65 |
```json
|
| 66 |
{
|
| 67 |
"submissions": [
|
| 68 |
+
{
|
| 69 |
+
"username": "HuggingFaceTB",
|
| 70 |
+
"model_name": "SmolLM3-3B",
|
| 71 |
+
"chapter": "1",
|
| 72 |
+
"submission_date": "2025-09-02",
|
| 73 |
+
"results-dataset": "smol-course/details_HuggingFaceTB__SmolLM3-3B_private"
|
| 74 |
+
},
|
| 75 |
+
|
| 76 |
... # existing submissions
|
| 77 |
|
| 78 |
{
|