open-agent-leaderboard

Running

App Files Community

open-agent-leaderboard / app.py

qq-hzlh

add requirements.txt;

bbcdbca 12 months ago

raw

history blame

7.27 kB

	import abc
	import gradio as gr

	from gen_table import *
	from meta_data import *

	# import pandas as pd
	# pd.set_option('display.max_colwidth', 0)

	# Extra markup handed to gr.Blocks(head=...) further down: one <style> element
	# that (1) sets min-width: var(--size-full) on .gradio-container for viewports
	# at least 1536px wide and (2) gives checkbox inputs a blue accent,
	# rgb(59, 130, 246).
	# NOTE: the CSS comments inside the literal are written in Chinese
	# ("添加复选框样式" = "add checkbox styles", "蓝色" = "blue"). They are part of
	# the runtime string and are therefore left untouched.
	head_style = """
	<style>
	@media (min-width: 1536px)
	{
	.gradio-container {
	min-width: var(--size-full) !important;
	}
	}

	/* 添加复选框样式 */
	.gr-checkbox {
	accent-color: rgb(59, 130, 246) !important; /* 蓝色 */
	}

	.gr-checkbox-group label input[type="checkbox"] {
	accent-color: rgb(59, 130, 246) !important;
	}

	.gr-checkbox-group input[type="checkbox"]:checked {
	background-color: rgb(59, 130, 246) !important;
	border-color: rgb(59, 130, 246) !important;
	}
	</style>
	"""

	# Build the entire UI inside a single gr.Blocks context bound to `demo`;
	# head_style is injected through the `head` argument.
	with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
	# Load the overall math results; the code below reads the 'time' and
	# 'results' entries of the returned object.
	struct = load_results(OVERALL_MATH_SCORE_FILE)
	timestamp = struct['time']
	EVAL_TIME = format_timestamp(timestamp)
	results = struct['results']
	# NOTE(review): N_MODEL and N_DATA are not referenced again in the visible code.
	N_MODEL = len(results)
	N_DATA = len(results['IO'])
	# Dataset names come from iterating results['IO'] (presumably a dict keyed by
	# dataset name — confirm), with the 'META' entry dropped. list.remove raises
	# ValueError if 'META' is absent.
	DATASETS = list(results['IO'])
	DATASETS.remove('META')
	# Prints the dataset list to stdout while the UI is being built.
	print(DATASETS)



	with gr.Tabs() as tabs:
	# Introduction text, formatted with the EVAL_TIME value computed earlier.
	# NOTE(review): the constant is spelled LEADERBORAD_INTRODUCTION; it is defined
	# outside this file (presumably in meta_data), so the spelling is kept as-is.
	gr.Markdown(LEADERBORAD_INTRODUCTION.format(EVAL_TIME))

	# ---- Tab 1: overall math leaderboard ----
	with gr.Tab(label='🏅 Open Agent Overall Math Leaderboard'):
	gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
	# check_box is used below as a mapping with the keys 'all', 'required',
	# 'essential' and 'type_map'; table is indexed with a list of column names.
	check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
	table = generate_table(results, DEFAULT_MATH_BENCH)

	# Column name -> datatype string for the DataFrame component; 'Rank' is
	# registered here as a 'number' column.
	type_map = check_box['type_map']
	type_map['Rank'] = 'number'

	# Column selector: offers check_box['all'], pre-selects check_box['required'].
	checkbox_group = gr.CheckboxGroup(
	choices=check_box['all'],
	value=check_box['required'],
	label='Evaluation Dimension',
	interactive=True,
	)

	# Initial columns: 'Rank', the essential columns, then the pre-selected ones.
	headers = ['Rank'] + check_box['essential'] + checkbox_group.value
	data_component = gr.components.DataFrame(
	value=table[headers],
	type='pandas',
	datatype=[type_map[x] for x in headers],
	interactive=False,
	wrap=True,
	visible=True)

	def _make_overall_filter(table, check_box, type_map):
	    """Create the column-filter callback for the overall leaderboard tab.

	    The table, the checkbox metadata and the datatype map are taken as
	    arguments so that the callback keeps using the overall-tab objects.
	    Reading them as module-level names at call time is not safe: the names
	    `table`, `check_box` and `type_map` are assigned again when the detail
	    tab is built, and the callback only runs after the whole UI exists.

	    Args:
	        table: Overall leaderboard table; indexed with a list of column names.
	        check_box: Mapping whose 'essential' entry lists the always-shown
	            columns.
	        type_map: Mapping from column name to DataFrame datatype string.

	    Returns:
	        The `filter_df(fields, *args)` callback bound to these objects.
	    """

	    def filter_df(fields, *args):
	        """Rebuild the overall table restricted to the selected columns.

	        Args:
	            fields: Column names currently ticked in the checkbox group.
	            *args: Ignored; accepted so existing callers keep working.

	        Returns:
	            A gr.components.DataFrame showing 'Rank', the essential columns
	            and `fields`, in that order.
	        """
	        # Base columns first, then whatever the user selected.
	        headers = ['Rank'] + check_box['essential'] + fields
	        return gr.components.DataFrame(
	            value=table[headers],  # show only the selected columns
	            type='pandas',
	            datatype=[type_map[x] for x in headers],
	            interactive=False,
	            wrap=True,
	            visible=True)

	    return filter_df

	# Bind immediately, while the names still refer to the overall-tab objects.
	filter_df = _make_overall_filter(table, check_box, type_map)

	# The change event of checkbox_group only needs checkbox_group itself as input;
	# the component returned by filter_df is written to data_component.
	checkbox_group.change(
	fn=filter_df,
	inputs=[checkbox_group],
	outputs=data_component
	)

	# ---- Tab 2: detailed math leaderboard ----
	with gr.Tab(label='🏅 Open Agent Detail Math Leaderboard'):
	gr.Markdown(LEADERBOARD_MD['MATH_DETAIL'])
	# Load the detail results; as with the overall file, the 'time' and 'results'
	# entries are read.
	struct_detail = load_results(DETAIL_MATH_SCORE_FILE)
	# NOTE(review): timestamp / EVAL_TIME are reassigned here but not used again
	# in the visible code.
	timestamp = struct_detail['time']
	EVAL_TIME = format_timestamp(timestamp)
	results_detail = struct_detail['results']

	# NOTE(review): this rebinds the names `table` and `check_box` (and `type_map`
	# just below) that were also assigned while building the overall tab.
	table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH)
	# table = generate_table_detail(results_detail, DEFAULT_MATH_BENCH)

	# Column name -> datatype string; 'Rank' is registered as a 'number' column.
	type_map = check_box['type_map']
	type_map['Rank'] = 'number'

	# Column selector: offers check_box['all'], pre-selects check_box['required'].
	checkbox_group = gr.CheckboxGroup(
	choices=check_box['all'],
	value=check_box['required'],
	label='Evaluation Dimension',
	interactive=True,
	)

	# NOTE(review): unlike the filter callback defined for this tab, the initial
	# header list does not include check_box['essential'] — confirm this is intended.
	headers = ['Rank'] + checkbox_group.value
	with gr.Row():

	# Row filters; each one starts with every choice selected.
	algo_name = gr.CheckboxGroup(
	choices=ALGORITHMS,
	value=ALGORITHMS,
	label='Algorithm',
	interactive=True
	)

	dataset_name = gr.CheckboxGroup(
	choices=DATASETS,
	value=DATASETS,
	label='Datasets',
	interactive=True
	)

	llm_name = gr.CheckboxGroup(
	choices=LLM,
	value=LLM,
	label='LLM',
	interactive=True
	)

	# Initial, unfiltered view of the detail table.
	data_component = gr.components.DataFrame(
	value=table[headers],
	type='pandas',
	datatype=[type_map[x] for x in headers],
	interactive=False,
	wrap=True,
	visible=True)

	def select_and_rank_rows(df, algos, datasets, llms):
	    """Return the rows of *df* matching all three selections, re-ranked.

	    A row is kept when its 'Algorithm', 'Dataset' and 'LLM' values are each
	    contained in the corresponding selection. When a 'Score' column exists,
	    'Rank' is recomputed inside every 'Dataset' group: highest score gets
	    rank 1 and ties are broken by row order (method='first').

	    Args:
	        df: Table with 'Algorithm', 'Dataset' and 'LLM' columns. Not modified.
	        algos: Selected algorithm names.
	        datasets: Selected dataset names.
	        llms: Selected LLM names.

	    Returns:
	        A new DataFrame with the matching rows. 'Rank' holds integers when
	        it was recomputed.
	    """
	    # Vectorised membership tests; no scratch column is added to the frame,
	    # so an existing column can never be overwritten by the filter itself.
	    keep = (
	        df['Algorithm'].isin(algos)
	        & df['Dataset'].isin(datasets)
	        & df['LLM'].isin(llms)
	    )
	    selected = df[keep].copy()

	    # Nothing to rank when no row survived or there is no score to rank by.
	    if selected.empty or 'Score' not in selected.columns:
	        return selected

	    # Group by dataset and rank by Score within each group.
	    ranks = selected.groupby('Dataset')['Score'].rank(method='first', ascending=False)
	    # rank() yields floats; the table shows whole-number ranks.
	    selected['Rank'] = ranks.astype(int)
	    return selected

	def filter_df(fields, algos, datasets, llms):
	    """Rebuild the detail table for the current column and row selections.

	    Args:
	        fields: Column names ticked in the 'Evaluation Dimension' group.
	        algos: Values ticked in the 'Algorithm' group.
	        datasets: Values ticked in the 'Datasets' group.
	        llms: Values ticked in the 'LLM' group.

	    Returns:
	        A gr.components.DataFrame showing 'Rank', the essential columns and
	        `fields` for the rows that match every row filter.
	    """
	    headers = ['Rank'] + check_box['essential'] + fields
	    selected = select_and_rank_rows(table, algos, datasets, llms)

	    return gr.components.DataFrame(
	        value=selected[headers],
	        type='pandas',
	        datatype=[type_map[x] for x in headers],
	        interactive=False,
	        wrap=True,
	        visible=True)

	# Attach the same change handler to every checkbox group of the detail tab:
	# whichever group changes, filter_df receives all four current selections
	# and its result is written to data_component.
	for trigger in (checkbox_group, algo_name, dataset_name, llm_name):
	    trigger.change(
	        fn=filter_df,
	        inputs=[checkbox_group, algo_name, dataset_name, llm_name],
	        outputs=data_component,
	    )


	# Citation panel: an accordion (closed by default) inside its own row,
	# holding a textbox with the BibTeX snippet and a copy button.
	with gr.Row(), gr.Accordion("📙 Citation", open=False):
	    gr.Textbox(
	        value=CITATION_BUTTON_TEXT,
	        lines=7,
	        label="Copy the BibTeX snippet to cite this source",
	        elem_id="citation-button",
	        show_copy_button=True,
	    )


	# Launch the app only when this file is run as a script, not when imported.
	# NOTE(review): server_name='0.0.0.0' presumably makes the server listen on all
	# network interfaces (see Gradio's launch() documentation) — confirm this
	# exposure is intended for the deployment.
	if __name__ == '__main__':
	demo.launch(server_name='0.0.0.0')