Spaces:
Running
Running
| import abc | |
| import gradio as gr | |
| from gen_table import * | |
| from meta_data import * | |
| # import pandas as pd | |
| # pd.set_option('display.max_colwidth', 0) | |
# Extra markup passed to gr.Blocks(head=...). The stylesheet stretches the
# Gradio container to full width on viewports at least 1536px wide and gives
# checkboxes (single and grouped) a blue accent colour.
head_style = """
<style>
@media (min-width: 1536px)
{
.gradio-container {
min-width: var(--size-full) !important;
}
}
/* 添加复选框样式 */
.gr-checkbox {
accent-color: rgb(59, 130, 246) !important; /* 蓝色 */
}
.gr-checkbox-group label input[type="checkbox"] {
accent-color: rgb(59, 130, 246) !important;
}
.gr-checkbox-group input[type="checkbox"]:checked {
background-color: rgb(59, 130, 246) !important;
border-color: rgb(59, 130, 246) !important;
}
</style>
"""
def _visible_columns(essential, selected):
    """Return the ordered list of columns to display.

    'Rank' always comes first, followed by the essential columns and then the
    user-selected ones. Duplicates are dropped (first occurrence wins) so a
    field that is both essential and selected is not rendered twice.
    """
    return list(dict.fromkeys(['Rank', *essential, *selected]))


def _table_component(frame, columns, type_map):
    """Build a read-only ``gr.DataFrame`` showing ``columns`` of ``frame``.

    ``type_map`` maps each column name to the Gradio datatype used to render it.
    """
    return gr.components.DataFrame(
        value=frame[columns],
        type='pandas',
        datatype=[type_map[col] for col in columns],
        interactive=False,
        wrap=True,
        visible=True,
    )


def _filter_and_rank(frame, algos, datasets, llms):
    """Keep the rows matching the selections and re-rank them per dataset.

    Rows survive only if their 'Algorithm', 'Dataset' and 'LLM' values are all
    among the selected ones. When a 'Score' column exists, 'Rank' is recomputed
    inside each dataset (highest score first, ties broken by row order). Rows
    with a missing score are ranked last instead of crashing the int cast.
    """
    keep = (
        frame['Algorithm'].isin(algos)
        & frame['Dataset'].isin(datasets)
        & frame['LLM'].isin(llms)
    )
    subset = frame[keep].copy()
    if 'Score' in subset.columns:
        subset['Rank'] = (
            subset.groupby('Dataset')['Score']
            .rank(method='first', ascending=False, na_option='bottom')
            .astype(int)
        )
    return subset


# Build the leaderboard UI. Each tab keeps its own uniquely named table,
# checkbox metadata and type map: `with` blocks do not create a scope, so
# reusing one set of names across tabs would make the first tab's callback
# read the second tab's data when it finally runs.
with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
    struct = load_results(OVERALL_MATH_SCORE_FILE)
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']
    N_MODEL = len(results)
    # NOTE(review): this counts every key under results['IO'], including the
    # 'META' entry that is excluded from DATASETS below - confirm intent.
    N_DATA = len(results['IO'])
    # Tolerates a missing 'META' key instead of raising ValueError.
    DATASETS = [name for name in results['IO'] if name != 'META']
    print(DATASETS)

    with gr.Tabs() as tabs:
        gr.Markdown(LEADERBORAD_INTRODUCTION.format(EVAL_TIME))

        with gr.Tab(label='🏅 Open Agent Overall Math Leaderboard'):
            gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
            overall_check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
            overall_table = generate_table(results, DEFAULT_MATH_BENCH)
            overall_type_map = overall_check_box['type_map']
            overall_type_map['Rank'] = 'number'

            overall_fields = gr.CheckboxGroup(
                choices=overall_check_box['all'],
                value=overall_check_box['required'],
                label='Evaluation Dimension',
                interactive=True,
            )
            overall_view = _table_component(
                overall_table,
                _visible_columns(
                    overall_check_box['essential'], overall_fields.value
                ),
                overall_type_map,
            )

            def filter_overall(fields, *args):
                """Re-render the overall table with only the selected columns.

                ``fields`` is the current checkbox selection; extra positional
                arguments are accepted and ignored.
                """
                columns = _visible_columns(
                    overall_check_box['essential'], fields
                )
                return _table_component(
                    overall_table, columns, overall_type_map
                )

            # Only the column selector drives this table.
            overall_fields.change(
                fn=filter_overall,
                inputs=[overall_fields],
                outputs=overall_view,
            )

        with gr.Tab(label='🏅 Open Agent Detail Math Leaderboard'):
            gr.Markdown(LEADERBOARD_MD['MATH_DETAIL'])
            struct_detail = load_results(DETAIL_MATH_SCORE_FILE)
            timestamp = struct_detail['time']
            EVAL_TIME = format_timestamp(timestamp)
            results_detail = struct_detail['results']
            detail_table, detail_check_box = BUILD_L2_DF(
                results_detail, DEFAULT_MATH_BENCH
            )
            detail_type_map = detail_check_box['type_map']
            detail_type_map['Rank'] = 'number'

            detail_fields = gr.CheckboxGroup(
                choices=detail_check_box['all'],
                value=detail_check_box['required'],
                label='Evaluation Dimension',
                interactive=True,
            )
            with gr.Row():
                algo_name = gr.CheckboxGroup(
                    choices=ALGORITHMS,
                    value=ALGORITHMS,
                    label='Algorithm',
                    interactive=True,
                )
                dataset_name = gr.CheckboxGroup(
                    choices=DATASETS,
                    value=DATASETS,
                    label='Datasets',
                    interactive=True,
                )
                llm_name = gr.CheckboxGroup(
                    choices=LLM,
                    value=LLM,
                    label='LLM',
                    interactive=True,
                )

            # NOTE(review): the initial view shows 'Rank' plus the default
            # selection only, whereas the callback below also prepends the
            # essential columns - confirm which layout is intended.
            detail_view = _table_component(
                detail_table,
                ['Rank'] + detail_fields.value,
                detail_type_map,
            )

            def filter_detail(fields, algos, datasets, llms):
                """Re-render the detail table for the current selections.

                ``fields`` picks the visible columns; ``algos``, ``datasets``
                and ``llms`` pick the rows, which are then re-ranked within
                each dataset.
                """
                columns = _visible_columns(
                    detail_check_box['essential'], fields
                )
                subset = _filter_and_rank(
                    detail_table, algos, datasets, llms
                )
                return _table_component(subset, columns, detail_type_map)

            # Any of the four selectors refreshes the table, and the callback
            # always receives the state of all four.
            detail_inputs = [detail_fields, algo_name, dataset_name, llm_name]
            for trigger in detail_inputs:
                trigger.change(
                    fn=filter_detail,
                    inputs=detail_inputs,
                    outputs=detail_view,
                )

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            gr.Textbox(
                value=CITATION_BUTTON_TEXT, lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True,
            )
# Start the web server only when this file is executed as a script.
if __name__ == '__main__':
    # '0.0.0.0' makes the server listen on all network interfaces.
    bind_host = '0.0.0.0'
    demo.launch(server_name=bind_host)