Spaces:
Running
Running
# Score data locations.
# Both values are relative paths to JSON files kept under "src/".
DETAIL_MATH_SCORE_FILE = "src/detail_math_score.json"    # per-benchmark detail scores
OVERALL_MATH_SCORE_FILE = "src/overall_math_score.json"  # overall (aggregated) scores
# CONSTANTS-TEXT
# Markdown introduction for the leaderboard page.
# The text contains exactly one "{}" placeholder, positioned after
# "last updated:" -- presumably filled with a timestamp via str.format by
# the caller (verify against the app code).
LEADERBORAD_INTRODUCTION = """# Open Agent Leaderboard
### Welcome to the Open Agent Leaderboard! We share the evaluation results of open agents: COT, SC_COT, POT, ReAct, etc. The agents are implemented with the open-source framework: [*OmAgent*](https://github.com/om-ai-lab/OmAgent)
This leaderboard was last updated: {}.
To add your own agent to the leaderboard, please create a PR in [*OmAgent*](https://github.com/om-ai-lab/OmAgent), then we will help with the evaluation and updating the leaderboard. For any questions or concerns, please feel free to contact us.
"""
# The original constant name is misspelled ("LEADERBORAD"); it is kept so
# existing imports keep working, and a correctly spelled alias is provided.
LEADERBOARD_INTRODUCTION = LEADERBORAD_INTRODUCTION
# Benchmarks interpolated into the markdown sections below.
DEFAULT_MATH_BENCH = [
    'gsm8k', 'AQuA'
]

# The README file for each benchmark
# Maps a section key (e.g. 'MATH_MAIN') to its markdown text.
LEADERBOARD_MD = {}

# Explanation of the columns in the main results table. The metric bullets
# are indented two spaces so markdown renders them nested under "Metrics".
LEADERBOARD_MD['MATH_MAIN'] = f"""
## Math task main Evaluation Results
- Metrics:
  - Avg Score: The average score on all math Benchmarks (normalized to 0 - 100, the higher the better).
  - Rank: The average rank on all math Benchmarks (the lower the better).
  - Score: The evaluation score on each math Benchmark (the higher the better).
  - Cost: The cost on each math Benchmark (the lower the better).
- By default, we present the overall evaluation results based on {', '.join(DEFAULT_MATH_BENCH)}, sorted by the descending order of Avg Score.
"""
# Notes shown with the detailed results table. Relies on LEADERBOARD_MD and
# DEFAULT_MATH_BENCH, both defined earlier in this module.
#
# Prices are quoted per 1K tokens: the Doubao input figure equals
# 0.0003 CNY / 7.3249 = 0.00004096 USD, and the gpt-3.5-turbo figures match
# 0.50 / 1.50 USD per 1M tokens. The previous "/1M tokens" unit understated
# the cost by a factor of 1000. NOTE(review): confirm against the provider
# price lists used for the evaluation.
#
# Nested bullets are indented so markdown renders each model's prices under
# the model name rather than as one flat list.
LEADERBOARD_MD['MATH_DETAIL'] = f"""
## Math task detail Evaluation Results
- By default, we present the overall evaluation results based on {', '.join(DEFAULT_MATH_BENCH)}
- default parameters: temperature=0.0
- LLM prices:
  - gpt-3.5-turbo:
    - 0.0005$/1K tokens (input)
    - 0.0015$/1K tokens (output)
  - Doubao-lite-32k (1 USD = 7.3249 CNY):
    - 0.00004096$/1K tokens (input)
    - 0.0001$/1K tokens (output)
- ReAct-Pro*: We modified ReAct to ReAct-Pro, following the Reflexion repository. Implementation details can be found in the [*OmAgent*](https://github.com/om-ai-lab/OmAgent) repository.
"""
# Column names that describe a leaderboard entry rather than a score.
META_FIELDS = [
    "Algorithm",
    "LLM",
    "Eval Date",
]

# Benchmark dataset names.
DATASETS = [
    "gsm8k",
    "AQuA",
]

# Language model names.
LLM = [
    "Doubao-lite-32k",
    "gpt-3.5-turbo",
]

# Agent algorithm names; the trailing "*" on ReAct-Pro is part of the label.
ALGORITHMS = [
    "IO",
    "COT",
    "SC_COT",
    "POT",
    "ReAct-Pro*",
]
| CITATION_BUTTON_TEXT = r"""@article{zhang2024omagent, | |
| title={OmAgent: A Multi-modal Agent Framework for Complex Video Understanding with Task Divide-and-Conquer}, | |
| author={Zhang, Lu and Zhao, Tiancheng and Ying, Heting and Ma, Yibo and Lee, Kyusong}, | |
| journal={arXiv preprint arXiv:2406.16620}, | |
| year={2024} | |
| }""" |