import csv
import json
from typing import Any, Dict, Iterable, List

import requests
# Download the notes posted under the ICLR 2023 Blind_Submission invitation
# from the OpenReview API and export selected fields to a CSV file.

BASE_URL = 'https://api.openreview.net'
INVITATION = 'ICLR.cc/2023/Conference/-/Blind_Submission'
PAGE_SIZE = 1000  # notes requested per API call
MAX_COUNT = 4944  # hard cap on the total number of notes requested
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever
OUTPUT_PATH = 'iclr_submissions.csv'
CSV_HEADER = ['title', 'url', 'pdf', 'tldr', 'abstract', 'keywords']


def fetch_papers(
    max_count: int = MAX_COUNT,
    page_size: int = PAGE_SIZE,
) -> List[Dict[str, Any]]:
    """Fetch submission notes page by page.

    Args:
        max_count: Upper bound on the number of notes to collect.
        page_size: Maximum number of notes to request per call.

    Returns:
        The ``notes`` entries of every page, in the order received.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    papers: List[Dict[str, Any]] = []
    offset = 0
    while offset < max_count:
        limit = min(page_size, max_count - offset)
        print(offset, limit)
        response = requests.get(
            BASE_URL + '/notes',
            # Passed as params so requests does the percent-encoding.
            params={
                'details': 'invitation,original',
                'offset': offset,
                'limit': limit,
                'invitation': INVITATION,
            },
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on HTTP errors instead of a KeyError on 'notes' below.
        response.raise_for_status()
        page = response.json()['notes']
        if not page:
            # Nothing left on the server; further requests would be wasted.
            break
        papers.extend(page)
        # Advance by what was actually returned so a short page never
        # causes records to be skipped.
        offset += len(page)
    return papers


def paper_to_row(paper: Dict[str, Any]) -> List[str]:
    """Convert one note into a CSV row matching ``CSV_HEADER``.

    Missing ``title``, ``TL;DR``, ``abstract`` or ``keywords`` fields become
    empty strings so one sparse record does not abort the whole export.

    Args:
        paper: A note dict with a ``forum`` id and a ``content`` dict.

    Returns:
        ``[title, url, pdf, tldr, abstract, keywords]``.

    Raises:
        KeyError: If ``paper`` has no ``content`` or ``forum`` entry.
    """
    content = paper['content']
    forum = paper['forum']
    keywords = content.get('keywords') or []
    if isinstance(keywords, str):
        # Joining a bare string would split it into single characters.
        keywords = [keywords]
    return [
        content.get('title', ''),
        f'https://openreview.net/forum?id={forum}',
        f'https://openreview.net/pdf?id={forum}',
        content.get('TL;DR', ''),
        content.get('abstract', ''),
        ', '.join(keywords),
    ]


def write_csv(papers: Iterable[Dict[str, Any]], path: str = OUTPUT_PATH) -> None:
    """Write the header and one row per paper to ``path``, overwriting it."""
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows(paper_to_row(paper) for paper in papers)


def main() -> None:
    """Download the submissions and export them to ``OUTPUT_PATH``."""
    write_csv(fetch_papers())


if __name__ == '__main__':
    main()