Spaces:
Paused
Paused
| """Thin wrapper around the arXiv API for the NexaSci tool server.""" | |
| from __future__ import annotations | |
| from datetime import datetime | |
| from typing import List, Optional | |
| import arxiv | |
| from tools.schemas import PaperMetadata, PaperSearchRequest | |
class ArxivClient:
    """Provides search and fetch helpers over the arXiv API.

    Wraps a single ``arxiv.Client`` instance and converts every
    ``arxiv.Result`` it yields into the project's ``PaperMetadata`` schema.
    """

    def __init__(self) -> None:
        """Create the underlying ``arxiv.Client`` with its default settings."""
        self._client = arxiv.Client()

    def search(self, request: PaperSearchRequest) -> List[PaperMetadata]:
        """Execute a free-form search query against arXiv.

        Args:
            request: Search parameters. ``request.query`` is passed through as
                the arXiv query string and ``request.top_k`` as the maximum
                number of results.

        Returns:
            The matching papers in the order the client yields them (the
            search is sorted by relevance).
        """
        search = arxiv.Search(
            query=request.query,
            max_results=request.top_k,
            sort_by=arxiv.SortCriterion.Relevance,
        )
        return [self._to_metadata(entry) for entry in self._client.results(search)]

    def fetch(
        self,
        *,
        arxiv_id: Optional[str] = None,
        doi: Optional[str] = None,
    ) -> Optional[PaperMetadata]:
        """Fetch a single paper by arXiv identifier or DOI.

        Args:
            arxiv_id: arXiv identifier to look up. Takes precedence over
                ``doi`` when both are supplied.
            doi: DOI to look up; used only when ``arxiv_id`` is empty or None.

        Returns:
            Metadata for the first result the client yields, or ``None`` when
            the lookup produces no results.

        Raises:
            ValueError: If neither ``arxiv_id`` nor ``doi`` is provided
                (empty strings count as not provided).
        """
        if arxiv_id:
            search = arxiv.Search(id_list=[arxiv_id])
        elif doi:
            # NOTE(review): relies on arXiv accepting a ``doi:`` field prefix
            # in the query string -- confirm against the arXiv API manual.
            search = arxiv.Search(query=f"doi:{doi}", max_results=1)
        else:
            raise ValueError("Either arxiv_id or doi must be provided.")

        # Only the first hit matters; stop consuming the result stream after it.
        first = next(iter(self._client.results(search)), None)
        if first is None:
            return None
        return self._to_metadata(first)

    @staticmethod
    def _to_metadata(entry: arxiv.Result) -> PaperMetadata:
        """Convert the arxiv library's Result into PaperMetadata.

        Declared as a static method because it reads no instance state; this
        is also what makes the ``self._to_metadata(entry)`` call sites valid
        (without the decorator ``self`` would be bound to ``entry`` and the
        call would raise ``TypeError``).

        Args:
            entry: A single result object produced by the arxiv client.

        Returns:
            A ``PaperMetadata`` with ``source`` set to ``"arxiv"``. The
            abstract is ``None`` when the stripped summary is empty, and
            ``published`` / ``primary_category`` are ``None`` when the entry
            has no value for them.
        """
        published = None
        if entry.published:
            # NOTE(review): fromtimestamp() without a tz argument yields a
            # naive datetime in the host's local timezone -- confirm that is
            # what PaperMetadata.published expects.
            published = datetime.fromtimestamp(entry.published.timestamp())

        authors = [author.name for author in entry.authors]
        return PaperMetadata(
            title=entry.title.strip(),
            abstract=entry.summary.strip() or None,
            authors=authors,
            doi=entry.doi,
            arxiv_id=entry.get_short_id(),
            published=published,
            primary_category=str(entry.primary_category) if entry.primary_category else None,
            url=entry.entry_id,
            source="arxiv",
        )