"""Thin wrapper around the arXiv API for the NexaSci tool server.""" from __future__ import annotations from datetime import datetime from typing import List, Optional import arxiv from tools.schemas import PaperMetadata, PaperSearchRequest class ArxivClient: """Provides search and fetch helpers over the arXiv API.""" def __init__(self) -> None: self._client = arxiv.Client() def search(self, request: PaperSearchRequest) -> List[PaperMetadata]: """Execute a free-form search query against arXiv.""" search = arxiv.Search( query=request.query, max_results=request.top_k, sort_by=arxiv.SortCriterion.Relevance, ) results: List[PaperMetadata] = [] for entry in self._client.results(search): results.append(self._to_metadata(entry)) return results def fetch(self, *, arxiv_id: Optional[str] = None, doi: Optional[str] = None) -> Optional[PaperMetadata]: """Fetch a single paper by arXiv identifier or DOI.""" if arxiv_id: search = arxiv.Search(id_list=[arxiv_id]) elif doi: search = arxiv.Search(query=f"doi:{doi}", max_results=1) else: raise ValueError("Either arxiv_id or doi must be provided.") for entry in self._client.results(search): return self._to_metadata(entry) return None @staticmethod def _to_metadata(entry: arxiv.Result) -> PaperMetadata: """Convert the arxiv library's Result into PaperMetadata.""" published = None if entry.published: published = datetime.fromtimestamp(entry.published.timestamp()) authors = [author.name for author in entry.authors] return PaperMetadata( title=entry.title.strip(), abstract=entry.summary.strip() or None, authors=authors, doi=entry.doi, arxiv_id=entry.get_short_id(), published=published, primary_category=str(entry.primary_category) if entry.primary_category else None, url=entry.entry_id, source="arxiv", )