Nexa_Labs / tools /paper_sources /arxiv_client.py
Allanatrix's picture
Upload 57 files
d8328bf verified
"""Thin wrapper around the arXiv API for the NexaSci tool server."""
from __future__ import annotations
from datetime import datetime
from typing import List, Optional
import arxiv
from tools.schemas import PaperMetadata, PaperSearchRequest
class ArxivClient:
    """Search and fetch helpers over the arXiv API, returning ``PaperMetadata``.

    Every request goes through a single ``arxiv.Client`` instance created
    with the library's default settings.
    """

    def __init__(self) -> None:
        """Create the underlying ``arxiv.Client`` used for all requests."""
        self._client = arxiv.Client()

    def search(self, request: PaperSearchRequest) -> List[PaperMetadata]:
        """Execute a free-form search query against arXiv.

        Args:
            request: Supplies ``query`` (the search string) and ``top_k``
                (forwarded to arXiv as ``max_results``).

        Returns:
            The matching papers, requested in relevance order, converted to
            ``PaperMetadata``.
        """
        search = arxiv.Search(
            query=request.query,
            max_results=request.top_k,
            sort_by=arxiv.SortCriterion.Relevance,
        )
        return [self._to_metadata(entry) for entry in self._client.results(search)]

    def fetch(
        self,
        *,
        arxiv_id: Optional[str] = None,
        doi: Optional[str] = None,
    ) -> Optional[PaperMetadata]:
        """Fetch a single paper by arXiv identifier or DOI.

        Args:
            arxiv_id: arXiv identifier to look up. Takes precedence over
                ``doi`` when both are supplied.
            doi: DOI to look up when no ``arxiv_id`` is given.

        Returns:
            The first result converted to ``PaperMetadata``, or ``None`` when
            the lookup yields nothing.

        Raises:
            ValueError: If both ``arxiv_id`` and ``doi`` are missing or empty.
        """
        if arxiv_id:
            search = arxiv.Search(id_list=[arxiv_id])
        elif doi:
            # NOTE(review): ``doi:`` does not appear among the field prefixes
            # listed in the arXiv API user manual -- confirm that this query
            # really matches on DOI rather than silently returning nothing.
            search = arxiv.Search(query=f"doi:{doi}", max_results=1)
        else:
            raise ValueError("Either arxiv_id or doi must be provided.")

        # Only the first hit is of interest; ``None`` signals "not found".
        first = next(iter(self._client.results(search)), None)
        return None if first is None else self._to_metadata(first)

    @staticmethod
    def _naive_utc(moment: datetime) -> datetime:
        """Return *moment* as a naive datetime expressed in UTC.

        Aware values are shifted by their UTC offset and stripped of tzinfo;
        values that are already naive are returned unchanged.
        """
        offset = moment.utcoffset()
        if offset is None:
            return moment
        return moment.replace(tzinfo=None) - offset

    @staticmethod
    def _to_metadata(entry: arxiv.Result) -> PaperMetadata:
        """Convert the arxiv library's Result into PaperMetadata.

        The title and abstract are whitespace-stripped, and an empty abstract
        becomes ``None``. ``published`` is stored as a naive UTC datetime so
        the value does not depend on the timezone of the machine running the
        tool server.
        """
        published = None
        if entry.published is not None:
            # Round-tripping through ``datetime.fromtimestamp`` would yield
            # host-local wall-clock time; normalise to UTC explicitly instead.
            published = ArxivClient._naive_utc(entry.published)

        category = entry.primary_category
        return PaperMetadata(
            title=entry.title.strip(),
            abstract=entry.summary.strip() or None,
            authors=[author.name for author in entry.authors],
            doi=entry.doi,
            arxiv_id=entry.get_short_id(),
            published=published,
            primary_category=str(category) if category else None,
            url=entry.entry_id,
            source="arxiv",
        )