File size: 2,168 Bytes
d8328bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""Thin wrapper around the arXiv API for the NexaSci tool server."""

from __future__ import annotations

from datetime import datetime, timezone
from typing import List, Optional

import arxiv

from tools.schemas import PaperMetadata, PaperSearchRequest


class ArxivClient:
    """Search and fetch helpers built on the ``arxiv`` client library.

    Every result coming back from the library is converted into the
    project's ``PaperMetadata`` schema before it is handed to the caller.
    """

    def __init__(self) -> None:
        """Create the underlying ``arxiv.Client`` with its default settings."""
        self._client = arxiv.Client()

    def search(self, request: PaperSearchRequest) -> List[PaperMetadata]:
        """Run a free-form, relevance-sorted query against arXiv.

        Args:
            request: Supplies the query string (``request.query``) and the
                maximum number of results to return (``request.top_k``).

        Returns:
            The matching papers converted to ``PaperMetadata``, in the order
            the client yields them.
        """
        query = arxiv.Search(
            query=request.query,
            max_results=request.top_k,
            sort_by=arxiv.SortCriterion.Relevance,
        )
        return [self._to_metadata(hit) for hit in self._client.results(query)]

    def fetch(self, *, arxiv_id: Optional[str] = None, doi: Optional[str] = None) -> Optional[PaperMetadata]:
        """Fetch a single paper by arXiv identifier or DOI.

        Args:
            arxiv_id: arXiv identifier to look up. Takes precedence over
                ``doi`` when both are given.
            doi: DOI to look up; only consulted when ``arxiv_id`` is falsy.

        Returns:
            Metadata for the first result the client yields, or ``None`` when
            the lookup yields nothing.

        Raises:
            ValueError: If neither ``arxiv_id`` nor ``doi`` is provided (empty
                strings count as not provided).
        """
        if arxiv_id:
            lookup = arxiv.Search(id_list=[arxiv_id])
        elif doi:
            # NOTE(review): confirm that the arXiv API honours a ``doi:`` field
            # prefix in search queries; it is not obvious from this file.
            lookup = arxiv.Search(query=f"doi:{doi}", max_results=1)
        else:
            raise ValueError("Either arxiv_id or doi must be provided.")

        # Only the first hit matters; ``next`` stops consuming the result
        # stream as soon as one entry is available.
        first_hit = next(iter(self._client.results(lookup)), None)
        if first_hit is None:
            return None
        return self._to_metadata(first_hit)

    @staticmethod
    def _as_naive_utc(moment: datetime) -> datetime:
        """Return *moment* as a naive datetime expressed in UTC.

        Timezone-aware values are shifted to UTC and then stripped of their
        ``tzinfo``. Values that carry no UTC offset are returned unchanged
        because there is nothing reliable to convert from.
        """
        if moment.utcoffset() is None:
            return moment
        return moment.astimezone(timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _to_metadata(entry: arxiv.Result) -> PaperMetadata:
        """Convert the arxiv library's ``Result`` into ``PaperMetadata``.

        Args:
            entry: A single result object produced by the arxiv client.

        Returns:
            A ``PaperMetadata`` with ``source`` set to ``"arxiv"``. The title
            and abstract are whitespace-stripped, an empty abstract becomes
            ``None``, and ``published`` is a naive UTC datetime (or ``None``
            when the entry has no publication time).
        """
        published = None
        if entry.published:
            # Normalise to UTC rather than the host's local time so the stored
            # value does not depend on the timezone of the machine running the
            # tool server. The value is kept naive, as before.
            published = ArxivClient._as_naive_utc(entry.published)
        authors = [author.name for author in entry.authors]
        return PaperMetadata(
            title=entry.title.strip(),
            abstract=entry.summary.strip() or None,
            authors=authors,
            doi=entry.doi,
            arxiv_id=entry.get_short_id(),
            published=published,
            primary_category=str(entry.primary_category) if entry.primary_category else None,
            url=entry.entry_id,
            source="arxiv",
        )