# calibre-web/cps/metadata_provider/scholar.py

# -*- coding: utf-8 -*-

#  This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
#    Copyright (C) 2021 OzzieIsaacs
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
import itertools
from typing import Dict, List, Optional
from urllib.parse import quote, unquote

# fake_useragent may not be installed. When its error class cannot be
# imported, alias the name to BaseException so the ``except`` clause below
# still refers to a valid exception class.
try:
    from fake_useragent.errors import FakeUserAgentError
except (ImportError):
    FakeUserAgentError = BaseException
# A FakeUserAgentError raised while importing scholarly is converted into an
# ImportError. With the BaseException fallback above, any exception raised
# during the import is converted the same way.
# NOTE(review): presumably this lets the provider loader treat the module as
# "not installed" -- confirm against the code that imports this file.
try:
    from scholarly import scholarly
except FakeUserAgentError:
    raise ImportError("No module named 'scholarly'")

from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata

log = logger.create()


class scholar(Metadata):
    """Metadata provider that looks up publications on Google Scholar.

    Searches are delegated to the ``scholarly`` package and each hit is
    converted into a ``MetaRecord``.
    """

    __name__ = "Google Scholar"
    __id__ = "googlescholar"
    META_URL = "https://scholar.google.com/"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search Google Scholar and return up to ten matching records.

        :param query: free-text search string entered by the user.
        :param generic_cover: cover URL used for results that carry no image.
        :param locale: forwarded unchanged to the result parser.
        :return: a list of records (empty when the provider is inactive), or
            ``None`` when the lookup itself fails.
        """
        if not self.active:
            return []

        title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            query = " ".join(quote(t.encode("utf-8")) for t in title_tokens)

        try:
            scholarly.set_timeout(20)
            scholarly.set_retries(2)
            # islice is lazy: results are only pulled while iterating. Drain
            # it inside the try so an error raised mid-iteration is logged
            # and reported as None as well, instead of escaping to the caller.
            results = list(itertools.islice(scholarly.search_pubs(query), 10))
        except Exception as e:
            log.warning(e)
            return None

        return [
            self._parse_search_result(
                result=result, generic_cover=generic_cover, locale=locale
            )
            for result in results
        ]

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert one publication dict into a ``MetaRecord``.

        :param result: publication dict; must contain a ``"bib"`` mapping.
        :param generic_cover: cover URL used when ``result`` has no image URL.
        :param locale: currently unused by this provider.
        :return: the populated record.
        :raises KeyError: if ``result`` has no ``"bib"`` entry.
        """
        bib = result["bib"]
        # The publication link doubles as record id and url; fall back to the
        # eprint link, then to an empty string.
        link = result.get("pub_url", result.get("eprint_url", ""))

        match = MetaRecord(
            id=link,
            title=bib.get("title"),
            authors=bib.get("author", []),
            url=link,
            source=MetaSourceInfo(
                id=self.__id__, description=self.__name__, link=scholar.META_URL
            ),
        )

        match.cover = result.get("image", {}).get("original_url", generic_cover)
        match.description = unquote(bib.get("abstract", ""))
        match.publisher = bib.get("venue", "")
        # Only the year is available, so pin the date to January 1st. A
        # missing year yields an empty date rather than a TypeError.
        pub_year = bib.get("pub_year")
        match.publishedDate = str(pub_year) + "-01-01" if pub_year else ""
        match.identifiers = {"scholar": match.id}
        return match