2021-07-05 16:55:54 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
|
|
|
|
# Copyright (C) 2021 OzzieIsaacs
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2021-12-13 16:21:41 +00:00
|
|
|
import itertools
|
|
|
|
from typing import Dict, List, Optional
|
2022-02-12 11:41:29 +00:00
|
|
|
from urllib.parse import quote, unquote
|
2021-07-05 16:55:54 +00:00
|
|
|
|
2022-01-31 17:09:23 +00:00
|
|
|
try:
|
|
|
|
from fake_useragent.errors import FakeUserAgentError
|
|
|
|
except (ImportError):
|
|
|
|
FakeUserAgentError = BaseException
|
|
|
|
try:
|
|
|
|
from scholarly import scholarly
|
|
|
|
except FakeUserAgentError:
|
|
|
|
raise ImportError("No module named 'scholarly'")
|
2021-07-05 16:55:54 +00:00
|
|
|
|
2022-02-25 04:18:07 +00:00
|
|
|
from cps import logger
|
2021-12-13 16:21:41 +00:00
|
|
|
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
2021-07-06 18:24:27 +00:00
|
|
|
|
2022-02-25 04:18:07 +00:00
|
|
|
# Module-level logger obtained from cps.logger; used by the provider below to
# report failed Google Scholar lookups as warnings.
log = logger.create()
|
|
|
|
|
|
|
|
|
2021-07-06 18:24:27 +00:00
|
|
|
class scholar(Metadata):
    """Metadata provider that looks up publications on Google Scholar.

    Queries are sent through the ``scholarly`` package and every hit is
    converted into a ``MetaRecord``.
    """

    __name__ = "Google Scholar"
    __id__ = "googlescholar"
    META_URL = "https://scholar.google.com/"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search Google Scholar for ``query`` and return up to 10 records.

        :param query: free-text search string; it is split into title tokens
            which are URL-quoted and re-joined with spaces before the lookup.
        :param generic_cover: accepted for interface compatibility; it is
            currently not forwarded to the result parser (see note below).
        :param locale: passed through to ``_parse_search_result``.
        :return: list of ``MetaRecord`` objects; an empty list when the
            provider is inactive or the lookup fails.
        """
        if not self.active:
            return []

        title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            tokens = [quote(t.encode("utf-8")) for t in title_tokens]
            query = " ".join(tokens)

        try:
            scholarly.set_timeout(20)
            scholarly.set_retries(2)
            # search_pubs() hands back a lazy iterator, so it is consumed
            # here, inside the try block: errors raised while the results are
            # actually fetched are then logged instead of escaping.
            results = list(itertools.islice(scholarly.search_pubs(query), 10))
        except Exception as e:
            log.warning(e)
            return []

        # NOTE(review): an empty string is passed instead of the caller's
        # generic_cover, so records without an image get an empty cover.
        # Kept as in the original -- confirm this is intended.
        return [
            self._parse_search_result(
                result=result, generic_cover="", locale=locale
            )
            for result in results
        ]

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert one ``scholarly`` search hit into a ``MetaRecord``.

        :param result: publication dict; bibliographic fields are read from
            its ``"bib"`` entry, links from ``"pub_url"`` / ``"eprint_url"``.
        :param generic_cover: cover used when the hit carries no
            ``image.original_url``.
        :param locale: unused here; kept for signature compatibility.
        :return: the populated ``MetaRecord``.
        """
        # A hit without a "bib" entry is treated as having no bibliographic
        # data instead of raising KeyError.
        bib = result.get("bib") or {}
        # Prefer the publisher URL, fall back to the e-print URL; the same
        # value serves as record id, url and "scholar" identifier.
        link = result.get("pub_url", result.get("eprint_url", ""))

        match = MetaRecord(
            id=link,
            title=bib.get("title"),
            authors=bib.get("author", []),
            url=link,
            source=MetaSourceInfo(
                id=self.__id__, description=self.__name__, link=scholar.META_URL
            ),
        )

        match.cover = result.get("image", {}).get("original_url", generic_cover)
        match.description = unquote(bib.get("abstract", ""))
        match.publisher = bib.get("venue", "")

        # Only the year is known, so the date is pinned to January 1st.
        # A missing year previously raised TypeError (None + str); now the
        # date is left empty. str() also tolerates a non-string year.
        pub_year = bib.get("pub_year")
        match.publishedDate = str(pub_year) + "-01-01" if pub_year else ""

        match.identifiers = {"scholar": match.id}
        return match
|