calibre-web/cps/metadata_provider/google.py

# -*- coding: utf-8 -*-

#  This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
#    Copyright (C) 2021 OzzieIsaacs
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program. If not, see <http://www.gnu.org/licenses/>.

# Google Books api document: https://developers.google.com/books/docs/v1/using
from typing import Dict, List, Optional
from urllib.parse import quote

import requests

from cps import logger
from cps.isoLanguages import get_lang3, get_language_name
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata

log = logger.create()


class Google(Metadata):
    __name__ = "Google"
    __id__ = "google"
    DESCRIPTION = "Google Books"
    META_URL = "https://books.google.com/"
    BOOK_URL = "https://books.google.com/books?id="
    SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
    ISBN_TYPE = "ISBN_13"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        val = list()    
        if self.active:

            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
            if title_tokens:
                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                query = "+".join(tokens)
            try:
                results = requests.get(Google.SEARCH_URL + query)
                results.raise_for_status()
            except Exception as e:
                log.warning(e)
                return None
            for result in results.json().get("items", []):
                val.append(
                    self._parse_search_result(
                        result=result, generic_cover=generic_cover, locale=locale
                    )
                )
        return val

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        match = MetaRecord(
            id=result["id"],
            title=result["volumeInfo"]["title"],
            authors=result["volumeInfo"].get("authors", []),
            url=Google.BOOK_URL + result["id"],
            source=MetaSourceInfo(
                id=self.__id__,
                description=Google.DESCRIPTION,
                link=Google.META_URL,
            ),
        )

        match.cover = self._parse_cover(result=result, generic_cover=generic_cover)
        match.description = result["volumeInfo"].get("description", "")
        match.languages = self._parse_languages(result=result, locale=locale)
        match.publisher = result["volumeInfo"].get("publisher", "")
        match.publishedDate = result["volumeInfo"].get("publishedDate", "")
        match.rating = result["volumeInfo"].get("averageRating", 0)
        match.series, match.series_index = "", 1
        match.tags = result["volumeInfo"].get("categories", [])

        match.identifiers = {"google": match.id}
        match = self._parse_isbn(result=result, match=match)
        return match

    @staticmethod
    def _parse_isbn(result: Dict, match: MetaRecord) -> MetaRecord:
        identifiers = result["volumeInfo"].get("industryIdentifiers", [])
        for identifier in identifiers:
            if identifier.get("type") == Google.ISBN_TYPE:
                match.identifiers["isbn"] = identifier.get("identifier")
                break
        return match

    @staticmethod
    def _parse_cover(result: Dict, generic_cover: str) -> str:
        if result["volumeInfo"].get("imageLinks"):
            cover_url = result["volumeInfo"]["imageLinks"]["thumbnail"]
            
            # strip curl in cover
            cover_url = cover_url.replace("&edge=curl", "")
            
            # request 800x900 cover image (higher resolution)
            cover_url += "&fife=w800-h900"
            
            return cover_url.replace("http://", "https://")
        return generic_cover

    @staticmethod
    def _parse_languages(result: Dict, locale: str) -> List[str]:
        language_iso2 = result["volumeInfo"].get("language", "")
        languages = (
            [get_language_name(locale, get_lang3(language_iso2))]
            if language_iso2
            else []
        )
        return languages
Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00			`# -- coding: utf-8 --`

			`# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)`
			`# Copyright (C) 2021 OzzieIsaacs`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`

Update python search Metadata 2021-07-07 19:24:29 +00:00			`# Google Books api document: https://developers.google.com/books/docs/v1/using`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`from typing import Dict, List, Optional`
			`from urllib.parse import quote`
Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00
			`import requests`
refactor and cleaning 2021-12-13 00:23:03 +00:00
Add exception handling and logger in metadata provider 2022-02-25 04:18:07 +00:00			`from cps import logger`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`from cps.isoLanguages import get_lang3, get_language_name`
unify scholar 2021-12-13 16:21:41 +00:00			`from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata`
Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00
Add exception handling and logger in metadata provider 2022-02-25 04:18:07 +00:00			`log = logger.create()`

Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00
Fixes for grab metadata in python 2021-07-06 18:24:27 +00:00			`class Google(Metadata):`
Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00			`__name__ = "Google"`
Update python search Metadata 2021-07-08 17:14:38 +00:00			`__id__ = "google"`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`DESCRIPTION = "Google Books"`
			`META_URL = "https://books.google.com/"`
			`BOOK_URL = "https://books.google.com/books?id="`
			`SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="`
			`ISBN_TYPE = "ISBN_13"`
Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`def search(`
			`self, query: str, generic_cover: str = "", locale: str = "en"`
			`) -> Optional[List[MetaRecord]]:`
Merge remote-tracking branch 'lubimyczytac/add_lubimyczytac.pl_meta_provider' into Develop # Conflicts: # optional-requirements.txt 2022-01-27 17:37:02 +00:00			`val = list()`
Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00			`if self.active:`

add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`title_tokens = list(self.get_title_tokens(query, strip_joiners=False))`
			`if title_tokens:`
			`tokens = [quote(t.encode("utf-8")) for t in title_tokens]`
			`query = "+".join(tokens)`
Add exception handling and logger in metadata provider 2022-02-25 04:18:07 +00:00			`try:`
			`results = requests.get(Google.SEARCH_URL + query)`
			`results.raise_for_status()`
			`except Exception as e:`
			`log.warning(e)`
			`return None`
Bugfix for empty search results from google 2022-02-12 11:34:54 +00:00			`for result in results.json().get("items", []):`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`val.append(`
			`self._parse_search_result(`
			`result=result, generic_cover=generic_cover, locale=locale`
			`)`
			`)`
Merge remote-tracking branch 'lubimyczytac/add_lubimyczytac.pl_meta_provider' into Develop # Conflicts: # optional-requirements.txt 2022-01-27 17:37:02 +00:00			`return val`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00
			`def _parse_search_result(`
			`self, result: Dict, generic_cover: str, locale: str`
			`) -> MetaRecord:`
unify scholar 2021-12-13 16:21:41 +00:00			`match = MetaRecord(`
			`id=result["id"],`
			`title=result["volumeInfo"]["title"],`
			`authors=result["volumeInfo"].get("authors", []),`
			`url=Google.BOOK_URL + result["id"],`
			`source=MetaSourceInfo(`
			`id=self.__id__,`
			`description=Google.DESCRIPTION,`
			`link=Google.META_URL,`
			`),`
			`)`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00
unify scholar 2021-12-13 16:21:41 +00:00			`match.cover = self._parse_cover(result=result, generic_cover=generic_cover)`
			`match.description = result["volumeInfo"].get("description", "")`
			`match.languages = self._parse_languages(result=result, locale=locale)`
			`match.publisher = result["volumeInfo"].get("publisher", "")`
			`match.publishedDate = result["volumeInfo"].get("publishedDate", "")`
			`match.rating = result["volumeInfo"].get("averageRating", 0)`
			`match.series, match.series_index = "", 1`
			`match.tags = result["volumeInfo"].get("categories", [])`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00
unify scholar 2021-12-13 16:21:41 +00:00			`match.identifiers = {"google": match.id}`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`match = self._parse_isbn(result=result, match=match)`
			`return match`

			`@staticmethod`
unify scholar 2021-12-13 16:21:41 +00:00			`def _parse_isbn(result: Dict, match: MetaRecord) -> MetaRecord:`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`identifiers = result["volumeInfo"].get("industryIdentifiers", [])`
			`for identifier in identifiers:`
			`if identifier.get("type") == Google.ISBN_TYPE:`
unify scholar 2021-12-13 16:21:41 +00:00			`match.identifiers["isbn"] = identifier.get("identifier")`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`break`
			`return match`
Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`@staticmethod`
			`def _parse_cover(result: Dict, generic_cover: str) -> str:`
			`if result["volumeInfo"].get("imageLinks"):`
			`cover_url = result["volumeInfo"]["imageLinks"]["thumbnail"]`
Google Covers: strip curl in thumbnail and request higher resolution image 2022-09-08 15:23:53 +00:00
			`# strip curl in cover`
			`cover_url = cover_url.replace("&edge=curl", "")`

			`# request 800x900 cover image (higher resolution)`
			`cover_url += "&fife=w800-h900"`

add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`return cover_url.replace("http://", "https://")`
			`return generic_cover`
Basic Metadata mechanism in python 2021-07-05 16:55:54 +00:00
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`@staticmethod`
			`def _parse_languages(result: Dict, locale: str) -> List[str]:`
unify scholar 2021-12-13 16:21:41 +00:00			`language_iso2 = result["volumeInfo"].get("language", "")`
add series, languages and isbn to google provider 2021-12-13 14:14:19 +00:00			`languages = (`
			`[get_language_name(locale, get_lang3(language_iso2))]`
			`if language_iso2`
			`else []`
			`)`
			`return languages`