mirror of
				https://github.com/janeczku/calibre-web
				synced 2025-11-04 01:03:02 +00:00 
			
		
		
		
	Add series, languages and ISBN to the Google provider
This commit is contained in:
		@@ -17,41 +17,93 @@
 | 
			
		||||
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
# Google Books api document: https://developers.google.com/books/docs/v1/using
 | 
			
		||||
from typing import Dict, List, Optional
 | 
			
		||||
from urllib.parse import quote
 | 
			
		||||
 | 
			
		||||
import requests
 | 
			
		||||
 | 
			
		||||
from cps.services.Metadata import Metadata
 | 
			
		||||
from cps.isoLanguages import get_lang3, get_language_name
 | 
			
		||||
from cps.services.Metadata import MetaRecord, Metadata
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Google(Metadata):
    """Metadata provider that looks books up through the Google Books API.

    API documentation: https://developers.google.com/books/docs/v1/using
    """

    __name__ = "Google"
    __id__ = "google"
    # Same endpoint as SEARCH_URL; kept so existing references keep working.
    BASE_URL = "https://www.googleapis.com/books/v1/volumes?q="
    DESCRIPTION = "Google Books"
    META_URL = "https://books.google.com/"
    BOOK_URL = "https://books.google.com/books?id="
    SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
    ISBN_TYPE = "ISBN_13"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search Google Books and return one record per volume found.

        Args:
            query: Free-text search string entered by the user.
            generic_cover: Cover URL to use for volumes without a thumbnail.
            locale: Locale used to render language names.

        Returns:
            A list of records (empty when the response carries no items),
            or ``None`` when this provider is not active.
        """
        if not self.active:
            return None

        title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            # URL-quote every token separately and join them with "+".
            query = "+".join(
                quote(token.encode("utf-8")) for token in title_tokens
            )

        response = requests.get(Google.SEARCH_URL + query)
        # The payload may have no "items" key at all (e.g. nothing matched),
        # so fall back to an empty list instead of raising KeyError.
        items = response.json().get("items", [])
        # Collect every hit; returning from inside the loop would stop after
        # the first one.
        return [
            self._parse_search_result(
                result=item, generic_cover=generic_cover, locale=locale
            )
            for item in items
        ]

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert one volume from the API response into a record.

        Args:
            result: A single element of the response's ``items`` list.
            generic_cover: Cover URL to use when the volume has no thumbnail.
            locale: Locale used to render the language name.

        Returns:
            The populated record, including the ``google`` identifier and,
            when available, the ``isbn`` identifier.

        Raises:
            KeyError: If ``id``, ``volumeInfo`` or the volume title is missing.
        """
        volume_info = result["volumeInfo"]
        match = {
            "id": result["id"],
            "title": volume_info["title"],
            "authors": volume_info.get("authors", []),
            "url": Google.BOOK_URL + result["id"],
            "cover": self._parse_cover(
                result=result, generic_cover=generic_cover
            ),
            "description": volume_info.get("description", ""),
            "languages": self._parse_languages(result=result, locale=locale),
            "publisher": volume_info.get("publisher", ""),
            "publishedDate": volume_info.get("publishedDate", ""),
            "rating": volume_info.get("averageRating", 0),
            # No series data is read from the response; use placeholders.
            "series": "",
            "series_index": 1,
            "tags": volume_info.get("categories", []),
            "source": {
                "id": self.__id__,
                "description": Google.DESCRIPTION,
                "link": Google.META_URL,
            },
            "identifiers": {"google": result["id"]},
        }
        return self._parse_isbn(result=result, match=match)

    @staticmethod
    def _parse_isbn(result: Dict, match: Dict) -> Dict:
        """Add the first ``ISBN_TYPE`` identifier of the volume to ``match``.

        ``match["identifiers"]`` is updated in place; ``match`` is returned
        for convenience. Nothing is added when no such identifier exists.
        """
        identifiers = result["volumeInfo"].get("industryIdentifiers", [])
        for identifier in identifiers:
            if identifier.get("type") == Google.ISBN_TYPE:
                match["identifiers"]["isbn"] = identifier.get("identifier")
                break
        return match

    @staticmethod
    def _parse_cover(result: Dict, generic_cover: str) -> str:
        """Return the volume's thumbnail URL (forced to https) or the fallback.

        ``generic_cover`` is returned when the volume has no ``imageLinks``
        or when ``imageLinks`` carries no ``thumbnail`` entry.
        """
        image_links = result["volumeInfo"].get("imageLinks") or {}
        cover_url = image_links.get("thumbnail")
        if not cover_url:
            return generic_cover
        return cover_url.replace("http://", "https://")

    @staticmethod
    def _parse_languages(result: Dict, locale: str) -> List[str]:
        """Return the volume's language name rendered for ``locale``.

        The language code is stored under ``volumeInfo``, not at the top
        level of the result. An empty list is returned when it is absent.
        """
        language_iso2 = result["volumeInfo"].get("language", "")
        if not language_iso2:
            return []
        return [get_language_name(locale, get_lang3(language_iso2))]
 | 
			
		||||
 
 | 
			
		||||
@@ -107,7 +107,9 @@ class LubimyCzytac(Metadata):
 | 
			
		||||
 | 
			
		||||
    SUMMARY = "//script[@type='application/ld+json']//text()"
 | 
			
		||||
 | 
			
		||||
    def search(self, query: str, generic_cover: str = "") -> Optional[List]:
 | 
			
		||||
    def search(
 | 
			
		||||
        self, query: str, generic_cover: str = "", locale: str = "en"
 | 
			
		||||
    ) -> Optional[List[MetaRecord]]:
 | 
			
		||||
        if self.active:
 | 
			
		||||
            result = requests.get(self._prepare_query(title=query))
 | 
			
		||||
            root = fromstring(result.text)
 | 
			
		||||
@@ -117,10 +119,7 @@ class LubimyCzytac(Metadata):
 | 
			
		||||
                with ThreadPool(processes=10) as pool:
 | 
			
		||||
                    final_matches = pool.starmap(
 | 
			
		||||
                        lc_parser.parse_single_book,
 | 
			
		||||
                        [
 | 
			
		||||
                            (match, generic_cover)
 | 
			
		||||
                            for match in matches
 | 
			
		||||
                        ],
 | 
			
		||||
                        [(match, generic_cover) for match in matches],
 | 
			
		||||
                    )
 | 
			
		||||
                return final_matches
 | 
			
		||||
            return matches
 | 
			
		||||
@@ -192,26 +191,25 @@ class LubimyCzytacParser:
 | 
			
		||||
            )
 | 
			
		||||
        return matches
 | 
			
		||||
 | 
			
		||||
    def parse_single_book(
 | 
			
		||||
        self, match: Dict, generic_cover: str
 | 
			
		||||
    ) -> MetaRecord:
 | 
			
		||||
    def parse_single_book(self, match: Dict, generic_cover: str) -> MetaRecord:
 | 
			
		||||
        response = requests.get(match.get("url"))
 | 
			
		||||
        self.root = fromstring(response.text)
 | 
			
		||||
        match["series"], match["series_index"] = self._parse_series()
 | 
			
		||||
        match["tags"] = self._parse_tags()
 | 
			
		||||
        match["cover"] = self._parse_cover(generic_cover=generic_cover)
 | 
			
		||||
        match["description"] = self._parse_description()
 | 
			
		||||
        match["languages"] = self._parse_languages()
 | 
			
		||||
        match["publisher"] = self._parse_publisher()
 | 
			
		||||
        match["publishedDate"] = self._parse_from_summary(
 | 
			
		||||
            attribute_name="datePublished"
 | 
			
		||||
        )
 | 
			
		||||
        match["rating"] = self._parse_rating()
 | 
			
		||||
        match["description"] = self._parse_description()
 | 
			
		||||
        match["cover"] = self._parse_cover(generic_cover=generic_cover)
 | 
			
		||||
        match["series"], match["series_index"] = self._parse_series()
 | 
			
		||||
        match["tags"] = self._parse_tags()
 | 
			
		||||
 | 
			
		||||
        match["source"] = {
 | 
			
		||||
            "id": self.metadata.__id__,
 | 
			
		||||
            "description": self.metadata.__name__,
 | 
			
		||||
            "link": LubimyCzytac.BASE_URL,
 | 
			
		||||
        }
 | 
			
		||||
        match["languages"] = self._parse_languages()
 | 
			
		||||
        match["identifiers"] = {
 | 
			
		||||
            "isbn": self._parse_isbn(),
 | 
			
		||||
            "lubimyczytac": match["id"],
 | 
			
		||||
 
 | 
			
		||||
@@ -30,7 +30,7 @@ from sqlalchemy.exc import InvalidRequestError, OperationalError
 | 
			
		||||
from sqlalchemy.orm.attributes import flag_modified
 | 
			
		||||
 | 
			
		||||
from cps.services.Metadata import Metadata
 | 
			
		||||
from . import constants, logger, ub
 | 
			
		||||
from . import constants, get_locale, logger, ub
 | 
			
		||||
 | 
			
		||||
meta = Blueprint("metadata", __name__)
 | 
			
		||||
 | 
			
		||||
@@ -113,11 +113,12 @@ def metadata_search():
 | 
			
		||||
    query = request.form.to_dict().get("query")
 | 
			
		||||
    data = list()
 | 
			
		||||
    active = current_user.view_settings.get("metadata", {})
 | 
			
		||||
    locale = get_locale()
 | 
			
		||||
    if query:
 | 
			
		||||
        static_cover = url_for("static", filename="generic_cover.jpg")
 | 
			
		||||
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
 | 
			
		||||
            meta = {
 | 
			
		||||
                executor.submit(c.search, query, static_cover): c
 | 
			
		||||
                executor.submit(c.search, query, static_cover, locale): c
 | 
			
		||||
                for c in cl
 | 
			
		||||
                if active.get(c.__id__, True)
 | 
			
		||||
            }
 | 
			
		||||
 
 | 
			
		||||
@@ -20,6 +20,30 @@ import re
 | 
			
		||||
from typing import Dict, Generator, List, Optional, TypedDict, Union
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MetaSourceInfo(TypedDict):
    """Describes the metadata provider a record came from."""

    # Provider identifier (the provider class's ``__id__``).
    id: str
    # Human-readable provider name.
    description: str
    # URL of the provider's website.
    link: str


class MetaRecord(TypedDict):
    """A single search result returned by a metadata provider."""

    # Identifier of the book at the provider.
    id: Union[str, int]
    title: str
    authors: List[str]
    # Link to the book's page at the provider.
    url: str
    # Cover image URL, or the generic cover passed to ``search``.
    cover: str
    series: Optional[str]
    series_index: Optional[Union[int, float]]
    tags: Optional[List[str]]
    publisher: Optional[str]
    publishedDate: Optional[str]
    # Providers may report fractional averages, so floats are allowed too.
    rating: Optional[Union[int, float]]
    description: Optional[str]
    source: MetaSourceInfo
    languages: Optional[List[str]]
    # Maps an identifier scheme (e.g. "isbn", "google") to its value.
    identifiers: Dict[str, Union[str, int]]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Metadata:
 | 
			
		||||
    __name__ = "Generic"
 | 
			
		||||
    __id__ = "generic"
 | 
			
		||||
@@ -31,7 +55,9 @@ class Metadata:
 | 
			
		||||
        self.active = state
 | 
			
		||||
 | 
			
		||||
    @abc.abstractmethod
 | 
			
		||||
    def search(self, query: str, generic_cover: str = ""):
 | 
			
		||||
    def search(
 | 
			
		||||
        self, query: str, generic_cover: str = "", locale: str = "en"
 | 
			
		||||
    ) -> Optional[List[MetaRecord]]:
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
@@ -73,27 +99,3 @@ class Metadata:
 | 
			
		||||
                not strip_joiners or token.lower() not in ("a", "and", "the", "&")
 | 
			
		||||
            ):
 | 
			
		||||
                yield token
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MetaSourceInfo(TypedDict):
 | 
			
		||||
    id: str
 | 
			
		||||
    description: str
 | 
			
		||||
    link: str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MetaRecord(TypedDict):
 | 
			
		||||
    id: Union[str, int]
 | 
			
		||||
    title: str
 | 
			
		||||
    authors: List[str]
 | 
			
		||||
    url: str
 | 
			
		||||
    cover: str
 | 
			
		||||
    series: Optional[str]
 | 
			
		||||
    series_index: Optional[Union[int, float]]
 | 
			
		||||
    tags: Optional[List[str]]
 | 
			
		||||
    publisher: Optional[str]
 | 
			
		||||
    publishedDate: Optional[str]
 | 
			
		||||
    rating: Optional[int]
 | 
			
		||||
    description: Optional[str]
 | 
			
		||||
    source: MetaSourceInfo
 | 
			
		||||
    languages: Optional[List[str]]
 | 
			
		||||
    identifiers: Dict[str, Union[str, int]]
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user