1
0
mirror of https://github.com/janeczku/calibre-web synced 2024-11-27 03:57:24 +00:00

Merge remote-tracking branch 'lubimyczytac/add_lubimyczytac.pl_meta_provider' into Develop

# Conflicts:
#	optional-requirements.txt
This commit is contained in:
Ozzie Isaacs 2022-01-27 18:37:02 +01:00
commit 4f3c396450
11 changed files with 1163 additions and 1173 deletions

View File

@ -17,49 +17,68 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ComicVine api document: https://comicvine.gamespot.com/api/documentation
from typing import Dict, List, Optional
from urllib.parse import quote
import requests
from cps.services.Metadata import Metadata
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
class ComicVine(Metadata):
__name__ = "ComicVine"
__id__ = "comicvine"
DESCRIPTION = "ComicVine Books"
META_URL = "https://comicvine.gamespot.com/"
API_KEY = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
BASE_URL = (
f"https://comicvine.gamespot.com/api/search?api_key={API_KEY}"
f"&resources=issue&query="
)
QUERY_PARAMS = "&sort=name:desc&format=json"
HEADERS = {"User-Agent": "Not Evil Browser"}
def search(self, query, generic_cover=""):
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
val = list()
apikey = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
if self.active:
headers = {
'User-Agent': 'Not Evil Browser'
}
result = requests.get("https://comicvine.gamespot.com/api/search?api_key="
+ apikey + "&resources=issue&query=" + query + "&sort=name:desc&format=json", headers=headers)
for r in result.json().get('results'):
seriesTitle = r['volume'].get('name', "")
if r.get('store_date'):
dateFomers = r.get('store_date')
else:
dateFomers = r.get('date_added')
v = dict()
v['id'] = r['id']
v['title'] = seriesTitle + " #" + r.get('issue_number', "0") + " - " + ( r.get('name', "") or "")
v['authors'] = r.get('authors', [])
v['description'] = r.get('description', "")
v['publisher'] = ""
v['publishedDate'] = dateFomers
v['tags'] = ["Comics", seriesTitle]
v['rating'] = 0
v['series'] = seriesTitle
v['cover'] = r['image'].get('original_url')
v['source'] = {
"id": self.__id__,
"description": "ComicVine Books",
"link": "https://comicvine.gamespot.com/"
}
v['url'] = r.get('site_detail_url', "")
val.append(v)
title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = "%20".join(tokens)
result = requests.get(
f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
headers=ComicVine.HEADERS,
)
for result in result.json()["results"]:
match = self._parse_search_result(
result=result, generic_cover=generic_cover, locale=locale
)
val.append(match)
return val
def _parse_search_result(
self, result: Dict, generic_cover: str, locale: str
) -> MetaRecord:
series = result["volume"].get("name", "")
series_index = result.get("issue_number", 0)
issue_name = result.get("name", "")
match = MetaRecord(
id=result["id"],
title=f"{series}#{series_index} - {issue_name}",
authors=result.get("authors", []),
url=result.get("site_detail_url", ""),
source=MetaSourceInfo(
id=self.__id__,
description=ComicVine.DESCRIPTION,
link=ComicVine.META_URL,
),
series=series,
)
match.cover = result["image"].get("original_url", generic_cover)
match.description = result.get("description", "")
match.publishedDate = result.get("store_date", result.get("date_added"))
match.series_index = series_index
match.tags = ["Comics", series]
match.identifiers = {"comicvine": match.id}
return match

View File

@ -17,39 +17,93 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Google Books api document: https://developers.google.com/books/docs/v1/using
from typing import Dict, List, Optional
from urllib.parse import quote
import requests
from cps.services.Metadata import Metadata
from cps.isoLanguages import get_lang3, get_language_name
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
class Google(Metadata):
__name__ = "Google"
__id__ = "google"
DESCRIPTION = "Google Books"
META_URL = "https://books.google.com/"
BOOK_URL = "https://books.google.com/books?id="
SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
ISBN_TYPE = "ISBN_13"
def search(self, query, generic_cover=""):
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
val = list()
if self.active:
val = list()
result = requests.get("https://www.googleapis.com/books/v1/volumes?q="+query.replace(" ","+"))
for r in result.json().get('items'):
v = dict()
v['id'] = r['id']
v['title'] = r['volumeInfo'].get('title',"")
v['authors'] = r['volumeInfo'].get('authors', [])
v['description'] = r['volumeInfo'].get('description', "")
v['publisher'] = r['volumeInfo'].get('publisher', "")
v['publishedDate'] = r['volumeInfo'].get('publishedDate', "")
v['tags'] = r['volumeInfo'].get('categories', [])
v['rating'] = r['volumeInfo'].get('averageRating', 0)
if r['volumeInfo'].get('imageLinks'):
v['cover'] = r['volumeInfo']['imageLinks']['thumbnail'].replace("http://", "https://")
else:
v['cover'] = "/../../../static/generic_cover.jpg"
v['source'] = {
"id": self.__id__,
"description": "Google Books",
"link": "https://books.google.com/"}
v['url'] = "https://books.google.com/books?id=" + r['id']
val.append(v)
return val
title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = "+".join(tokens)
results = requests.get(Google.SEARCH_URL + query)
for result in results.json()["items"]:
val.append(
self._parse_search_result(
result=result, generic_cover=generic_cover, locale=locale
)
)
return val
def _parse_search_result(
self, result: Dict, generic_cover: str, locale: str
) -> MetaRecord:
match = MetaRecord(
id=result["id"],
title=result["volumeInfo"]["title"],
authors=result["volumeInfo"].get("authors", []),
url=Google.BOOK_URL + result["id"],
source=MetaSourceInfo(
id=self.__id__,
description=Google.DESCRIPTION,
link=Google.META_URL,
),
)
match.cover = self._parse_cover(result=result, generic_cover=generic_cover)
match.description = result["volumeInfo"].get("description", "")
match.languages = self._parse_languages(result=result, locale=locale)
match.publisher = result["volumeInfo"].get("publisher", "")
match.publishedDate = result["volumeInfo"].get("publishedDate", "")
match.rating = result["volumeInfo"].get("averageRating", 0)
match.series, match.series_index = "", 1
match.tags = result["volumeInfo"].get("categories", [])
match.identifiers = {"google": match.id}
match = self._parse_isbn(result=result, match=match)
return match
@staticmethod
def _parse_isbn(result: Dict, match: MetaRecord) -> MetaRecord:
identifiers = result["volumeInfo"].get("industryIdentifiers", [])
for identifier in identifiers:
if identifier.get("type") == Google.ISBN_TYPE:
match.identifiers["isbn"] = identifier.get("identifier")
break
return match
@staticmethod
def _parse_cover(result: Dict, generic_cover: str) -> str:
if result["volumeInfo"].get("imageLinks"):
cover_url = result["volumeInfo"]["imageLinks"]["thumbnail"]
return cover_url.replace("http://", "https://")
return generic_cover
@staticmethod
def _parse_languages(result: Dict, locale: str) -> List[str]:
language_iso2 = result["volumeInfo"].get("language", "")
languages = (
[get_language_name(locale, get_lang3(language_iso2))]
if language_iso2
else []
)
return languages

View File

@ -0,0 +1,337 @@
# -*- coding: utf-8 -*-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2021 OzzieIsaacs
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import datetime
import json
import re
from multiprocessing.pool import ThreadPool
from typing import List, Optional, Tuple, Union
from urllib.parse import quote
import requests
from dateutil import parser
from html2text import HTML2Text
from lxml.html import HtmlElement, fromstring, tostring
from markdown2 import Markdown
from cps.isoLanguages import get_language_name
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
SYMBOLS_TO_TRANSLATE = (
"öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ",
"oOuUoOoOuUeEaAuUiIaAcCeElLnNoOsSzZzZ",
)
SYMBOL_TRANSLATION_MAP = dict(
[(ord(a), ord(b)) for (a, b) in zip(*SYMBOLS_TO_TRANSLATE)]
)
def get_int_or_float(value: str) -> Union[int, float]:
number_as_float = float(value)
number_as_int = int(number_as_float)
return number_as_int if number_as_float == number_as_int else number_as_float
def strip_accents(s: Optional[str]) -> Optional[str]:
return s.translate(SYMBOL_TRANSLATION_MAP) if s is not None else s
def sanitize_comments_html(html: str) -> str:
text = html2text(html)
md = Markdown()
html = md.convert(text)
return html
def html2text(html: str) -> str:
# replace <u> tags with <span> as <u> becomes emphasis in html2text
if isinstance(html, bytes):
html = html.decode("utf-8")
html = re.sub(
r"<\s*(?P<solidus>/?)\s*[uU]\b(?P<rest>[^>]*)>",
r"<\g<solidus>span\g<rest>>",
html,
)
h2t = HTML2Text()
h2t.body_width = 0
h2t.single_line_break = True
h2t.emphasis_mark = "*"
return h2t.handle(html)
class LubimyCzytac(Metadata):
__name__ = "LubimyCzytac.pl"
__id__ = "lubimyczytac"
BASE_URL = "https://lubimyczytac.pl"
BOOK_SEARCH_RESULT_XPATH = (
"*//div[@class='listSearch']//div[@class='authorAllBooks__single']"
)
SINGLE_BOOK_RESULT_XPATH = ".//div[contains(@class,'authorAllBooks__singleText')]"
TITLE_PATH = "/div/a[contains(@class,'authorAllBooks__singleTextTitle')]"
TITLE_TEXT_PATH = f"{TITLE_PATH}//text()"
URL_PATH = f"{TITLE_PATH}/@href"
AUTHORS_PATH = "/div/a[contains(@href,'autor')]//text()"
SIBLINGS = "/following-sibling::dd"
CONTAINER = "//section[@class='container book']"
PUBLISHER = f"{CONTAINER}//dt[contains(text(),'Wydawnictwo:')]{SIBLINGS}/a/text()"
LANGUAGES = f"{CONTAINER}//dt[contains(text(),'Język:')]{SIBLINGS}/text()"
DESCRIPTION = f"{CONTAINER}//div[@class='collapse-content']"
SERIES = f"{CONTAINER}//span/a[contains(@href,'/cykl/')]/text()"
DETAILS = "//div[@id='book-details']"
PUBLISH_DATE = "//dt[contains(@title,'Data pierwszego wydania"
FIRST_PUBLISH_DATE = f"{DETAILS}{PUBLISH_DATE} oryginalnego')]{SIBLINGS}[1]/text()"
FIRST_PUBLISH_DATE_PL = f"{DETAILS}{PUBLISH_DATE} polskiego')]{SIBLINGS}[1]/text()"
TAGS = "//nav[@aria-label='breadcrumb']//a[contains(@href,'/ksiazki/k/')]/text()"
RATING = "//meta[@property='books:rating:value']/@content"
COVER = "//meta[@property='og:image']/@content"
ISBN = "//meta[@property='books:isbn']/@content"
META_TITLE = "//meta[@property='og:description']/@content"
SUMMARY = "//script[@type='application/ld+json']//text()"
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
if self.active:
result = requests.get(self._prepare_query(title=query))
root = fromstring(result.text)
lc_parser = LubimyCzytacParser(root=root, metadata=self)
matches = lc_parser.parse_search_results()
if matches:
with ThreadPool(processes=10) as pool:
final_matches = pool.starmap(
lc_parser.parse_single_book,
[(match, generic_cover, locale) for match in matches],
)
return final_matches
return matches
def _prepare_query(self, title: str) -> str:
query = ""
characters_to_remove = "\?()\/"
pattern = "[" + characters_to_remove + "]"
title = re.sub(pattern, "", title)
title = title.replace("_", " ")
if '"' in title or ",," in title:
title = title.split('"')[0].split(",,")[0]
if "/" in title:
title_tokens = [
token for token in title.lower().split(" ") if len(token) > 1
]
else:
title_tokens = list(self.get_title_tokens(title, strip_joiners=False))
if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = query + "%20".join(tokens)
if not query:
return ""
return f"{LubimyCzytac.BASE_URL}/szukaj/ksiazki?phrase={query}"
class LubimyCzytacParser:
PAGES_TEMPLATE = "<p id='strony'>Książka ma {0} stron(y).</p>"
PUBLISH_DATE_TEMPLATE = "<p id='pierwsze_wydanie'>Data pierwszego wydania: {0}</p>"
PUBLISH_DATE_PL_TEMPLATE = (
"<p id='pierwsze_wydanie'>Data pierwszego wydania w Polsce: {0}</p>"
)
def __init__(self, root: HtmlElement, metadata: Metadata) -> None:
self.root = root
self.metadata = metadata
def parse_search_results(self) -> List[MetaRecord]:
matches = []
results = self.root.xpath(LubimyCzytac.BOOK_SEARCH_RESULT_XPATH)
for result in results:
title = self._parse_xpath_node(
root=result,
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
f"{LubimyCzytac.TITLE_TEXT_PATH}",
)
book_url = self._parse_xpath_node(
root=result,
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
f"{LubimyCzytac.URL_PATH}",
)
authors = self._parse_xpath_node(
root=result,
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
f"{LubimyCzytac.AUTHORS_PATH}",
take_first=False,
)
if not all([title, book_url, authors]):
continue
matches.append(
MetaRecord(
id=book_url.replace(f"/ksiazka/", "").split("/")[0],
title=title,
authors=[strip_accents(author) for author in authors],
url=LubimyCzytac.BASE_URL + book_url,
source=MetaSourceInfo(
id=self.metadata.__id__,
description=self.metadata.__name__,
link=LubimyCzytac.BASE_URL,
),
)
)
return matches
def parse_single_book(
self, match: MetaRecord, generic_cover: str, locale: str
) -> MetaRecord:
response = requests.get(match.url)
self.root = fromstring(response.text)
match.cover = self._parse_cover(generic_cover=generic_cover)
match.description = self._parse_description()
match.languages = self._parse_languages(locale=locale)
match.publisher = self._parse_publisher()
match.publishedDate = self._parse_from_summary(attribute_name="datePublished")
match.rating = self._parse_rating()
match.series, match.series_index = self._parse_series()
match.tags = self._parse_tags()
match.identifiers = {
"isbn": self._parse_isbn(),
"lubimyczytac": match.id,
}
return match
def _parse_xpath_node(
self,
xpath: str,
root: HtmlElement = None,
take_first: bool = True,
strip_element: bool = True,
) -> Optional[Union[str, List[str]]]:
root = root if root is not None else self.root
node = root.xpath(xpath)
if not node:
return None
return (
(node[0].strip() if strip_element else node[0])
if take_first
else [x.strip() for x in node]
)
def _parse_cover(self, generic_cover) -> Optional[str]:
return (
self._parse_xpath_node(xpath=LubimyCzytac.COVER, take_first=True)
or generic_cover
)
def _parse_publisher(self) -> Optional[str]:
return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True)
def _parse_languages(self, locale: str) -> List[str]:
languages = list()
lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True)
if lang:
if "polski" in lang:
languages.append("pol")
if "angielski" in lang:
languages.append("eng")
return [get_language_name(locale, language) for language in languages]
def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]:
series_index = 0
series = self._parse_xpath_node(xpath=LubimyCzytac.SERIES, take_first=True)
if series:
if "tom " in series:
series_name, series_info = series.split(" (tom ", 1)
series_info = series_info.replace(" ", "").replace(")", "")
# Check if book is not a bundle, i.e. chapter 1-3
if "-" in series_info:
series_info = series_info.split("-", 1)[0]
if series_info.replace(".", "").isdigit() is True:
series_index = get_int_or_float(series_info)
return series_name, series_index
return None, None
def _parse_tags(self) -> List[str]:
tags = self._parse_xpath_node(xpath=LubimyCzytac.TAGS, take_first=False)
return [
strip_accents(w.replace(", itd.", " itd."))
for w in tags
if isinstance(w, str)
]
def _parse_from_summary(self, attribute_name: str) -> Optional[str]:
value = None
summary_text = self._parse_xpath_node(xpath=LubimyCzytac.SUMMARY)
if summary_text:
data = json.loads(summary_text)
value = data.get(attribute_name)
return value.strip() if value is not None else value
def _parse_rating(self) -> Optional[str]:
rating = self._parse_xpath_node(xpath=LubimyCzytac.RATING)
return round(float(rating.replace(",", ".")) / 2) if rating else rating
def _parse_date(self, xpath="first_publish") -> Optional[datetime.datetime]:
options = {
"first_publish": LubimyCzytac.FIRST_PUBLISH_DATE,
"first_publish_pl": LubimyCzytac.FIRST_PUBLISH_DATE_PL,
}
date = self._parse_xpath_node(xpath=options.get(xpath))
return parser.parse(date) if date else None
def _parse_isbn(self) -> Optional[str]:
return self._parse_xpath_node(xpath=LubimyCzytac.ISBN)
def _parse_description(self) -> str:
description = ""
description_node = self._parse_xpath_node(
xpath=LubimyCzytac.DESCRIPTION, strip_element=False
)
if description_node is not None:
for source in self.root.xpath('//p[@class="source"]'):
source.getparent().remove(source)
description = tostring(description_node, method="html")
description = sanitize_comments_html(description)
else:
description_node = self._parse_xpath_node(xpath=LubimyCzytac.META_TITLE)
if description_node is not None:
description = description_node
description = sanitize_comments_html(description)
description = self._add_extra_info_to_description(description=description)
return description
def _add_extra_info_to_description(self, description: str) -> str:
pages = self._parse_from_summary(attribute_name="numberOfPages")
if pages:
description += LubimyCzytacParser.PAGES_TEMPLATE.format(pages)
first_publish_date = self._parse_date()
if first_publish_date:
description += LubimyCzytacParser.PUBLISH_DATE_TEMPLATE.format(
first_publish_date.strftime("%d.%m.%Y")
)
first_publish_date_pl = self._parse_date(xpath="first_publish_pl")
if first_publish_date_pl:
description += LubimyCzytacParser.PUBLISH_DATE_PL_TEMPLATE.format(
first_publish_date_pl.strftime("%d.%m.%Y")
)
return description

View File

@ -15,46 +15,52 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import itertools
from typing import Dict, List, Optional
from urllib.parse import quote
from scholarly import scholarly
from cps.services.Metadata import Metadata
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
class scholar(Metadata):
__name__ = "Google Scholar"
__id__ = "googlescholar"
META_URL = "https://scholar.google.com/"
def search(self, query, generic_cover=""):
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
val = list()
if self.active:
scholar_gen = scholarly.search_pubs(' '.join(query.split('+')))
i = 0
for publication in scholar_gen:
v = dict()
v['id'] = publication['url_scholarbib'].split(':')[1]
v['title'] = publication['bib'].get('title')
v['authors'] = publication['bib'].get('author', [])
v['description'] = publication['bib'].get('abstract', "")
v['publisher'] = publication['bib'].get('venue', "")
if publication['bib'].get('pub_year'):
v['publishedDate'] = publication['bib'].get('pub_year')+"-01-01"
else:
v['publishedDate'] = ""
v['tags'] = []
v['rating'] = 0
v['series'] = ""
v['cover'] = ""
v['url'] = publication.get('pub_url') or publication.get('eprint_url') or "",
v['source'] = {
"id": self.__id__,
"description": "Google Scholar",
"link": "https://scholar.google.com/"
}
val.append(v)
i += 1
if (i >= 10):
break
title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = " ".join(tokens)
scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
for result in scholar_gen:
match = self._parse_search_result(
result=result, generic_cover=generic_cover, locale=locale
)
val.append(match)
return val
def _parse_search_result(
self, result: Dict, generic_cover: str, locale: str
) -> MetaRecord:
match = MetaRecord(
id=result.get("pub_url", result.get("eprint_url", "")),
title=result["bib"].get("title"),
authors=result["bib"].get("author", []),
url=result.get("pub_url", result.get("eprint_url", "")),
source=MetaSourceInfo(
id=self.__id__, description=self.__name__, link=scholar.META_URL
),
)
match.cover = result.get("image", {}).get("original_url", generic_cover)
match.description = result["bib"].get("abstract", "")
match.publisher = result["bib"].get("venue", "")
match.publishedDate = result["bib"].get("pub_year") + "-01-01"
match.identifiers = {"scholar": match.id}
return match

View File

@ -432,17 +432,9 @@ def feed_languagesindex():
if current_user.filter_language() == u"all":
languages = calibre_db.speaking_language()
else:
#try:
# cur_l = LC.parse(current_user.filter_language())
#except UnknownLocaleError:
# cur_l = None
languages = calibre_db.session.query(db.Languages).filter(
db.Languages.lang_code == current_user.filter_language()).all()
languages[0].name = isoLanguages.get_language_name(get_locale(), languages[0].lang_code)
#if cur_l:
# languages[0].name = cur_l.get_language_name(get_locale())
#else:
# languages[0].name = _(isoLanguages.get(part3=languages[0].lang_code).name)
pagination = Pagination((int(off) / (int(config.config_books_per_page)) + 1), config.config_books_per_page,
len(languages))
return render_xml_template('feed.xml', listelements=languages, folder='opds.feed_languages', pagination=pagination)
@ -530,7 +522,8 @@ def feed_search(term):
entries, __, ___ = calibre_db.get_search_results(term, config_read_column=config.config_read_column)
entries_count = len(entries) if len(entries) > 0 else 1
pagination = Pagination(1, entries_count, entries_count)
return render_xml_template('feed.xml', searchterm=term, entries=entries, pagination=pagination)
items = [entry[0] for entry in entries]
return render_xml_template('feed.xml', searchterm=term, entries=items, pagination=pagination)
else:
return render_xml_template('feed.xml', searchterm="")

View File

@ -16,25 +16,27 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import json
import importlib
import sys
import inspect
import datetime
import concurrent.futures
import importlib
import inspect
import json
import os
import sys
# from time import time
from dataclasses import asdict
from flask import Blueprint, request, Response, url_for
from flask import Blueprint, Response, request, url_for
from flask_login import current_user
from flask_login import login_required
from sqlalchemy.exc import InvalidRequestError, OperationalError
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.exc import OperationalError, InvalidRequestError
from . import constants, logger, ub
from cps.services.Metadata import Metadata
from . import constants, get_locale, logger, ub
# current_milli_time = lambda: int(round(time() * 1000))
meta = Blueprint('metadata', __name__)
meta = Blueprint("metadata", __name__)
log = logger.create()
@ -42,7 +44,7 @@ new_list = list()
meta_dir = os.path.join(constants.BASE_DIR, "cps", "metadata_provider")
modules = os.listdir(os.path.join(constants.BASE_DIR, "cps", "metadata_provider"))
for f in modules:
if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith('__init__.py'):
if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith("__init__.py"):
a = os.path.basename(f)[:-3]
try:
importlib.import_module("cps.metadata_provider." + a)
@ -51,34 +53,46 @@ for f in modules:
log.error("Import error for metadata source: {}".format(a))
pass
def list_classes(provider_list):
classes = list()
for element in provider_list:
for name, obj in inspect.getmembers(sys.modules["cps.metadata_provider." + element]):
if inspect.isclass(obj) and name != "Metadata" and issubclass(obj, Metadata):
for name, obj in inspect.getmembers(
sys.modules["cps.metadata_provider." + element]
):
if (
inspect.isclass(obj)
and name != "Metadata"
and issubclass(obj, Metadata)
):
classes.append(obj())
return classes
cl = list_classes(new_list)
@meta.route("/metadata/provider")
@login_required
def metadata_provider():
active = current_user.view_settings.get('metadata', {})
active = current_user.view_settings.get("metadata", {})
provider = list()
for c in cl:
ac = active.get(c.__id__, True)
provider.append({"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__})
return Response(json.dumps(provider), mimetype='application/json')
provider.append(
{"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__}
)
return Response(json.dumps(provider), mimetype="application/json")
@meta.route("/metadata/provider", methods=['POST'])
@meta.route("/metadata/provider/<prov_name>", methods=['POST'])
@meta.route("/metadata/provider", methods=["POST"])
@meta.route("/metadata/provider/<prov_name>", methods=["POST"])
@login_required
def metadata_change_active_provider(prov_name):
new_state = request.get_json()
active = current_user.view_settings.get('metadata', {})
active[new_state['id']] = new_state['value']
current_user.view_settings['metadata'] = active
active = current_user.view_settings.get("metadata", {})
active[new_state["id"]] = new_state["value"]
current_user.view_settings["metadata"] = active
try:
try:
flag_modified(current_user, "view_settings")
@ -89,29 +103,33 @@ def metadata_change_active_provider(prov_name):
log.error("Invalid request received: {}".format(request))
return "Invalid request", 400
if "initial" in new_state and prov_name:
for c in cl:
if c.__id__ == prov_name:
data = c.search(new_state.get('query', ""))
break
return Response(json.dumps(data), mimetype='application/json')
data = []
provider = next((c for c in cl if c.__id__ == prov_name), None)
if provider is not None:
data = provider.search(new_state.get("query", ""))
return Response(
json.dumps([asdict(x) for x in data]), mimetype="application/json"
)
return ""
@meta.route("/metadata/search", methods=['POST'])
@meta.route("/metadata/search", methods=["POST"])
@login_required
def metadata_search():
query = request.form.to_dict().get('query')
query = request.form.to_dict().get("query")
data = list()
active = current_user.view_settings.get('metadata', {})
active = current_user.view_settings.get("metadata", {})
locale = get_locale()
if query:
generic_cover = ""
static_cover = url_for("static", filename="generic_cover.jpg")
# start = current_milli_time()
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
meta = {executor.submit(c.search, query, generic_cover): c for c in cl if active.get(c.__id__, True)}
meta = {
executor.submit(c.search, query, static_cover, locale): c
for c in cl
if active.get(c.__id__, True)
}
for future in concurrent.futures.as_completed(meta):
data.extend(future.result())
return Response(json.dumps(data), mimetype='application/json')
data.extend([asdict(x) for x in future.result()])
# log.info({'Time elapsed {}'.format(current_milli_time()-start)})
return Response(json.dumps(data), mimetype="application/json")

View File

@ -15,13 +15,93 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import abc
import dataclasses
import os
import re
from typing import Dict, Generator, List, Optional, Union
from cps import constants
class Metadata():
@dataclasses.dataclass
class MetaSourceInfo:
id: str
description: str
link: str
@dataclasses.dataclass
class MetaRecord:
id: Union[str, int]
title: str
authors: List[str]
url: str
source: MetaSourceInfo
cover: str = os.path.join(constants.STATIC_DIR, 'generic_cover.jpg')
description: Optional[str] = ""
series: Optional[str] = None
series_index: Optional[Union[int, float]] = 0
identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
publisher: Optional[str] = None
publishedDate: Optional[str] = None
rating: Optional[int] = 0
languages: Optional[List[str]] = dataclasses.field(default_factory=list)
tags: Optional[List[str]] = dataclasses.field(default_factory=list)
class Metadata:
__name__ = "Generic"
__id__ = "generic"
def __init__(self):
self.active = True
def set_status(self, state):
self.active = state
@abc.abstractmethod
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
pass
@staticmethod
def get_title_tokens(
title: str, strip_joiners: bool = True
) -> Generator[str, None, None]:
"""
Taken from calibre source code
"""
title_patterns = [
(re.compile(pat, re.IGNORECASE), repl)
for pat, repl in [
# Remove things like: (2010) (Omnibus) etc.
(
r"(?i)[({\[](\d{4}|omnibus|anthology|hardcover|"
r"audiobook|audio\scd|paperback|turtleback|"
r"mass\s*market|edition|ed\.)[\])}]",
"",
),
# Remove any strings that contain the substring edition inside
# parentheses
(r"(?i)[({\[].*?(edition|ed.).*?[\]})]", ""),
# Remove commas used a separators in numbers
(r"(\d+),(\d+)", r"\1\2"),
# Remove hyphens only if they have whitespace before them
(r"(\s-)", " "),
# Replace other special chars with a space
(r"""[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”""", " "),
]
]
for pat, repl in title_patterns:
title = pat.sub(repl, title)
tokens = title.split()
for token in tokens:
token = token.strip().strip('"').strip("'")
if token and (
not strip_joiners or token.lower() not in ("a", "and", "the", "&")
):
yield token

View File

@ -26,19 +26,26 @@ $(function () {
)
};
function getUniqueValues(attribute_name, book){
var presentArray = $.map($("#"+attribute_name).val().split(","), $.trim);
if ( presentArray.length === 1 && presentArray[0] === "") {
presentArray = [];
}
$.each(book[attribute_name], function(i, el) {
if ($.inArray(el, presentArray) === -1) presentArray.push(el);
});
return presentArray
}
function populateForm (book) {
tinymce.get("description").setContent(book.description);
var uniqueTags = $.map($("#tags").val().split(","), $.trim);
if ( uniqueTags.length == 1 && uniqueTags[0] == "") {
uniqueTags = [];
}
$.each(book.tags, function(i, el) {
if ($.inArray(el, uniqueTags) === -1) uniqueTags.push(el);
});
var uniqueTags = getUniqueValues('tags', book)
var uniqueLanguages = getUniqueValues('languages', book)
var ampSeparatedAuthors = (book.authors || []).join(" & ");
$("#bookAuthor").val(ampSeparatedAuthors);
$("#book_title").val(book.title);
$("#tags").val(uniqueTags.join(", "));
$("#languages").val(uniqueLanguages.join(", "));
$("#rating").data("rating").setValue(Math.round(book.rating));
if(book.cover && $("#cover_url").length){
$(".cover img").attr("src", book.cover);
@ -48,7 +55,32 @@ $(function () {
$("#publisher").val(book.publisher);
if (typeof book.series !== "undefined") {
$("#series").val(book.series);
$("#series_index").val(book.series_index);
}
if (typeof book.identifiers !== "undefined") {
populateIdentifiers(book.identifiers)
}
}
function populateIdentifiers(identifiers){
for (const property in identifiers) {
console.log(`${property}: ${identifiers[property]}`);
if ($('input[name="identifier-type-'+property+'"]').length) {
$('input[name="identifier-val-'+property+'"]').val(identifiers[property])
}
else {
addIdentifier(property, identifiers[property])
}
}
}
function addIdentifier(name, value){
var line = '<tr>';
line += '<td><input type="text" class="form-control" name="identifier-type-'+ name +'" required="required" placeholder="' + _("Identifier Type") +'" value="'+ name +'"></td>';
line += '<td><input type="text" class="form-control" name="identifier-val-'+ name +'" required="required" placeholder="' + _("Identifier Value") +'" value="'+ value +'"></td>';
line += '<td><a class="btn btn-default" onclick="removeIdentifierLine(this)">'+_("Remove")+'</a></td>';
line += '</tr>';
$("#identifier-table").append(line);
}
function doSearch (keyword) {

View File

@ -40,35 +40,35 @@
{% if entries and entries[0] %}
{% for entry in entries %}
<entry>
<title>{{entry[0].title}}</title>
<id>urn:uuid:{{entry[0].uuid}}</id>
<updated>{{entry[0].atom_timestamp}}</updated>
{% if entry[0].authors.__len__() > 0 %}
<title>{{entry.title}}</title>
<id>urn:uuid:{{entry.uuid}}</id>
<updated>{{entry.atom_timestamp}}</updated>
{% if entry.authors.__len__() > 0 %}
<author>
<name>{{entry[0].authors[0].name}}</name>
<name>{{entry.authors[0].name}}</name>
</author>
{% endif %}
{% if entry[0].publishers.__len__() > 0 %}
{% if entry.publishers.__len__() > 0 %}
<publisher>
<name>{{entry[0].publishers[0].name}}</name>
<name>{{entry.publishers[0].name}}</name>
</publisher>
{% endif %}
{% for lang in entry[0].languages %}
{% for lang in entry.languages %}
<dcterms:language>{{lang.lang_code}}</dcterms:language>
{% endfor %}
{% for tag in entry[0].tags %}
{% for tag in entry.tags %}
<category scheme="http://www.bisg.org/standards/bisac_subject/index.html"
term="{{tag.name}}"
label="{{tag.name}}"/>
{% endfor %}
{% if entry[0].comments[0] %}<summary>{{entry[0].comments[0].text|striptags}}</summary>{% endif %}
{% if entry[0].has_cover %}
<link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry[0].id)}}" rel="http://opds-spec.org/image"/>
<link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry[0].id)}}" rel="http://opds-spec.org/image/thumbnail"/>
{% if entry.comments[0] %}<summary>{{entry.comments[0].text|striptags}}</summary>{% endif %}
{% if entry.has_cover %}
<link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry.id)}}" rel="http://opds-spec.org/image"/>
<link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry.id)}}" rel="http://opds-spec.org/image/thumbnail"/>
{% endif %}
{% for format in entry[0].data %}
<link rel="http://opds-spec.org/acquisition" href="{{ url_for('opds.opds_download_link', book_id=entry[0].id, book_format=format.format|lower)}}"
length="{{format.uncompressed_size}}" mtime="{{entry[0].atom_timestamp}}" type="{{format.format|lower|mimetype}}"/>
{% for format in entry.data %}
<link rel="http://opds-spec.org/acquisition" href="{{ url_for('opds.opds_download_link', book_id=entry.id, book_format=format.format|lower)}}"
length="{{format.uncompressed_size}}" mtime="{{entry.atom_timestamp}}" type="{{format.format|lower|mimetype}}"/>
{% endfor %}
</entry>
{% endfor %}

View File

@ -31,6 +31,9 @@ SQLAlchemy-Utils>=0.33.5,<0.39.0
# metadata extraction
rarfile>=2.7
scholarly>=1.2.0,<1.6
markdown2==2.4.2
html2text==2020.1.16
python-dateutil==2.8.2
# Comics
natsort>=2.2.0,<8.1.0

File diff suppressed because it is too large Load Diff