diff --git a/cps/metadata_provider/comicvine.py b/cps/metadata_provider/comicvine.py
index 195e68f8..56618d4b 100644
--- a/cps/metadata_provider/comicvine.py
+++ b/cps/metadata_provider/comicvine.py
@@ -17,49 +17,68 @@
# along with this program. If not, see .
# ComicVine api document: https://comicvine.gamespot.com/api/documentation
+from typing import Dict, List, Optional
+from urllib.parse import quote
import requests
-from cps.services.Metadata import Metadata
+from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
class ComicVine(Metadata):
__name__ = "ComicVine"
__id__ = "comicvine"
+ DESCRIPTION = "ComicVine Books"
+ META_URL = "https://comicvine.gamespot.com/"
+ API_KEY = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
+ BASE_URL = (
+ f"https://comicvine.gamespot.com/api/search?api_key={API_KEY}"
+ f"&resources=issue&query="
+ )
+ QUERY_PARAMS = "&sort=name:desc&format=json"
+ HEADERS = {"User-Agent": "Not Evil Browser"}
- def search(self, query, generic_cover=""):
+ def search(
+ self, query: str, generic_cover: str = "", locale: str = "en"
+ ) -> Optional[List[MetaRecord]]:
val = list()
- apikey = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
if self.active:
- headers = {
- 'User-Agent': 'Not Evil Browser'
- }
-
- result = requests.get("https://comicvine.gamespot.com/api/search?api_key="
- + apikey + "&resources=issue&query=" + query + "&sort=name:desc&format=json", headers=headers)
- for r in result.json()['results']:
- seriesTitle = r['volume'].get('name', "")
- if r.get('store_date'):
- dateFomers = r.get('store_date')
- else:
- dateFomers = r.get('date_added')
- v = dict()
- v['id'] = r['id']
- v['title'] = seriesTitle + " #" + r.get('issue_number', "0") + " - " + ( r.get('name', "") or "")
- v['authors'] = r.get('authors', [])
- v['description'] = r.get('description', "")
- v['publisher'] = ""
- v['publishedDate'] = dateFomers
- v['tags'] = ["Comics", seriesTitle]
- v['rating'] = 0
- v['series'] = seriesTitle
- v['cover'] = r['image'].get('original_url', generic_cover)
- v['source'] = {
- "id": self.__id__,
- "description": "ComicVine Books",
- "link": "https://comicvine.gamespot.com/"
- }
- v['url'] = r.get('site_detail_url', "")
- val.append(v)
+ title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
+ if title_tokens:
+ tokens = [quote(t.encode("utf-8")) for t in title_tokens]
+ query = "%20".join(tokens)
+ result = requests.get(
+ f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
+ headers=ComicVine.HEADERS,
+ )
+            for record in result.json()["results"]:
+                match = self._parse_search_result(
+                    result=record, generic_cover=generic_cover, locale=locale
+                )
+                val.append(match)
return val
-
+ def _parse_search_result(
+ self, result: Dict, generic_cover: str, locale: str
+ ) -> MetaRecord:
+ series = result["volume"].get("name", "")
+ series_index = result.get("issue_number", 0)
+        issue_name = result.get("name", "") or ""
+ match = MetaRecord(
+ id=result["id"],
+            title=f"{series} #{series_index} - {issue_name}",
+ authors=result.get("authors", []),
+ url=result.get("site_detail_url", ""),
+ source=MetaSourceInfo(
+ id=self.__id__,
+ description=ComicVine.DESCRIPTION,
+ link=ComicVine.META_URL,
+ ),
+ series=series,
+ )
+ match.cover = result["image"].get("original_url", generic_cover)
+ match.description = result.get("description", "")
+        match.publishedDate = result.get("store_date") or result.get("date_added")
+ match.series_index = series_index
+ match.tags = ["Comics", series]
+ match.identifiers = {"comicvine": match.id}
+ return match
diff --git a/cps/metadata_provider/google.py b/cps/metadata_provider/google.py
index 1074fe3d..5ac3e7ee 100644
--- a/cps/metadata_provider/google.py
+++ b/cps/metadata_provider/google.py
@@ -23,7 +23,7 @@ from urllib.parse import quote
import requests
from cps.isoLanguages import get_lang3, get_language_name
-from cps.services.Metadata import MetaRecord, Metadata
+from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
class Google(Metadata):
@@ -56,38 +56,37 @@ class Google(Metadata):
def _parse_search_result(
self, result: Dict, generic_cover: str, locale: str
) -> MetaRecord:
- match = dict()
- match["id"] = result["id"]
- match["title"] = result["volumeInfo"]["title"]
- match["authors"] = result["volumeInfo"].get("authors", [])
- match["url"] = Google.BOOK_URL + result["id"]
- match["cover"] = self._parse_cover(result=result, generic_cover=generic_cover)
- match["description"] = result["volumeInfo"].get("description", "")
- match["languages"] = self._parse_languages(result=result, locale=locale)
- match["publisher"] = result["volumeInfo"].get("publisher", "")
- match["publishedDate"] = result["volumeInfo"].get("publishedDate", "")
- match["rating"] = result["volumeInfo"].get("averageRating", 0)
- match["series"], match["series_index"] = "", 1
- match["tags"] = result["volumeInfo"].get("categories", [])
+ match = MetaRecord(
+ id=result["id"],
+ title=result["volumeInfo"]["title"],
+ authors=result["volumeInfo"].get("authors", []),
+ url=Google.BOOK_URL + result["id"],
+ source=MetaSourceInfo(
+ id=self.__id__,
+ description=Google.DESCRIPTION,
+ link=Google.META_URL,
+ ),
+ )
- match["source"] = {
- "id": self.__id__,
- "description": Google.DESCRIPTION,
- "link": Google.META_URL,
- }
+ match.cover = self._parse_cover(result=result, generic_cover=generic_cover)
+ match.description = result["volumeInfo"].get("description", "")
+ match.languages = self._parse_languages(result=result, locale=locale)
+ match.publisher = result["volumeInfo"].get("publisher", "")
+ match.publishedDate = result["volumeInfo"].get("publishedDate", "")
+ match.rating = result["volumeInfo"].get("averageRating", 0)
+ match.series, match.series_index = "", 1
+ match.tags = result["volumeInfo"].get("categories", [])
- match["identifiers"] = {
- "google": match.get("id"),
- }
+ match.identifiers = {"google": match.id}
match = self._parse_isbn(result=result, match=match)
return match
@staticmethod
- def _parse_isbn(result: Dict, match: Dict) -> Dict:
+ def _parse_isbn(result: Dict, match: MetaRecord) -> MetaRecord:
identifiers = result["volumeInfo"].get("industryIdentifiers", [])
for identifier in identifiers:
if identifier.get("type") == Google.ISBN_TYPE:
- match["identifiers"]["isbn"] = identifier.get("identifier")
+ match.identifiers["isbn"] = identifier.get("identifier")
break
return match
@@ -100,7 +99,7 @@ class Google(Metadata):
@staticmethod
def _parse_languages(result: Dict, locale: str) -> List[str]:
- language_iso2 = result.get("language", "")
+ language_iso2 = result["volumeInfo"].get("language", "")
languages = (
[get_language_name(locale, get_lang3(language_iso2))]
if language_iso2
diff --git a/cps/metadata_provider/lubimyczytac.py b/cps/metadata_provider/lubimyczytac.py
index fd9ca4a7..4f6aca1e 100644
--- a/cps/metadata_provider/lubimyczytac.py
+++ b/cps/metadata_provider/lubimyczytac.py
@@ -27,7 +27,7 @@ from html2text import HTML2Text
from lxml.html import HtmlElement, fromstring, tostring
from markdown2 import Markdown
-from cps.services.Metadata import MetaRecord, Metadata
+from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
SYMBOLS_TO_TRANSLATE = (
"öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ",
@@ -158,61 +158,60 @@ class LubimyCzytacParser:
self.root = root
self.metadata = metadata
- def parse_search_results(self) -> List[Dict]:
+ def parse_search_results(self) -> List[MetaRecord]:
matches = []
results = self.root.xpath(LubimyCzytac.BOOK_SEARCH_RESULT_XPATH)
for result in results:
title = self._parse_xpath_node(
root=result,
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
- f"{LubimyCzytac.TITLE_TEXT_PATH}",
+ f"{LubimyCzytac.TITLE_TEXT_PATH}",
)
book_url = self._parse_xpath_node(
root=result,
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
- f"{LubimyCzytac.URL_PATH}",
+ f"{LubimyCzytac.URL_PATH}",
)
authors = self._parse_xpath_node(
root=result,
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
- f"{LubimyCzytac.AUTHORS_PATH}",
+ f"{LubimyCzytac.AUTHORS_PATH}",
take_first=False,
)
if not all([title, book_url, authors]):
continue
matches.append(
- {
- "id": book_url.replace(f"/ksiazka/", "").split("/")[0],
- "title": title,
- "authors": [strip_accents(author) for author in authors],
- "url": LubimyCzytac.BASE_URL + book_url,
- }
+ MetaRecord(
+ id=book_url.replace(f"/ksiazka/", "").split("/")[0],
+ title=title,
+ authors=[strip_accents(author) for author in authors],
+ url=LubimyCzytac.BASE_URL + book_url,
+ source=MetaSourceInfo(
+ id=self.metadata.__id__,
+ description=self.metadata.__name__,
+ link=LubimyCzytac.BASE_URL,
+ )
+ )
)
return matches
- def parse_single_book(self, match: Dict, generic_cover: str) -> MetaRecord:
- response = requests.get(match.get("url"))
+ def parse_single_book(self, match: MetaRecord, generic_cover: str) -> MetaRecord:
+ response = requests.get(match.url)
self.root = fromstring(response.text)
- match["cover"] = self._parse_cover(generic_cover=generic_cover)
- match["description"] = self._parse_description()
- match["languages"] = self._parse_languages()
- match["publisher"] = self._parse_publisher()
- match["publishedDate"] = self._parse_from_summary(
+ match.cover = self._parse_cover(generic_cover=generic_cover)
+ match.description = self._parse_description()
+ match.languages = self._parse_languages()
+ match.publisher = self._parse_publisher()
+ match.publishedDate = self._parse_from_summary(
attribute_name="datePublished"
)
- match["rating"] = self._parse_rating()
- match["series"], match["series_index"] = self._parse_series()
- match["tags"] = self._parse_tags()
-
- match["source"] = {
- "id": self.metadata.__id__,
- "description": self.metadata.__name__,
- "link": LubimyCzytac.BASE_URL,
- }
- match["identifiers"] = {
+ match.rating = self._parse_rating()
+ match.series, match.series_index = self._parse_series()
+ match.tags = self._parse_tags()
+ match.identifiers = {
"isbn": self._parse_isbn(),
- "lubimyczytac": match["id"],
+ "lubimyczytac": match.id,
}
return match
diff --git a/cps/metadata_provider/scholar.py b/cps/metadata_provider/scholar.py
index 6e13c768..0becaef0 100644
--- a/cps/metadata_provider/scholar.py
+++ b/cps/metadata_provider/scholar.py
@@ -15,47 +15,53 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
+import itertools
+from typing import Dict, List, Optional
+from urllib.parse import quote
from scholarly import scholarly
-from cps.services.Metadata import Metadata
+from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
class scholar(Metadata):
__name__ = "Google Scholar"
__id__ = "googlescholar"
+ META_URL = "https://scholar.google.com/"
- def search(self, query, generic_cover=""):
+ def search(
+ self, query: str, generic_cover: str = "", locale: str = "en"
+ ) -> Optional[List[MetaRecord]]:
val = list()
if self.active:
- scholar_gen = scholarly.search_pubs(' '.join(query.split('+')))
- i = 0
- for publication in scholar_gen:
- v = dict()
- v['id'] = "1234" # publication['bib'].get('title')
- v['title'] = publication['bib'].get('title')
- v['authors'] = publication['bib'].get('author', [])
- v['description'] = publication['bib'].get('abstract', "")
- v['publisher'] = publication['bib'].get('venue', "")
- if publication['bib'].get('pub_year'):
- v['publishedDate'] = publication['bib'].get('pub_year')+"-01-01"
- else:
- v['publishedDate'] = ""
- v['tags'] = ""
- v['ratings'] = 0
- v['series'] = ""
- v['cover'] = generic_cover
- v['url'] = publication.get('pub_url') or publication.get('eprint_url') or "",
- v['source'] = {
- "id": self.__id__,
- "description": "Google Scholar",
- "link": "https://scholar.google.com/"
- }
- val.append(v)
- i += 1
- if (i >= 10):
- break
+ title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
+ if title_tokens:
+                # scholarly expects a plain-text query, so tokens must not be URL-quoted
+                query = " ".join(title_tokens)
+ scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
+ for result in scholar_gen:
+ match = self._parse_search_result(
+ result=result, generic_cover=generic_cover, locale=locale
+ )
+ val.append(match)
return val
+ def _parse_search_result(
+ self, result: Dict, generic_cover: str, locale: str
+ ) -> MetaRecord:
+ match = MetaRecord(
+            id=result.get("pub_url") or result.get("eprint_url") or "",
+ title=result["bib"].get("title"),
+ authors=result["bib"].get("author", []),
+            url=result.get("pub_url") or result.get("eprint_url") or "",
+ source=MetaSourceInfo(
+ id=self.__id__, description=self.__name__, link=scholar.META_URL
+ ),
+ )
-
+ match.cover = result.get("image", {}).get("original_url", generic_cover)
+ match.description = result["bib"].get("abstract", "")
+ match.publisher = result["bib"].get("venue", "")
+        match.publishedDate = f'{result["bib"]["pub_year"]}-01-01' if result["bib"].get("pub_year") else ""
+ match.identifiers = {"scholar": match.id}
+ return match
diff --git a/cps/search_metadata.py b/cps/search_metadata.py
index a128f9ac..53cbf553 100644
--- a/cps/search_metadata.py
+++ b/cps/search_metadata.py
@@ -22,6 +22,7 @@ import inspect
import json
import os
import sys
+from dataclasses import asdict
from flask import Blueprint, Response, request, url_for
from flask_login import current_user
@@ -99,11 +100,13 @@ def metadata_change_active_provider(prov_name):
log.error("Invalid request received: {}".format(request))
return "Invalid request", 400
if "initial" in new_state and prov_name:
- for c in cl:
- if c.__id__ == prov_name:
- data = c.search(new_state.get("query", ""))
- break
- return Response(json.dumps(data), mimetype="application/json")
+ data = []
+ provider = next((c for c in cl if c.__id__ == prov_name), None)
+ if provider is not None:
+ data = provider.search(new_state.get("query", ""))
+ return Response(
+ json.dumps([asdict(x) for x in data]), mimetype="application/json"
+ )
return ""
@@ -123,5 +126,5 @@ def metadata_search():
if active.get(c.__id__, True)
}
for future in concurrent.futures.as_completed(meta):
- data.extend(future.result())
+ data.extend([asdict(x) for x in future.result()])
return Response(json.dumps(data), mimetype="application/json")
diff --git a/cps/services/Metadata.py b/cps/services/Metadata.py
index 09fc70ce..f4a5662c 100644
--- a/cps/services/Metadata.py
+++ b/cps/services/Metadata.py
@@ -16,32 +16,38 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import abc
+import dataclasses
+import os
import re
-from typing import Dict, Generator, List, Optional, TypedDict, Union
+from typing import Dict, Generator, List, Optional, Union
+
+from cps import constants
-class MetaSourceInfo(TypedDict):
+@dataclasses.dataclass
+class MetaSourceInfo:
id: str
description: str
link: str
-class MetaRecord(TypedDict):
+@dataclasses.dataclass
+class MetaRecord:
id: Union[str, int]
title: str
authors: List[str]
url: str
- cover: str
- series: Optional[str]
- series_index: Optional[Union[int, float]]
- tags: Optional[List[str]]
- publisher: Optional[str]
- publishedDate: Optional[str]
- rating: Optional[int]
- description: Optional[str]
source: MetaSourceInfo
- languages: Optional[List[str]]
- identifiers: Dict[str, Union[str, int]]
+ cover: str = os.path.join(constants.STATIC_DIR, 'generic_cover.jpg')
+ description: Optional[str] = ""
+ series: Optional[str] = None
+ series_index: Optional[Union[int, float]] = 0
+ identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
+ publisher: Optional[str] = None
+ publishedDate: Optional[str] = None
+ rating: Optional[int] = 0
+ languages: Optional[List[str]] = dataclasses.field(default_factory=list)
+ tags: Optional[List[str]] = dataclasses.field(default_factory=list)
class Metadata:
diff --git a/optional-requirements.txt b/optional-requirements.txt
index 03f58bb5..17c4b878 100644
--- a/optional-requirements.txt
+++ b/optional-requirements.txt
@@ -32,6 +32,9 @@ SQLAlchemy-Utils>=0.33.5,<0.38.0
# extracting metadata
rarfile>=2.7
scholarly>=1.2.0, <1.5
+markdown2==2.4.2
+html2text==2020.1.16
+python-dateutil==2.8.2
# other
natsort>=2.2.0,<8.1.0
diff --git a/requirements.txt b/requirements.txt
index d09c2157..1db961fe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,6 +14,3 @@ Wand>=0.4.4,<0.7.0
unidecode>=0.04.19,<1.3.0
lxml>=3.8.0,<4.7.0
flask-wtf>=0.14.2,<1.1.0
-markdown2==2.4.2
-html2text==2020.1.16
-python-dateutil==2.8.2