mirror of
https://github.com/janeczku/calibre-web
synced 2024-11-14 05:44:53 +00:00
Fix loading metadata from Amazon
This commit is contained in:
parent
481237dc77
commit
88cb58c285
@ -38,14 +38,16 @@ class Amazon(Metadata):
|
|||||||
__name__ = "Amazon"
|
__name__ = "Amazon"
|
||||||
__id__ = "amazon"
|
__id__ = "amazon"
|
||||||
headers = {'upgrade-insecure-requests': '1',
|
headers = {'upgrade-insecure-requests': '1',
|
||||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
|
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0',
|
||||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
|
||||||
'sec-gpc': '1',
|
'Sec-Fetch-Site': 'same-origin',
|
||||||
'sec-fetch-site': 'none',
|
'Sec-Fetch-Mode': 'navigate',
|
||||||
'sec-fetch-mode': 'navigate',
|
'Sec-Fetch-User': '?1',
|
||||||
'sec-fetch-user': '?1',
|
'Sec-Fetch-Dest': 'document',
|
||||||
'sec-fetch-dest': 'document',
|
'Upgrade-Insecure-Requests': '1',
|
||||||
'accept-encoding': 'gzip, deflate, br',
|
'Alt-Used' : 'www.amazon.com',
|
||||||
|
'Priority' : 'u=0, i',
|
||||||
|
'accept-encoding': 'gzip, deflate, br, zstd',
|
||||||
'accept-language': 'en-US,en;q=0.9'}
|
'accept-language': 'en-US,en;q=0.9'}
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.headers=headers
|
session.headers=headers
|
||||||
@ -62,7 +64,7 @@ class Amazon(Metadata):
|
|||||||
log.warning(ex)
|
log.warning(ex)
|
||||||
return []
|
return []
|
||||||
long_soup = BS(r.text, "lxml") #~4sec :/
|
long_soup = BS(r.text, "lxml") #~4sec :/
|
||||||
soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-books-ppd_csm_instrumentation_wrapper"})
|
soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-ppd_csm_instrumentation_wrapper"})
|
||||||
if soup2 is None:
|
if soup2 is None:
|
||||||
return []
|
return []
|
||||||
try:
|
try:
|
||||||
@ -106,7 +108,7 @@ class Amazon(Metadata):
|
|||||||
except (AttributeError, ValueError):
|
except (AttributeError, ValueError):
|
||||||
match.rating = 0
|
match.rating = 0
|
||||||
try:
|
try:
|
||||||
match.cover = soup2.find("img", attrs={"class": "a-dynamic-image frontImage"})["src"]
|
match.cover = soup2.find("img", attrs={"class": "a-dynamic-image"})["src"]
|
||||||
except (AttributeError, TypeError):
|
except (AttributeError, TypeError):
|
||||||
match.cover = ""
|
match.cover = ""
|
||||||
return match, index
|
return match, index
|
||||||
@ -132,7 +134,7 @@ class Amazon(Metadata):
|
|||||||
links_list = [next(filter(lambda i: "digital-text" in i["href"], x.findAll("a")))["href"] for x in
|
links_list = [next(filter(lambda i: "digital-text" in i["href"], x.findAll("a")))["href"] for x in
|
||||||
soup.findAll("div", attrs={"data-component-type": "s-search-result"})]
|
soup.findAll("div", attrs={"data-component-type": "s-search-result"})]
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
||||||
fut = {executor.submit(inner, link, index) for index, link in enumerate(links_list[:5])}
|
fut = {executor.submit(inner, link, index) for index, link in enumerate(links_list[:3])}
|
||||||
val = list(map(lambda x : x.result(), concurrent.futures.as_completed(fut)))
|
val = list(map(lambda x : x.result(), concurrent.futures.as_completed(fut)))
|
||||||
result = list(filter(lambda x: x, val))
|
result = list(filter(lambda x: x, val))
|
||||||
return [x[0] for x in sorted(result, key=itemgetter(1))] #sort by amazons listing order for best relevance
|
return [x[0] for x in sorted(result, key=itemgetter(1))] #sort by amazons listing order for best relevance
|
||||||
|
@ -33,7 +33,6 @@ from cps.services.Metadata import Metadata
|
|||||||
from . import constants, logger, ub, web_server
|
from . import constants, logger, ub, web_server
|
||||||
from .usermanagement import user_login_required
|
from .usermanagement import user_login_required
|
||||||
|
|
||||||
# current_milli_time = lambda: int(round(time() * 1000))
|
|
||||||
|
|
||||||
meta = Blueprint("metadata", __name__)
|
meta = Blueprint("metadata", __name__)
|
||||||
|
|
||||||
@ -130,7 +129,7 @@ def metadata_search():
|
|||||||
locale = get_locale()
|
locale = get_locale()
|
||||||
if query:
|
if query:
|
||||||
static_cover = url_for("static", filename="generic_cover.jpg")
|
static_cover = url_for("static", filename="generic_cover.jpg")
|
||||||
# start = current_milli_time()
|
# ret = cl[0].search(query, static_cover, locale)
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
||||||
meta = {
|
meta = {
|
||||||
executor.submit(c.search, query, static_cover, locale): c
|
executor.submit(c.search, query, static_cover, locale): c
|
||||||
@ -139,5 +138,4 @@ def metadata_search():
|
|||||||
}
|
}
|
||||||
for future in concurrent.futures.as_completed(meta):
|
for future in concurrent.futures.as_completed(meta):
|
||||||
data.extend([asdict(x) for x in future.result() if x])
|
data.extend([asdict(x) for x in future.result() if x])
|
||||||
# log.info({'Time elapsed {}'.format(current_milli_time()-start)})
|
|
||||||
return Response(json.dumps(data), mimetype="application/json")
|
return Response(json.dumps(data), mimetype="application/json")
|
||||||
|
Loading…
Reference in New Issue
Block a user