1
0
mirror of https://github.com/janeczku/calibre-web synced 2024-12-25 01:20:32 +00:00

Bugfix for goodreads (html formatted info for authors now visible)

This commit is contained in:
Ozzie Isaacs 2024-05-11 07:10:41 +02:00
parent 5c49c8cdd7
commit 7e85894b3a
7 changed files with 212 additions and 106 deletions

53
cps/clean_html.py Normal file
View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2018-2019 OzzieIsaacs
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from . import logger
from lxml.etree import ParserError
try:
# at least bleach 6.0 is needed -> incompatible change from list arguments to set arguments
from bleach import clean_text as clean_html
BLEACH = True
except ImportError:
try:
BLEACH = False
from nh3 import clean as clean_html
except ImportError:
try:
BLEACH = False
from lxml.html.clean import clean_html
except ImportError:
clean_html = None
log = logger.create()
def clean_string(unsafe_text, book_id=0):
    """Sanitize a piece of HTML text with the cleaner selected at import time.

    The module-level ``clean_html`` callable is used; when ``BLEACH`` is true
    it is called with empty ``tags``/``attributes`` sets, otherwise it is
    called with the text only.

    :param unsafe_text: text to sanitize; may be empty or ``None``
    :param book_id: id used only in the error log message (defaults to 0)
    :return: the cleaned text, or ``""`` when there is nothing to clean or the
             cleaner raised ``ParserError`` / ``TypeError``
    """
    # Nothing to sanitize: return early instead of pushing empty/None input
    # through the backend, which would only produce a misleading error log
    # (or, depending on the backend, an exception that is not handled below).
    if not unsafe_text:
        return ""
    try:
        if BLEACH:
            safe_text = clean_html(unsafe_text, tags=set(), attributes=set())
        else:
            safe_text = clean_html(unsafe_text)
    except ParserError as e:
        log.error("Comments of book {} are corrupted: {}".format(book_id, e))
        safe_text = ""
    except TypeError as e:
        # Also reached when no cleaner could be imported at all: clean_html is
        # then None and calling it raises TypeError.
        log.error("Comments can't be parsed, maybe 'lxml' is too new, try installing 'bleach': {}".format(e))
        safe_text = ""
    return safe_text

View File

@ -27,22 +27,22 @@ from shutil import copyfile
from uuid import uuid4
from markupsafe import escape, Markup # dependency of flask
from functools import wraps
from lxml.etree import ParserError
# from lxml.etree import ParserError
try:
# at least bleach 6.0 is needed -> incomplatible change from list arguments to set arguments
from bleach import clean_text as clean_html
BLEACH = True
except ImportError:
try:
BLEACH = False
from nh3 import clean as clean_html
except ImportError:
try:
BLEACH = False
from lxml.html.clean import clean_html
except ImportError:
clean_html = None
#try:
# # at least bleach 6.0 is needed -> incomplatible change from list arguments to set arguments
# from bleach import clean_text as clean_html
# BLEACH = True
#except ImportError:
# try:
# BLEACH = False
# from nh3 import clean as clean_html
# except ImportError:
# try:
# BLEACH = False
# from lxml.html.clean import clean_html
# except ImportError:
# clean_html = None
from flask import Blueprint, request, flash, redirect, url_for, abort, Response
from flask_babel import gettext as _
@ -54,6 +54,7 @@ from sqlalchemy.orm.exc import StaleDataError
from sqlalchemy.sql.expression import func
from . import constants, logger, isoLanguages, gdriveutils, uploader, helper, kobo_sync_status
from .clean_html import clean_string
from . import config, ub, db, calibre_db
from .services.worker import WorkerThread
from .tasks.upload import TaskUpload
@ -1004,17 +1005,18 @@ def edit_book_series_index(series_index, book):
def edit_book_comments(comments, book):
modify_date = False
if comments:
try:
if BLEACH:
comments = clean_html(comments, tags=set(), attributes=set())
else:
comments = clean_html(comments)
except ParserError as e:
log.error("Comments of book {} are corrupted: {}".format(book.id, e))
comments = ""
except TypeError as e:
log.error("Comments can't be parsed, maybe 'lxml' is too new, try installing 'bleach': {}".format(e))
comments = ""
comments = clean_string(comments, book.id)
#try:
# if BLEACH:
# comments = clean_html(comments, tags=set(), attributes=set())
# else:
# comments = clean_html(comments)
#except ParserError as e:
# log.error("Comments of book {} are corrupted: {}".format(book.id, e))
# comments = ""
#except TypeError as e:
# log.error("Comments can't be parsed, maybe 'lxml' is too new, try installing 'bleach': {}".format(e))
# comments = ""
if len(book.comments):
if book.comments[0].text != comments:
book.comments[0].text = comments
@ -1072,18 +1074,19 @@ def edit_cc_data_value(book_id, book, c, to_save, cc_db_value, cc_string):
elif c.datatype == 'comments':
to_save[cc_string] = Markup(to_save[cc_string]).unescape()
if to_save[cc_string]:
try:
if BLEACH:
to_save[cc_string] = clean_html(to_save[cc_string], tags=set(), attributes=set())
else:
to_save[cc_string] = clean_html(to_save[cc_string])
except ParserError as e:
log.error("Customs Comments of book {} are corrupted: {}".format(book_id, e))
to_save[cc_string] = ""
except TypeError as e:
to_save[cc_string] = ""
log.error("Customs Comments can't be parsed, maybe 'lxml' is too new, "
"try installing 'bleach': {}".format(e))
to_save[cc_string] = clean_string(to_save[cc_string], book_id)
#try:
# if BLEACH:
# to_save[cc_string] = clean_html(to_save[cc_string], tags=set(), attributes=set())
# else:
# to_save[cc_string] = clean_html(to_save[cc_string])
#except ParserError as e:
# log.error("Customs Comments of book {} are corrupted: {}".format(book_id, e))
# to_save[cc_string] = ""
#except TypeError as e:
# to_save[cc_string] = ""
# log.error("Customs Comments can't be parsed, maybe 'lxml' is too new, "
# "try installing 'bleach': {}".format(e))
elif c.datatype == 'datetime':
try:
to_save[cc_string] = datetime.strptime(to_save[cc_string], "%Y-%m-%d")

View File

@ -30,7 +30,7 @@ except ImportError:
Levenshtein = False
from .. import logger
from ..clean_html import clean_string
class my_GoodreadsClient(GoodreadsClient):
@ -52,7 +52,7 @@ class my_GoodreadsRequest(GoodreadsRequest):
def request(self):
resp = requests.get(self.host+self.path, params=self.params,
headers={"User-agent":"Mozilla/5.0 (X11; Linux x86_64; rv:125.0) "
headers={"User-Agent":"Mozilla/5.0 (X11; Linux x86_64; rv:125.0) "
"Gecko/20100101 Firefox/125.0"})
if resp.status_code != 200:
raise GoodreadsRequestException(resp.reason, self.path)
@ -84,7 +84,7 @@ def connect(key=None, enabled=True):
_client = None
if not _client:
_client = GoodreadsClient(key, None)
_client = my_GoodreadsClient(key, None)
def get_author_info(author_name):
@ -109,6 +109,7 @@ def get_author_info(author_name):
if author_info:
author_info._timestamp = now
author_info.safe_about = clean_string(author_info.about)
_AUTHORS_CACHE[author_name] = author_info
return author_info

View File

@ -8,8 +8,8 @@
<img title="{{author.name}}" src="{{author.image_url}}" alt="{{author.name}}" class="author-photo pull-left">
{% endif %}
{%if author.about is not none %}
<p>{{author.about}}</p>
{%if author.safe_about is not none %}
<p>{{author.safe_about|safe}}</p>
{% endif %}
- {{_("via")}} <a href="{{author.link}}" class="author-link" target="_blank" rel="noopener">Goodreads</a>

View File

@ -9,7 +9,7 @@
<h2>{{title}}</h2>
<form role="form" method="POST" autocomplete="off">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="panel-group col-md-10 col-lg-8">
<div class="panel-group col-md-11 col-lg-8">
<div class="panel panel-default">
<div class="panel-heading">
<h4 class="panel-title">
@ -155,7 +155,6 @@
<div class="form-group">
<input type="checkbox" id="config_use_goodreads" name="config_use_goodreads" data-control="goodreads-settings" {% if config.config_use_goodreads %}checked{% endif %}>
<label for="config_use_goodreads">{{_('Use Goodreads')}}</label>
<a href="https://www.goodreads.com/api/keys" target="_blank" style="margin-left: 5px">{{_('Create an API Key')}}</a>
</div>
<div data-related="goodreads-settings">
<div class="form-group">

View File

@ -58,6 +58,7 @@ install_requires =
chardet>=3.0.0,<4.1.0
advocate>=1.0.0,<1.1.0
Flask-Limiter>=2.3.0,<3.6.0
regex>=2022.3.2,<2024.2.25
[options.packages.find]
@ -85,7 +86,7 @@ goodreads =
python-Levenshtein>=0.12.0,<0.26.0
ldap =
python-ldap>=3.0.0,<3.5.0
Flask-SimpleLDAP>=1.4.0,<1.5.0
Flask-SimpleLDAP>=1.4.0,<2.1.0
oauth =
Flask-Dance>=2.0.0,<7.1.0
SQLAlchemy-Utils>=0.33.5,<0.42.0

View File

@ -37,20 +37,20 @@
<div class="row">
<div class="col-xs-6 col-md-6 col-sm-offset-3" style="margin-top:50px;">
<p class='text-justify attribute'><strong>Start Time: </strong>2024-02-26 20:07:24</p>
<p class='text-justify attribute'><strong>Start Time: </strong>2024-05-10 20:24:40</p>
</div>
</div>
<div class="row">
<div class="col-xs-6 col-md-6 col-sm-offset-3">
<p class='text-justify attribute'><strong>Stop Time: </strong>2024-02-27 03:19:17</p>
<p class='text-justify attribute'><strong>Stop Time: </strong>2024-05-11 03:33:47</p>
</div>
</div>
<div class="row">
<div class="col-xs-6 col-md-6 col-sm-offset-3">
<p class='text-justify attribute'><strong>Duration: </strong>6h 0 min</p>
<p class='text-justify attribute'><strong>Duration: </strong>5h 58 min</p>
</div>
</div>
</div>
@ -320,38 +320,30 @@
<tr id='pt2.9' class='hiddenRow bg-success'>
<tr id="ft2.9" class="none bg-danger">
<td>
<div class='testcase'>TestBackupMetadata - test_backup_change_book_series_index</div>
</td>
<td colspan='6' align='center'>PASS</td>
</tr>
<tr id="ft2.10" class="none bg-danger">
<td>
<div class='testcase'>TestBackupMetadata - test_backup_change_book_tags</div>
</td>
<td colspan='6'>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft2.10')">FAIL</a>
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft2.9')">FAIL</a>
</div>
<!--css div popup start-->
<div id="div_ft2.10" class="popup_window test_output" style="display:block;">
<div id="div_ft2.9" class="popup_window test_output" style="display:block;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus="this.blur();"
onclick="document.getElementById('div_ft2.10').style.display='none'"><span
onclick="document.getElementById('div_ft2.9').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File &#34;/home/ozzie/Development/calibre-web-test/test/test_backup_metadata.py&#34;, line 243, in test_backup_change_book_tags
self.assertCountEqual(metadata[&#39;tags&#39;], [&#39;Ku&#39;,&#39;kOl&#39;])
AssertionError: Element counts were not equal:
First has 1, Second has 0: &#39;Lo执|1u&#39;
First has 0, Second has 1: &#39;Ku&#39;
First has 0, Second has 1: &#39;kOl&#39;</pre>
File &#34;/home/ozzie/Development/calibre-web-test/test/test_backup_metadata.py&#34;, line 135, in test_backup_change_book_series_index
self.assertEqual(metadata[&#39;series&#39;][&#39;content&#39;], &#34;tEst&#34;)
AssertionError: &#39;test&#39; != &#39;tEst&#39;
- test
? ^
+ tEst
? ^</pre>
</div>
<div class="clearfix"></div>
</div>
@ -361,6 +353,15 @@ First has 0, Second has 1: &#39;kOl&#39;</pre>
<tr id='pt2.10' class='hiddenRow bg-success'>
<td>
<div class='testcase'>TestBackupMetadata - test_backup_change_book_tags</div>
</td>
<td colspan='6' align='center'>PASS</td>
</tr>
<tr id='pt2.11' class='hiddenRow bg-success'>
<td>
<div class='testcase'>TestBackupMetadata - test_backup_change_book_title</div>
@ -1028,11 +1029,11 @@ First has 0, Second has 1: &#39;kOl&#39;</pre>
<tr id="su" class="skipClass">
<tr id="su" class="failClass">
<td>TestEditAdditionalBooks</td>
<td class="text-center">20</td>
<td class="text-center">18</td>
<td class="text-center">0</td>
<td class="text-center">17</td>
<td class="text-center">1</td>
<td class="text-center">0</td>
<td class="text-center">2</td>
<td class="text-center">
@ -1150,11 +1151,33 @@ First has 0, Second has 1: &#39;kOl&#39;</pre>
<tr id='pt12.13' class='hiddenRow bg-success'>
<tr id="ft12.13" class="none bg-danger">
<td>
<div class='testcase'>TestEditAdditionalBooks - test_upload_metadata_cb7</div>
</td>
<td colspan='6' align='center'>PASS</td>
<td colspan='6'>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft12.13')">FAIL</a>
</div>
<!--css div popup start-->
<div id="div_ft12.13" class="popup_window test_output" style="display:block;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus="this.blur();"
onclick="document.getElementById('div_ft12.13').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File &#34;/home/ozzie/Development/calibre-web-test/test/test_edit_additional_books.py&#34;, line 246, in test_upload_metadata_cb7
self.assertEqual(&#39;Test 执 to&#39;, details[&#39;title&#39;])
AssertionError: &#39;Test 执 to&#39; != &#39;book&#39;
- Test 执 to
+ book</pre>
</div>
<div class="clearfix"></div>
</div>
<!--css div popup end-->
</td>
</tr>
@ -2571,11 +2594,11 @@ IndexError: list index out of range</pre>
<tr id="su" class="passClass">
<tr id="su" class="failClass">
<td>TestGoodreads</td>
<td class="text-center">3</td>
<td class="text-center">3</td>
<td class="text-center">0</td>
<td class="text-center">2</td>
<td class="text-center">1</td>
<td class="text-center">0</td>
<td class="text-center">0</td>
<td class="text-center">
@ -2585,11 +2608,31 @@ IndexError: list index out of range</pre>
<tr id='pt28.1' class='hiddenRow bg-success'>
<tr id="ft28.1" class="none bg-danger">
<td>
<div class='testcase'>TestGoodreads - test_author_page</div>
</td>
<td colspan='6' align='center'>PASS</td>
<td colspan='6'>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft28.1')">FAIL</a>
</div>
<!--css div popup start-->
<div id="div_ft28.1" class="popup_window test_output" style="display:block;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus="this.blur();"
onclick="document.getElementById('div_ft28.1').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File &#34;/home/ozzie/Development/calibre-web-test/test/test_goodreads.py&#34;, line 100, in test_author_page
self.assertTrue(self.check_element_on_page((By.CLASS_NAME, &#34;author-photo&#34;)))
AssertionError: False is not true</pre>
</div>
<div class="clearfix"></div>
</div>
<!--css div popup end-->
</td>
</tr>
@ -3435,7 +3478,7 @@ IndexError: list index out of range</pre>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File &#34;/home/ozzie/Development/calibre-web-test/test/test_login.py&#34;, line 532, in test_proxy_login_multi_user
File &#34;/home/ozzie/Development/calibre-web-test/test/test_login.py&#34;, line 575, in test_proxy_login_multi_user
self.assertTrue(&#39;&lt;input type=&#34;text&#34; class=&#34;form-control&#34; name=&#34;name&#34; id=&#34;name&#34; value=&#34;new_user1&#34; autocomplete=&#34;off&#34;&gt;&#39; in resp.text)
AssertionError: False is not true</pre>
</div>
@ -5569,8 +5612,8 @@ AssertionError: False is not true</pre>
<tr id='total_row' class="text-center bg-grey">
<td>Total</td>
<td>492</td>
<td>479</td>
<td>2</td>
<td>477</td>
<td>4</td>
<td>1</td>
<td>10</td>
<td>&nbsp;</td>
@ -5600,7 +5643,7 @@ AssertionError: False is not true</pre>
<tr>
<th>Platform</th>
<td>Linux 6.5.0-21-generic #21~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Feb 9 13:32:52 UTC 2 x86_64 x86_64</td>
<td>Linux 6.5.0-28-generic #29~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Apr 4 14:39:20 UTC 2 x86_64 x86_64</td>
<td>Basic</td>
</tr>
@ -5624,7 +5667,7 @@ AssertionError: False is not true</pre>
<tr>
<th>Babel</th>
<td>2.14.0</td>
<td>2.15.0</td>
<td>Basic</td>
</tr>
@ -5684,19 +5727,19 @@ AssertionError: False is not true</pre>
<tr>
<th>Jinja2</th>
<td>3.1.3</td>
<td>3.1.4</td>
<td>Basic</td>
</tr>
<tr>
<th>lxml</th>
<td>5.1.0</td>
<td>5.1.1</td>
<td>Basic</td>
</tr>
<tr>
<th>pyasn1</th>
<td>0.5.1</td>
<td>0.6.0</td>
<td>Basic</td>
</tr>
@ -5712,6 +5755,12 @@ AssertionError: False is not true</pre>
<td>Basic</td>
</tr>
<tr>
<th>regex</th>
<td>2023.12.25</td>
<td>Basic</td>
</tr>
<tr>
<th>requests</th>
<td>2.31.0</td>
@ -5720,7 +5769,7 @@ AssertionError: False is not true</pre>
<tr>
<th>SQLAlchemy</th>
<td>2.0.27</td>
<td>2.0.30</td>
<td>Basic</td>
</tr>
@ -5750,7 +5799,7 @@ AssertionError: False is not true</pre>
<tr>
<th>google-api-python-client</th>
<td>2.119.0</td>
<td>2.128.0</td>
<td>TestBackupMetadataGdrive</td>
</tr>
@ -5780,7 +5829,7 @@ AssertionError: False is not true</pre>
<tr>
<th>google-api-python-client</th>
<td>2.119.0</td>
<td>2.128.0</td>
<td>TestCliGdrivedb</td>
</tr>
@ -5810,7 +5859,7 @@ AssertionError: False is not true</pre>
<tr>
<th>google-api-python-client</th>
<td>2.119.0</td>
<td>2.128.0</td>
<td>TestEbookConvertCalibreGDrive</td>
</tr>
@ -5840,7 +5889,7 @@ AssertionError: False is not true</pre>
<tr>
<th>google-api-python-client</th>
<td>2.119.0</td>
<td>2.129.0</td>
<td>TestEbookConvertGDriveKepubify</td>
</tr>
@ -5876,25 +5925,25 @@ AssertionError: False is not true</pre>
<tr>
<th>py7zr</th>
<td>0.20.8</td>
<td>0.21.0</td>
<td>TestEditAdditionalBooks</td>
</tr>
<tr>
<th>rarfile</th>
<td>4.1</td>
<td>4.2</td>
<td>TestEditAdditionalBooks</td>
</tr>
<tr>
<th>py7zr</th>
<td>0.20.8</td>
<td>0.21.0</td>
<td>TestEditBooks</td>
</tr>
<tr>
<th>google-api-python-client</th>
<td>2.119.0</td>
<td>2.129.0</td>
<td>TestEditAuthorsGdrive</td>
</tr>
@ -5930,7 +5979,7 @@ AssertionError: False is not true</pre>
<tr>
<th>google-api-python-client</th>
<td>2.119.0</td>
<td>2.129.0</td>
<td>TestEditBooksOnGdrive</td>
</tr>
@ -5972,7 +6021,7 @@ AssertionError: False is not true</pre>
<tr>
<th>google-api-python-client</th>
<td>2.119.0</td>
<td>2.129.0</td>
<td>TestEmbedMetadataGdrive</td>
</tr>
@ -6002,7 +6051,7 @@ AssertionError: False is not true</pre>
<tr>
<th>google-api-python-client</th>
<td>2.119.0</td>
<td>2.129.0</td>
<td>TestSetupGdrive</td>
</tr>
@ -6038,31 +6087,31 @@ AssertionError: False is not true</pre>
<tr>
<th>python-Levenshtein</th>
<td>0.25.0</td>
<td>0.25.1</td>
<td>TestGoodreads</td>
</tr>
<tr>
<th>jsonschema</th>
<td>4.21.1</td>
<td>4.22.0</td>
<td>TestKoboSync</td>
</tr>
<tr>
<th>jsonschema</th>
<td>4.21.1</td>
<td>4.22.0</td>
<td>TestKoboSyncBig</td>
</tr>
<tr>
<th>Flask-SimpleLDAP</th>
<td>1.4.0</td>
<td>2.0.0</td>
<td>TestLdapLogin</td>
</tr>
<tr>
<th>jsonschema</th>
<td>4.21.1</td>
<td>4.22.0</td>
<td>TestLdapLogin</td>
</tr>
@ -6074,13 +6123,13 @@ AssertionError: False is not true</pre>
<tr>
<th>Flask-Dance</th>
<td>7.0.1</td>
<td>7.1.0</td>
<td>TestOAuthLogin</td>
</tr>
<tr>
<th>SQLAlchemy-Utils</th>
<td>0.41.1</td>
<td>0.41.2</td>
<td>TestOAuthLogin</td>
</tr>
@ -6092,7 +6141,7 @@ AssertionError: False is not true</pre>
</div>
<script>
drawCircle(479, 2, 1, 10);
drawCircle(477, 4, 1, 10);
showCase(5);
</script>