From 3a603cec22b8072f9c1991a48897484ec5a45004 Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Sun, 5 May 2024 11:18:31 +0200 Subject: [PATCH] Handle error on uploading a book with lxml too new and no bleach, nh3 installation --- cps/config_sql.py | 3 ++- cps/editbooks.py | 20 +++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/cps/config_sql.py b/cps/config_sql.py index 8176bf41..a781f2c5 100644 --- a/cps/config_sql.py +++ b/cps/config_sql.py @@ -493,7 +493,8 @@ def autodetect_calibre_binaries(): for element in calibre_path: supported_binary_paths = [os.path.join(element, binary) for binary in constants.SUPPORTED_CALIBRE_BINARIES.values()] if all(os.path.isfile(binary_path) and os.access(binary_path, os.X_OK) for binary_path in supported_binary_paths): - values = [process_wait([binary_path, "--version"], pattern='\(calibre (.*)\)') for binary_path in supported_binary_paths] + values = [process_wait([binary_path, "--version"], + pattern=r'\(calibre (.*)\)') for binary_path in supported_binary_paths] if all(values): version = values[0].group(1) log.debug("calibre version %s", version) diff --git a/cps/editbooks.py b/cps/editbooks.py index d5fe580c..030fbf90 100644 --- a/cps/editbooks.py +++ b/cps/editbooks.py @@ -35,12 +35,12 @@ try: BLEACH = True except ImportError: try: - from nh3 import clean as clean_html BLEACH = False + from nh3 import clean as clean_html except ImportError: try: - from lxml.html.clean import clean_html BLEACH = False + from lxml.html.clean import clean_html except ImportError: clean_html = None @@ -1012,6 +1012,9 @@ def edit_book_comments(comments, book): except ParserError as e: log.error("Comments of book {} are corrupted: {}".format(book.id, e)) comments = "" + except TypeError as e: + log.error("Comments can't be parsed, maybe 'lxml' is too new, try installing 'bleach': {}".format(e)) + comments = "" if len(book.comments): if book.comments[0].text != comments: book.comments[0].text = comments @@ -1069,7 +1072,18 @@ def edit_cc_data_value(book_id, book, c, to_save, cc_db_value, cc_string): elif c.datatype == 'comments': to_save[cc_string] = Markup(to_save[cc_string]).unescape() if to_save[cc_string]: - to_save[cc_string] = clean_html(to_save[cc_string]) + try: + if BLEACH: + to_save[cc_string] = clean_html(to_save[cc_string], tags=set(), attributes=set()) + else: + to_save[cc_string] = clean_html(to_save[cc_string]) + except ParserError as e: + log.error("Customs Comments of book {} are corrupted: {}".format(book_id, e)) + to_save[cc_string] = "" + except TypeError as e: + to_save[cc_string] = "" + log.error("Customs Comments can't be parsed, maybe 'lxml' is too new, " + "try installing 'bleach': {}".format(e)) elif c.datatype == 'datetime': try: to_save[cc_string] = datetime.strptime(to_save[cc_string], "%Y-%m-%d")