Better epub cover parsing with multiple cover-image items

Code cosmetics renamed variables refactored xml page generation refactored prepare author
2025-10-19 17:47:39 +00:00 · 2022-03-13 12:34:21 +01:00
parent 296f76b5fb
commit 4545f4a20d
18 changed files with 609 additions and 644 deletions
--- a/cps/helper.py
+++ b/cps/helper.py
@@ -23,11 +23,10 @@ import mimetypes
 import re
 import shutil
 import socket
-import unicodedata
 from datetime import datetime, timedelta
 from tempfile import gettempdir
-from urllib.parse import urlparse
 import requests
+import unidecode

 from babel.dates import format_datetime
 from babel.units import format_unit
@@ -41,15 +40,19 @@ from werkzeug.security import generate_password_hash
 from markupsafe import escape
 from urllib.parse import quote

+
 try:
-    import unidecode
-    use_unidecode = True
+    import advocate
+    from advocate.exceptions import UnacceptableAddressException
+    use_advocate = True
 except ImportError:
-    use_unidecode = False
+    use_advocate = False
+    advocate = requests
+    UnacceptableAddressException = MissingSchema = BaseException

 from . import calibre_db, cli
 from .tasks.convert import TaskConvert
-from . import logger, config, get_locale, db, ub, kobo_sync_status
+from . import logger, config, get_locale, db, ub
 from . import gdriveutils as gd
 from .constants import STATIC_DIR as _STATIC_DIR
 from .subproc_wrapper import process_wait
@@ -143,7 +146,7 @@ def check_send_to_kindle_with_converter(formats):
                            'text': _('Convert %(orig)s to %(format)s and send to Kindle',
                                      orig='Epub',
                                      format='Mobi')})
-    if 'AZW3' in formats and not 'MOBI' in formats:
+    if 'AZW3' in formats and 'MOBI' not in formats:
        bookformats.append({'format': 'Mobi',
                            'convert': 2,
                            'text': _('Convert %(orig)s to %(format)s and send to Kindle',
@@ -185,11 +188,11 @@ def check_send_to_kindle(entry):
 # Check if a reader is existing for any of the book formats, if not, return empty list, otherwise return
 # list with supported formats
 def check_read_formats(entry):
-    EXTENSIONS_READER = {'TXT', 'PDF', 'EPUB', 'CBZ', 'CBT', 'CBR', 'DJVU'}
+    extensions_reader = {'TXT', 'PDF', 'EPUB', 'CBZ', 'CBT', 'CBR', 'DJVU'}
    bookformats = list()
    if len(entry.data):
        for ele in iter(entry.data):
-            if ele.format.upper() in EXTENSIONS_READER:
+            if ele.format.upper() in extensions_reader:
                bookformats.append(ele.format.lower())
    return bookformats

@@ -213,10 +216,10 @@ def send_mail(book_id, book_format, convert, kindle_mail, calibrepath, user_id):
        if entry.format.upper() == book_format.upper():
            converted_file_name = entry.name + '.' + book_format.lower()
            link = '<a href="{}">{}</a>'.format(url_for('web.show_book', book_id=book_id), escape(book.title))
-            EmailText = _(u"%(book)s send to Kindle", book=link)
+            email_text = _(u"%(book)s send to Kindle", book=link)
            WorkerThread.add(user_id, TaskEmail(_(u"Send to Kindle"), book.path, converted_file_name,
                             config.get_mail_settings(), kindle_mail,
-                             EmailText, _(u'This e-mail has been sent via Calibre-Web.')))
+                             email_text, _(u'This e-mail has been sent via Calibre-Web.')))
            return
    return _(u"The requested file could not be read. Maybe wrong permissions?")

@@ -229,15 +232,8 @@ def get_valid_filename(value, replace_whitespace=True, chars=128):
    if value[-1:] == u'.':
        value = value[:-1]+u'_'
    value = value.replace("/", "_").replace(":", "_").strip('\0')
-    if use_unidecode:
-        if config.config_unicode_filename:
-            value = (unidecode.unidecode(value))
-    else:
-        value = value.replace(u'§', u'SS')
-        value = value.replace(u'ß', u'ss')
-        value = unicodedata.normalize('NFKD', value)
-        re_slugify = re.compile(r'[\W\s-]', re.UNICODE)
-        value = re_slugify.sub('', value)
+    if config.config_unicode_filename:
+        value = (unidecode.unidecode(value))
    if replace_whitespace:
        #  *+:\"/<>? are replaced by _
        value = re.sub(r'[*+:\\\"/<>?]+', u'_', value, flags=re.U)
@@ -266,6 +262,7 @@ def split_authors(values):


 def get_sorted_author(value):
+    value2 = None
    try:
        if ',' not in value:
            regexes = [r"^(JR|SR)\.?$", r"^I{1,3}\.?$", r"^IV\.?$"]
@@ -290,6 +287,7 @@ def get_sorted_author(value):
            value2 = value
    return value2

+
 def edit_book_read_status(book_id, read_status=None):
    if not config.config_read_column:
        book = ub.session.query(ub.ReadBook).filter(and_(ub.ReadBook.user_id == int(current_user.id),
@@ -303,9 +301,9 @@ def edit_book_read_status(book_id, read_status=None):
            else:
                book.read_status = ub.ReadBook.STATUS_FINISHED if read_status else ub.ReadBook.STATUS_UNREAD
        else:
-            readBook = ub.ReadBook(user_id=current_user.id, book_id = book_id)
-            readBook.read_status = ub.ReadBook.STATUS_FINISHED
-            book = readBook
+            read_book = ub.ReadBook(user_id=current_user.id, book_id=book_id)
+            read_book.read_status = ub.ReadBook.STATUS_FINISHED
+            book = read_book
        if not book.kobo_reading_state:
            kobo_reading_state = ub.KoboReadingState(user_id=current_user.id, book_id=book_id)
            kobo_reading_state.current_bookmark = ub.KoboBookmark()
@@ -332,12 +330,13 @@ def edit_book_read_status(book_id, read_status=None):
        except (KeyError, AttributeError):
            log.error(u"Custom Column No.%d is not existing in calibre database", config.config_read_column)
            return "Custom Column No.{} is not existing in calibre database".format(config.config_read_column)
-        except (OperationalError, InvalidRequestError) as e:
+        except (OperationalError, InvalidRequestError) as ex:
            calibre_db.session.rollback()
-            log.error(u"Read status could not set: {}".format(e))
-            return _("Read status could not set: {}".format(e.orig))
+            log.error(u"Read status could not set: {}".format(ex))
+            return _("Read status could not set: {}".format(ex.orig))
    return ""

+
 # Deletes a book fro the local filestorage, returns True if deleting is successfull, otherwise false
 def delete_book_file(book, calibrepath, book_format=None):
    # check that path is 2 elements deep, check that target path has no subfolders
@@ -361,15 +360,15 @@ def delete_book_file(book, calibrepath, book_format=None):
                                           id=book.id,
                                           path=book.path)
                    shutil.rmtree(path)
-                except (IOError, OSError) as e:
-                    log.error("Deleting book %s failed: %s", book.id, e)
-                    return False, _("Deleting book %(id)s failed: %(message)s", id=book.id, message=e)
+                except (IOError, OSError) as ex:
+                    log.error("Deleting book %s failed: %s", book.id, ex)
+                    return False, _("Deleting book %(id)s failed: %(message)s", id=book.id, message=ex)
                authorpath = os.path.join(calibrepath, os.path.split(book.path)[0])
                if not os.listdir(authorpath):
                    try:
                        shutil.rmtree(authorpath)
-                    except (IOError, OSError) as e:
-                        log.error("Deleting authorpath for book %s failed: %s", book.id, e)
+                    except (IOError, OSError) as ex:
+                        log.error("Deleting authorpath for book %s failed: %s", book.id, ex)
                return True, None

    log.error("Deleting book %s from database only, book path in database not valid: %s",
@@ -395,21 +394,21 @@ def clean_author_database(renamed_author, calibre_path="", local_book=None, gdri
                all_titledir = book.path.split('/')[1]
                all_new_path = os.path.join(calibre_path, all_new_authordir, all_titledir)
                all_new_name = get_valid_filename(book.title, chars=42) + ' - ' \
-                               + get_valid_filename(new_author.name, chars=42)
+                    + get_valid_filename(new_author.name, chars=42)
                # change location in database to new author/title path
                book.path = os.path.join(all_new_authordir, all_titledir).replace('\\', '/')
                for file_format in book.data:
                    if not gdrive:
                        shutil.move(os.path.normcase(os.path.join(all_new_path,
                                                                  file_format.name + '.' + file_format.format.lower())),
-                            os.path.normcase(os.path.join(all_new_path,
-                                                          all_new_name + '.' + file_format.format.lower())))
+                                    os.path.normcase(os.path.join(all_new_path,
+                                                                  all_new_name + '.' + file_format.format.lower())))
                    else:
-                        gFile = gd.getFileFromEbooksFolder(all_new_path,
-                                                           file_format.name + '.' + file_format.format.lower())
-                        if gFile:
-                            gd.moveGdriveFileRemote(gFile, all_new_name + u'.' + file_format.format.lower())
-                            gd.updateDatabaseOnEdit(gFile['id'], all_new_name + u'.' + file_format.format.lower())
+                        g_file = gd.getFileFromEbooksFolder(all_new_path,
+                                                            file_format.name + '.' + file_format.format.lower())
+                        if g_file:
+                            gd.moveGdriveFileRemote(g_file, all_new_name + u'.' + file_format.format.lower())
+                            gd.updateDatabaseOnEdit(g_file['id'], all_new_name + u'.' + file_format.format.lower())
                        else:
                            log.error("File {} not found on gdrive"
                                      .format(all_new_path, file_format.name + '.' + file_format.format.lower()))
@@ -426,16 +425,16 @@ def rename_all_authors(first_author, renamed_author, calibre_path="", localbook=
            old_author_dir = get_valid_filename(r, chars=96)
            new_author_rename_dir = get_valid_filename(new_author.name, chars=96)
            if gdrive:
-                gFile = gd.getFileFromEbooksFolder(None, old_author_dir)
-                if gFile:
-                    gd.moveGdriveFolderRemote(gFile, new_author_rename_dir)
+                g_file = gd.getFileFromEbooksFolder(None, old_author_dir)
+                if g_file:
+                    gd.moveGdriveFolderRemote(g_file, new_author_rename_dir)
            else:
                if os.path.isdir(os.path.join(calibre_path, old_author_dir)):
                    try:
                        old_author_path = os.path.join(calibre_path, old_author_dir)
                        new_author_path = os.path.join(calibre_path, new_author_rename_dir)
                        shutil.move(os.path.normcase(old_author_path), os.path.normcase(new_author_path))
-                    except (OSError) as ex:
+                    except OSError as ex:
                        log.error("Rename author from: %s to %s: %s", old_author_path, new_author_path, ex)
                        log.debug(ex, exc_info=True)
                        return _("Rename author from: '%(src)s' to '%(dest)s' failed with error: %(error)s",
@@ -444,6 +443,7 @@ def rename_all_authors(first_author, renamed_author, calibre_path="", localbook=
        new_authordir = get_valid_filename(localbook.authors[0].name, chars=96)
    return new_authordir

+
 # Moves files in file storage during author/title rename, or from temp dir to file storage
 def update_dir_structure_file(book_id, calibre_path, first_author, original_filepath, db_filename, renamed_author):
    # get book database entry from id, if original path overwrite source with original_filepath
@@ -483,11 +483,9 @@ def update_dir_structure_file(book_id, calibre_path, first_author, original_file


 def upload_new_file_gdrive(book_id, first_author, renamed_author, title, title_dir, original_filepath, filename_ext):
-    error = False
    book = calibre_db.get_book(book_id)
    file_name = get_valid_filename(title, chars=42) + ' - ' + \
-                get_valid_filename(first_author, chars=42) + \
-                filename_ext
+        get_valid_filename(first_author, chars=42) + filename_ext
    rename_all_authors(first_author, renamed_author, gdrive=True)
    gdrive_path = os.path.join(get_valid_filename(first_author, chars=96),
                               title_dir + " (" + str(book_id) + ")")
@@ -505,20 +503,20 @@ def update_dir_structure_gdrive(book_id, first_author, renamed_author):
    new_titledir = get_valid_filename(book.title, chars=96) + u" (" + str(book_id) + u")"

    if titledir != new_titledir:
-        gFile = gd.getFileFromEbooksFolder(os.path.dirname(book.path), titledir)
-        if gFile:
-            gd.moveGdriveFileRemote(gFile, new_titledir)
+        g_file = gd.getFileFromEbooksFolder(os.path.dirname(book.path), titledir)
+        if g_file:
+            gd.moveGdriveFileRemote(g_file, new_titledir)
            book.path = book.path.split('/')[0] + u'/' + new_titledir
-            gd.updateDatabaseOnEdit(gFile['id'], book.path)     # only child folder affected
+            gd.updateDatabaseOnEdit(g_file['id'], book.path)     # only child folder affected
        else:
            return _(u'File %(file)s not found on Google Drive', file=book.path)  # file not found

    if authordir != new_authordir and authordir not in renamed_author:
-        gFile = gd.getFileFromEbooksFolder(os.path.dirname(book.path), new_titledir)
-        if gFile:
-            gd.moveGdriveFolderRemote(gFile, new_authordir)
+        g_file = gd.getFileFromEbooksFolder(os.path.dirname(book.path), new_titledir)
+        if g_file:
+            gd.moveGdriveFolderRemote(g_file, new_authordir)
            book.path = new_authordir + u'/' + book.path.split('/')[1]
-            gd.updateDatabaseOnEdit(gFile['id'], book.path)
+            gd.updateDatabaseOnEdit(g_file['id'], book.path)
        else:
            return _(u'File %(file)s not found on Google Drive', file=authordir)  # file not found

@@ -542,15 +540,15 @@ def move_files_on_change(calibre_path, new_authordir, new_titledir, localbook, d
                # move original path to new path
                log.debug("Moving title: %s to %s", path, new_path)
                shutil.move(os.path.normcase(path), os.path.normcase(new_path))
-            else: # path is valid copy only files to new location (merge)
+            else:  # path is valid copy only files to new location (merge)
                log.info("Moving title: %s into existing: %s", path, new_path)
                # Take all files and subfolder from old path (strange command)
                for dir_name, __, file_list in os.walk(path):
                    for file in file_list:
                        shutil.move(os.path.normcase(os.path.join(dir_name, file)),
-                                        os.path.normcase(os.path.join(new_path + dir_name[len(path):], file)))
+                                    os.path.normcase(os.path.join(new_path + dir_name[len(path):], file)))
        # change location in database to new author/title path
-        localbook.path = os.path.join(new_authordir, new_titledir).replace('\\','/')
+        localbook.path = os.path.join(new_authordir, new_titledir).replace('\\', '/')
    except OSError as ex:
        log.error("Rename title from: %s to %s: %s", path, new_path, ex)
        log.debug(ex, exc_info=True)
@@ -587,12 +585,12 @@ def delete_book_gdrive(book, book_format):
        for entry in book.data:
            if entry.format.upper() == book_format:
                name = entry.name + '.' + book_format
-        gFile = gd.getFileFromEbooksFolder(book.path, name)
+        g_file = gd.getFileFromEbooksFolder(book.path, name)
    else:
-        gFile = gd.getFileFromEbooksFolder(os.path.dirname(book.path), book.path.split('/')[1])
-    if gFile:
-        gd.deleteDatabaseEntry(gFile['id'])
-        gFile.Trash()
+        g_file = gd.getFileFromEbooksFolder(os.path.dirname(book.path), book.path.split('/')[1])
+    if g_file:
+        gd.deleteDatabaseEntry(g_file['id'])
+        g_file.Trash()
    else:
        error = _(u'Book path %(path)s not found on Google Drive', path=book.path)  # file not found

@@ -624,12 +622,13 @@ def generate_random_password():

 def uniq(inpt):
    output = []
-    inpt = [ " ".join(inp.split()) for inp in inpt]
+    inpt = [" ".join(inp.split()) for inp in inpt]
    for x in inpt:
        if x not in output:
            output.append(x)
    return output

+
 def check_email(email):
    email = valid_email(email)
    if ub.session.query(ub.User).filter(func.lower(ub.User.email) == email.lower()).first():
@@ -642,7 +641,7 @@ def check_username(username):
    username = username.strip()
    if ub.session.query(ub.User).filter(func.lower(ub.User.name) == username.lower()).scalar():
        log.error(u"This username is already taken")
-        raise Exception (_(u"This username is already taken"))
+        raise Exception(_(u"This username is already taken"))
    return username


@@ -728,13 +727,13 @@ def get_book_cover_internal(book, use_generic_cover_on_failure):
 # saves book cover from url
 def save_cover_from_url(url, book_path):
    try:
-        if not cli.allow_localhost:
-            # 127.0.x.x, localhost, [::1], [::ffff:7f00:1]
-            ip = socket.getaddrinfo(urlparse(url).hostname, 0)[0][4][0]
-            if ip.startswith("127.") or ip.startswith('::ffff:7f') or ip == "::1" or ip == "0.0.0.0" or ip == "::":
-                log.error("Localhost was accessed for cover upload")
-                return False, _("You are not allowed to access localhost for cover uploads")
-        img = requests.get(url, timeout=(10, 200), allow_redirects=False)      # ToDo: Error Handling
+        if cli.allow_localhost:
+            img = requests.get(url, timeout=(10, 200), allow_redirects=False)  # ToDo: Error Handling
+        elif use_advocate:
+            img = advocate.get(url, timeout=(10, 200), allow_redirects=False)      # ToDo: Error Handling
+        else:
+            log.error("python modul advocate is not installed but is needed")
+            return False, _("Python modul 'advocate' is not installed but is needed for cover downloads")
        img.raise_for_status()
        return save_cover(img, book_path)
    except (socket.gaierror,
@@ -746,6 +745,9 @@ def save_cover_from_url(url, book_path):
    except MissingDelegateError as ex:
        log.info(u'File Format Error %s', ex)
        return False, _("Cover Format Error")
+    except UnacceptableAddressException:
+        log.error("Localhost was accessed for cover upload")
+        return False, _("You are not allowed to access localhost for cover uploads")


 def save_cover_from_filestorage(filepath, saved_filename, img):
@@ -808,7 +810,7 @@ def save_cover(img, book_path):
            os.mkdir(tmp_dir)
        ret, message = save_cover_from_filestorage(tmp_dir, "uploaded_cover.jpg", img)
        if ret is True:
-            gd.uploadFileToEbooksFolder(os.path.join(book_path, 'cover.jpg').replace("\\","/"),
+            gd.uploadFileToEbooksFolder(os.path.join(book_path, 'cover.jpg').replace("\\", "/"),
                                        os.path.join(tmp_dir, "uploaded_cover.jpg"))
            log.info("Cover is saved on Google Drive")
            return True, None
@@ -820,9 +822,9 @@ def save_cover(img, book_path):

 def do_download_file(book, book_format, client, data, headers):
    if config.config_use_google_drive:
-        #startTime = time.time()
+        # startTime = time.time()
        df = gd.getFileFromEbooksFolder(book.path, data.name + "." + book_format)
-        #log.debug('%s', time.time() - startTime)
+        # log.debug('%s', time.time() - startTime)
        if df:
            return gd.do_gdrive_download(df, headers)
        else:
@@ -846,16 +848,16 @@ def do_download_file(book, book_format, client, data, headers):
 ##################################


-def check_unrar(unrarLocation):
-    if not unrarLocation:
+def check_unrar(unrar_location):
+    if not unrar_location:
        return

-    if not os.path.exists(unrarLocation):
+    if not os.path.exists(unrar_location):
        return _('Unrar binary file not found')

    try:
-        unrarLocation = [unrarLocation]
-        value = process_wait(unrarLocation, pattern='UNRAR (.*) freeware')
+        unrar_location = [unrar_location]
+        value = process_wait(unrar_location, pattern='UNRAR (.*) freeware')
        if value:
            version = value.group(1)
            log.debug("unrar version %s", version)
@@ -882,19 +884,19 @@ def json_serial(obj):

 # helper function for displaying the runtime of tasks
 def format_runtime(runtime):
-    retVal = ""
+    ret_val = ""
    if runtime.days:
-        retVal = format_unit(runtime.days, 'duration-day', length="long", locale=get_locale()) + ', '
+        ret_val = format_unit(runtime.days, 'duration-day', length="long", locale=get_locale()) + ', '
    mins, seconds = divmod(runtime.seconds, 60)
    hours, minutes = divmod(mins, 60)
    # ToDo: locale.number_symbols._data['timeSeparator'] -> localize time separator ?
    if hours:
-        retVal += '{:d}:{:02d}:{:02d}s'.format(hours, minutes, seconds)
+        ret_val += '{:d}:{:02d}:{:02d}s'.format(hours, minutes, seconds)
    elif minutes:
-        retVal += '{:2d}:{:02d}s'.format(minutes, seconds)
+        ret_val += '{:2d}:{:02d}s'.format(minutes, seconds)
    else:
-        retVal += '{:2d}s'.format(seconds)
-    return retVal
+        ret_val += '{:2d}s'.format(seconds)
+    return ret_val


 # helper function to apply localize status information in tasklist entries
@@ -951,8 +953,8 @@ def check_valid_domain(domain_text):


 def get_cc_columns(filter_config_custom_read=False):
-    tmpcc = calibre_db.session.query(db.Custom_Columns)\
-        .filter(db.Custom_Columns.datatype.notin_(db.cc_exceptions)).all()
+    tmpcc = calibre_db.session.query(db.CustomColumns)\
+        .filter(db.CustomColumns.datatype.notin_(db.cc_exceptions)).all()
    cc = []
    r = None
    if config.config_columns_to_ignore:
@@ -971,6 +973,7 @@ def get_cc_columns(filter_config_custom_read=False):
 def get_download_link(book_id, book_format, client):
    book_format = book_format.split(".")[0]
    book = calibre_db.get_filtered_book(book_id, allow_show_archived=True)
+    data1= ""
    if book:
        data1 = calibre_db.get_book_format(book.id, book_format.upper())
    else: