1
0
mirror of https://github.com/janeczku/calibre-web synced 2025-10-17 16:47:39 +00:00

Better epub cover parsing with multiple cover-image items

Code cosmetics
renamed variables
refactored xml page generation
refactored prepare author
This commit is contained in:
Ozzie Isaacs
2022-03-13 12:34:21 +01:00
parent 296f76b5fb
commit 4545f4a20d
18 changed files with 609 additions and 644 deletions

View File

@@ -23,11 +23,10 @@ import mimetypes
import re
import shutil
import socket
import unicodedata
from datetime import datetime, timedelta
from tempfile import gettempdir
from urllib.parse import urlparse
import requests
import unidecode
from babel.dates import format_datetime
from babel.units import format_unit
@@ -41,15 +40,19 @@ from werkzeug.security import generate_password_hash
from markupsafe import escape
from urllib.parse import quote
try:
import unidecode
use_unidecode = True
import advocate
from advocate.exceptions import UnacceptableAddressException
use_advocate = True
except ImportError:
use_unidecode = False
use_advocate = False
advocate = requests
UnacceptableAddressException = MissingSchema = BaseException
from . import calibre_db, cli
from .tasks.convert import TaskConvert
from . import logger, config, get_locale, db, ub, kobo_sync_status
from . import logger, config, get_locale, db, ub
from . import gdriveutils as gd
from .constants import STATIC_DIR as _STATIC_DIR
from .subproc_wrapper import process_wait
@@ -143,7 +146,7 @@ def check_send_to_kindle_with_converter(formats):
'text': _('Convert %(orig)s to %(format)s and send to Kindle',
orig='Epub',
format='Mobi')})
if 'AZW3' in formats and not 'MOBI' in formats:
if 'AZW3' in formats and 'MOBI' not in formats:
bookformats.append({'format': 'Mobi',
'convert': 2,
'text': _('Convert %(orig)s to %(format)s and send to Kindle',
@@ -185,11 +188,11 @@ def check_send_to_kindle(entry):
# Check whether a reader exists for any of the book's formats; if not, return an empty list, otherwise return
# the list of supported formats
def check_read_formats(entry):
    """Return the lower-cased formats of ``entry`` that the in-browser reader supports.

    :param entry: object whose ``data`` attribute is an iterable of elements
        exposing a string ``format`` attribute.
    :return: list of lower-cased format names, in the order they appear in
        ``entry.data``; empty if none of them is readable.
    """
    extensions_reader = {'TXT', 'PDF', 'EPUB', 'CBZ', 'CBT', 'CBR', 'DJVU'}
    # The comparison is case-insensitive; the result is always lower-case.
    return [ele.format.lower() for ele in entry.data if ele.format.upper() in extensions_reader]
@@ -213,10 +216,10 @@ def send_mail(book_id, book_format, convert, kindle_mail, calibrepath, user_id):
if entry.format.upper() == book_format.upper():
converted_file_name = entry.name + '.' + book_format.lower()
link = '<a href="{}">{}</a>'.format(url_for('web.show_book', book_id=book_id), escape(book.title))
EmailText = _(u"%(book)s send to Kindle", book=link)
email_text = _(u"%(book)s send to Kindle", book=link)
WorkerThread.add(user_id, TaskEmail(_(u"Send to Kindle"), book.path, converted_file_name,
config.get_mail_settings(), kindle_mail,
EmailText, _(u'This e-mail has been sent via Calibre-Web.')))
email_text, _(u'This e-mail has been sent via Calibre-Web.')))
return
return _(u"The requested file could not be read. Maybe wrong permissions?")
@@ -229,15 +232,8 @@ def get_valid_filename(value, replace_whitespace=True, chars=128):
if value[-1:] == u'.':
value = value[:-1]+u'_'
value = value.replace("/", "_").replace(":", "_").strip('\0')
if use_unidecode:
if config.config_unicode_filename:
value = (unidecode.unidecode(value))
else:
value = value.replace(u'§', u'SS')
value = value.replace(u'ß', u'ss')
value = unicodedata.normalize('NFKD', value)
re_slugify = re.compile(r'[\W\s-]', re.UNICODE)
value = re_slugify.sub('', value)
if config.config_unicode_filename:
value = (unidecode.unidecode(value))
if replace_whitespace:
# *+:\"/<>? are replaced by _
value = re.sub(r'[*+:\\\"/<>?]+', u'_', value, flags=re.U)
@@ -266,6 +262,7 @@ def split_authors(values):
def get_sorted_author(value):
value2 = None
try:
if ',' not in value:
regexes = [r"^(JR|SR)\.?$", r"^I{1,3}\.?$", r"^IV\.?$"]
@@ -290,6 +287,7 @@ def get_sorted_author(value):
value2 = value
return value2
def edit_book_read_status(book_id, read_status=None):
if not config.config_read_column:
book = ub.session.query(ub.ReadBook).filter(and_(ub.ReadBook.user_id == int(current_user.id),
@@ -303,9 +301,9 @@ def edit_book_read_status(book_id, read_status=None):
else:
book.read_status = ub.ReadBook.STATUS_FINISHED if read_status else ub.ReadBook.STATUS_UNREAD
else:
readBook = ub.ReadBook(user_id=current_user.id, book_id = book_id)
readBook.read_status = ub.ReadBook.STATUS_FINISHED
book = readBook
read_book = ub.ReadBook(user_id=current_user.id, book_id=book_id)
read_book.read_status = ub.ReadBook.STATUS_FINISHED
book = read_book
if not book.kobo_reading_state:
kobo_reading_state = ub.KoboReadingState(user_id=current_user.id, book_id=book_id)
kobo_reading_state.current_bookmark = ub.KoboBookmark()
@@ -332,12 +330,13 @@ def edit_book_read_status(book_id, read_status=None):
except (KeyError, AttributeError):
log.error(u"Custom Column No.%d is not existing in calibre database", config.config_read_column)
return "Custom Column No.{} is not existing in calibre database".format(config.config_read_column)
except (OperationalError, InvalidRequestError) as e:
except (OperationalError, InvalidRequestError) as ex:
calibre_db.session.rollback()
log.error(u"Read status could not set: {}".format(e))
return _("Read status could not set: {}".format(e.orig))
log.error(u"Read status could not set: {}".format(ex))
return _("Read status could not set: {}".format(ex.orig))
return ""
# Deletes a book from the local file storage, returns True if deleting is successful, otherwise False
def delete_book_file(book, calibrepath, book_format=None):
# check that path is 2 elements deep, check that target path has no subfolders
@@ -361,15 +360,15 @@ def delete_book_file(book, calibrepath, book_format=None):
id=book.id,
path=book.path)
shutil.rmtree(path)
except (IOError, OSError) as e:
log.error("Deleting book %s failed: %s", book.id, e)
return False, _("Deleting book %(id)s failed: %(message)s", id=book.id, message=e)
except (IOError, OSError) as ex:
log.error("Deleting book %s failed: %s", book.id, ex)
return False, _("Deleting book %(id)s failed: %(message)s", id=book.id, message=ex)
authorpath = os.path.join(calibrepath, os.path.split(book.path)[0])
if not os.listdir(authorpath):
try:
shutil.rmtree(authorpath)
except (IOError, OSError) as e:
log.error("Deleting authorpath for book %s failed: %s", book.id, e)
except (IOError, OSError) as ex:
log.error("Deleting authorpath for book %s failed: %s", book.id, ex)
return True, None
log.error("Deleting book %s from database only, book path in database not valid: %s",
@@ -395,21 +394,21 @@ def clean_author_database(renamed_author, calibre_path="", local_book=None, gdri
all_titledir = book.path.split('/')[1]
all_new_path = os.path.join(calibre_path, all_new_authordir, all_titledir)
all_new_name = get_valid_filename(book.title, chars=42) + ' - ' \
+ get_valid_filename(new_author.name, chars=42)
+ get_valid_filename(new_author.name, chars=42)
# change location in database to new author/title path
book.path = os.path.join(all_new_authordir, all_titledir).replace('\\', '/')
for file_format in book.data:
if not gdrive:
shutil.move(os.path.normcase(os.path.join(all_new_path,
file_format.name + '.' + file_format.format.lower())),
os.path.normcase(os.path.join(all_new_path,
all_new_name + '.' + file_format.format.lower())))
os.path.normcase(os.path.join(all_new_path,
all_new_name + '.' + file_format.format.lower())))
else:
gFile = gd.getFileFromEbooksFolder(all_new_path,
file_format.name + '.' + file_format.format.lower())
if gFile:
gd.moveGdriveFileRemote(gFile, all_new_name + u'.' + file_format.format.lower())
gd.updateDatabaseOnEdit(gFile['id'], all_new_name + u'.' + file_format.format.lower())
g_file = gd.getFileFromEbooksFolder(all_new_path,
file_format.name + '.' + file_format.format.lower())
if g_file:
gd.moveGdriveFileRemote(g_file, all_new_name + u'.' + file_format.format.lower())
gd.updateDatabaseOnEdit(g_file['id'], all_new_name + u'.' + file_format.format.lower())
else:
log.error("File {} not found on gdrive"
.format(all_new_path, file_format.name + '.' + file_format.format.lower()))
@@ -426,16 +425,16 @@ def rename_all_authors(first_author, renamed_author, calibre_path="", localbook=
old_author_dir = get_valid_filename(r, chars=96)
new_author_rename_dir = get_valid_filename(new_author.name, chars=96)
if gdrive:
gFile = gd.getFileFromEbooksFolder(None, old_author_dir)
if gFile:
gd.moveGdriveFolderRemote(gFile, new_author_rename_dir)
g_file = gd.getFileFromEbooksFolder(None, old_author_dir)
if g_file:
gd.moveGdriveFolderRemote(g_file, new_author_rename_dir)
else:
if os.path.isdir(os.path.join(calibre_path, old_author_dir)):
try:
old_author_path = os.path.join(calibre_path, old_author_dir)
new_author_path = os.path.join(calibre_path, new_author_rename_dir)
shutil.move(os.path.normcase(old_author_path), os.path.normcase(new_author_path))
except (OSError) as ex:
except OSError as ex:
log.error("Rename author from: %s to %s: %s", old_author_path, new_author_path, ex)
log.debug(ex, exc_info=True)
return _("Rename author from: '%(src)s' to '%(dest)s' failed with error: %(error)s",
@@ -444,6 +443,7 @@ def rename_all_authors(first_author, renamed_author, calibre_path="", localbook=
new_authordir = get_valid_filename(localbook.authors[0].name, chars=96)
return new_authordir
# Moves files in file storage during author/title rename, or from temp dir to file storage
def update_dir_structure_file(book_id, calibre_path, first_author, original_filepath, db_filename, renamed_author):
# get book database entry from id, if original path overwrite source with original_filepath
@@ -483,11 +483,9 @@ def update_dir_structure_file(book_id, calibre_path, first_author, original_file
def upload_new_file_gdrive(book_id, first_author, renamed_author, title, title_dir, original_filepath, filename_ext):
error = False
book = calibre_db.get_book(book_id)
file_name = get_valid_filename(title, chars=42) + ' - ' + \
get_valid_filename(first_author, chars=42) + \
filename_ext
get_valid_filename(first_author, chars=42) + filename_ext
rename_all_authors(first_author, renamed_author, gdrive=True)
gdrive_path = os.path.join(get_valid_filename(first_author, chars=96),
title_dir + " (" + str(book_id) + ")")
@@ -505,20 +503,20 @@ def update_dir_structure_gdrive(book_id, first_author, renamed_author):
new_titledir = get_valid_filename(book.title, chars=96) + u" (" + str(book_id) + u")"
if titledir != new_titledir:
gFile = gd.getFileFromEbooksFolder(os.path.dirname(book.path), titledir)
if gFile:
gd.moveGdriveFileRemote(gFile, new_titledir)
g_file = gd.getFileFromEbooksFolder(os.path.dirname(book.path), titledir)
if g_file:
gd.moveGdriveFileRemote(g_file, new_titledir)
book.path = book.path.split('/')[0] + u'/' + new_titledir
gd.updateDatabaseOnEdit(gFile['id'], book.path) # only child folder affected
gd.updateDatabaseOnEdit(g_file['id'], book.path) # only child folder affected
else:
return _(u'File %(file)s not found on Google Drive', file=book.path) # file not found
if authordir != new_authordir and authordir not in renamed_author:
gFile = gd.getFileFromEbooksFolder(os.path.dirname(book.path), new_titledir)
if gFile:
gd.moveGdriveFolderRemote(gFile, new_authordir)
g_file = gd.getFileFromEbooksFolder(os.path.dirname(book.path), new_titledir)
if g_file:
gd.moveGdriveFolderRemote(g_file, new_authordir)
book.path = new_authordir + u'/' + book.path.split('/')[1]
gd.updateDatabaseOnEdit(gFile['id'], book.path)
gd.updateDatabaseOnEdit(g_file['id'], book.path)
else:
return _(u'File %(file)s not found on Google Drive', file=authordir) # file not found
@@ -542,15 +540,15 @@ def move_files_on_change(calibre_path, new_authordir, new_titledir, localbook, d
# move original path to new path
log.debug("Moving title: %s to %s", path, new_path)
shutil.move(os.path.normcase(path), os.path.normcase(new_path))
else: # path is valid copy only files to new location (merge)
else: # path is valid copy only files to new location (merge)
log.info("Moving title: %s into existing: %s", path, new_path)
# Take all files and subfolder from old path (strange command)
for dir_name, __, file_list in os.walk(path):
for file in file_list:
shutil.move(os.path.normcase(os.path.join(dir_name, file)),
os.path.normcase(os.path.join(new_path + dir_name[len(path):], file)))
os.path.normcase(os.path.join(new_path + dir_name[len(path):], file)))
# change location in database to new author/title path
localbook.path = os.path.join(new_authordir, new_titledir).replace('\\','/')
localbook.path = os.path.join(new_authordir, new_titledir).replace('\\', '/')
except OSError as ex:
log.error("Rename title from: %s to %s: %s", path, new_path, ex)
log.debug(ex, exc_info=True)
@@ -587,12 +585,12 @@ def delete_book_gdrive(book, book_format):
for entry in book.data:
if entry.format.upper() == book_format:
name = entry.name + '.' + book_format
gFile = gd.getFileFromEbooksFolder(book.path, name)
g_file = gd.getFileFromEbooksFolder(book.path, name)
else:
gFile = gd.getFileFromEbooksFolder(os.path.dirname(book.path), book.path.split('/')[1])
if gFile:
gd.deleteDatabaseEntry(gFile['id'])
gFile.Trash()
g_file = gd.getFileFromEbooksFolder(os.path.dirname(book.path), book.path.split('/')[1])
if g_file:
gd.deleteDatabaseEntry(g_file['id'])
g_file.Trash()
else:
error = _(u'Book path %(path)s not found on Google Drive', path=book.path) # file not found
@@ -624,12 +622,13 @@ def generate_random_password():
def uniq(inpt):
    """Return the whitespace-normalized items of ``inpt`` without duplicates.

    Each string has its leading/trailing whitespace removed and every inner
    run of whitespace collapsed to a single space. Duplicates are detected
    after this normalization, and the order of first occurrence is kept.

    :param inpt: iterable of strings.
    :return: list of unique, normalized strings.
    """
    seen = set()
    output = []
    for item in inpt:
        normalized = " ".join(item.split())
        # A set lookup keeps de-duplication linear instead of scanning the output list each time.
        if normalized not in seen:
            seen.add(normalized)
            output.append(normalized)
    return output
def check_email(email):
email = valid_email(email)
if ub.session.query(ub.User).filter(func.lower(ub.User.email) == email.lower()).first():
@@ -642,7 +641,7 @@ def check_username(username):
username = username.strip()
if ub.session.query(ub.User).filter(func.lower(ub.User.name) == username.lower()).scalar():
log.error(u"This username is already taken")
raise Exception (_(u"This username is already taken"))
raise Exception(_(u"This username is already taken"))
return username
@@ -728,13 +727,13 @@ def get_book_cover_internal(book, use_generic_cover_on_failure):
# saves book cover from url
def save_cover_from_url(url, book_path):
try:
if not cli.allow_localhost:
# 127.0.x.x, localhost, [::1], [::ffff:7f00:1]
ip = socket.getaddrinfo(urlparse(url).hostname, 0)[0][4][0]
if ip.startswith("127.") or ip.startswith('::ffff:7f') or ip == "::1" or ip == "0.0.0.0" or ip == "::":
log.error("Localhost was accessed for cover upload")
return False, _("You are not allowed to access localhost for cover uploads")
img = requests.get(url, timeout=(10, 200), allow_redirects=False) # ToDo: Error Handling
if cli.allow_localhost:
img = requests.get(url, timeout=(10, 200), allow_redirects=False) # ToDo: Error Handling
elif use_advocate:
img = advocate.get(url, timeout=(10, 200), allow_redirects=False) # ToDo: Error Handling
else:
log.error("python modul advocate is not installed but is needed")
return False, _("Python modul 'advocate' is not installed but is needed for cover downloads")
img.raise_for_status()
return save_cover(img, book_path)
except (socket.gaierror,
@@ -746,6 +745,9 @@ def save_cover_from_url(url, book_path):
except MissingDelegateError as ex:
log.info(u'File Format Error %s', ex)
return False, _("Cover Format Error")
except UnacceptableAddressException:
log.error("Localhost was accessed for cover upload")
return False, _("You are not allowed to access localhost for cover uploads")
def save_cover_from_filestorage(filepath, saved_filename, img):
@@ -808,7 +810,7 @@ def save_cover(img, book_path):
os.mkdir(tmp_dir)
ret, message = save_cover_from_filestorage(tmp_dir, "uploaded_cover.jpg", img)
if ret is True:
gd.uploadFileToEbooksFolder(os.path.join(book_path, 'cover.jpg').replace("\\","/"),
gd.uploadFileToEbooksFolder(os.path.join(book_path, 'cover.jpg').replace("\\", "/"),
os.path.join(tmp_dir, "uploaded_cover.jpg"))
log.info("Cover is saved on Google Drive")
return True, None
@@ -820,9 +822,9 @@ def save_cover(img, book_path):
def do_download_file(book, book_format, client, data, headers):
if config.config_use_google_drive:
#startTime = time.time()
# startTime = time.time()
df = gd.getFileFromEbooksFolder(book.path, data.name + "." + book_format)
#log.debug('%s', time.time() - startTime)
# log.debug('%s', time.time() - startTime)
if df:
return gd.do_gdrive_download(df, headers)
else:
@@ -846,16 +848,16 @@ def do_download_file(book, book_format, client, data, headers):
##################################
def check_unrar(unrarLocation):
if not unrarLocation:
def check_unrar(unrar_location):
if not unrar_location:
return
if not os.path.exists(unrarLocation):
if not os.path.exists(unrar_location):
return _('Unrar binary file not found')
try:
unrarLocation = [unrarLocation]
value = process_wait(unrarLocation, pattern='UNRAR (.*) freeware')
unrar_location = [unrar_location]
value = process_wait(unrar_location, pattern='UNRAR (.*) freeware')
if value:
version = value.group(1)
log.debug("unrar version %s", version)
@@ -882,19 +884,19 @@ def json_serial(obj):
# helper function for displaying the runtime of tasks
def format_runtime(runtime):
    """Format a task runtime as a short human-readable string.

    :param runtime: timedelta-like object providing ``days`` and ``seconds``.
    :return: string made of an optional day part (rendered by ``format_unit``
        for the current locale and followed by ``', '``) and a time part:
        ``H:MM:SSs`` when there are hours, ``MM:SSs`` when there are only
        minutes, otherwise ``SSs``.
    """
    ret_val = ""
    if runtime.days:
        ret_val = format_unit(runtime.days, 'duration-day', length="long", locale=get_locale()) + ', '
    mins, seconds = divmod(runtime.seconds, 60)
    hours, minutes = divmod(mins, 60)
    # ToDo: locale.number_symbols._data['timeSeparator'] -> localize time separator ?
    if hours:
        ret_val += '{:d}:{:02d}:{:02d}s'.format(hours, minutes, seconds)
    elif minutes:
        ret_val += '{:2d}:{:02d}s'.format(minutes, seconds)
    else:
        ret_val += '{:2d}s'.format(seconds)
    return ret_val
# helper function to apply localize status information in tasklist entries
@@ -951,8 +953,8 @@ def check_valid_domain(domain_text):
def get_cc_columns(filter_config_custom_read=False):
tmpcc = calibre_db.session.query(db.Custom_Columns)\
.filter(db.Custom_Columns.datatype.notin_(db.cc_exceptions)).all()
tmpcc = calibre_db.session.query(db.CustomColumns)\
.filter(db.CustomColumns.datatype.notin_(db.cc_exceptions)).all()
cc = []
r = None
if config.config_columns_to_ignore:
@@ -971,6 +973,7 @@ def get_cc_columns(filter_config_custom_read=False):
def get_download_link(book_id, book_format, client):
book_format = book_format.split(".")[0]
book = calibre_db.get_filtered_book(book_id, allow_show_archived=True)
data1= ""
if book:
data1 = calibre_db.get_book_format(book.id, book_format.upper())
else: