1
0
mirror of https://github.com/janeczku/calibre-web synced 2025-01-21 14:36:58 +00:00

PDF metadata - ignore NullObject /Keywords to fix HTTP 500 upload error

This fixes the HTTP 500 error that occurs when uploading a valid PDF document with an empty /Keywords entry.
This commit is contained in:
contributor 2024-10-04 19:30:28 +03:00
parent 94df767c28
commit 07bf9047b0

View File

@ -39,17 +39,20 @@ except (ImportError, RuntimeError) as e:
try: try:
from pypdf import PdfReader from pypdf import PdfReader
from pypdf.generic import NullObject
use_pdf_meta = True use_pdf_meta = True
except ImportError as ex: except ImportError as ex:
log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', ex) log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', ex)
try: try:
from PyPDF2 import PdfReader from PyPDF2 import PdfReader
from pypdf.generic import NullObject
use_pdf_meta = True use_pdf_meta = True
except ImportError as ex: except ImportError as ex:
log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', ex) log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', ex)
log.debug('PyPdf2 is also possible for metadata extracting from pdf files, but not recommended anymore') log.debug('PyPdf2 is also possible for metadata extracting from pdf files, but not recommended anymore')
try: try:
from PyPDF3 import PdfFileReader as PdfReader from PyPDF3 import PdfFileReader as PdfReader
from pypdf.generic import NullObject
use_pdf_meta = True use_pdf_meta = True
except ImportError as e: except ImportError as e:
log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', e) log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', e)
@ -205,10 +208,12 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension, no_cove
if subject == '': if subject == '':
subject = doc_info.subject or "" subject = doc_info.subject or ""
if tags == '' and '/Keywords' in doc_info: if tags == '' and '/Keywords' in doc_info:
if isinstance(doc_info['/Keywords'], bytes): keywords = doc_info['/Keywords']
tags = doc_info['/Keywords'].decode('utf-8') if not isinstance(keywords, NullObject):
if isinstance(keywords, bytes):
tags = keywords.decode('utf-8')
else: else:
tags = doc_info['/Keywords'] tags = keywords
else: else:
title = original_file_name title = original_file_name