1
0
mirror of https://github.com/janeczku/calibre-web synced 2026-05-18 19:32:13 +00:00
Files
calibre-web/cps/fb2.py
T
jvoisin 224915bba1 Prevent XXE in epub/fb2/goodreads API
The lxml.etree.fromstring() function uses the default XML parser, which resolves
external entities because XML handling defaults in Python suck. There is no
need for such dangerous misfeatures in calibre-web, so let's disable them.

A user able to upload epub/fb2 could add something like this to the file:

```xml
<?xml version="1.0"?>
<!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
<container><rootfiles><rootfile full-path="&xxe;"/></rootfiles></container>
```

and obtain the content of the `/etc/passwd` file, which is bad™.
2026-04-14 22:12:57 +02:00

86 lines
2.8 KiB
Python

# -*- coding: utf-8 -*-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2018 lemmsh, cervinko, OzzieIsaacs
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from lxml import etree
from .constants import BookMeta
# Hardened XML parser shared by this module: entity references are not
# resolved and network access is disabled, to prevent XXE attacks.
_safe_parser = etree.XMLParser(
    no_network=True,
    resolve_entities=False,
)
def get_fb2_info(tmp_file_path, original_file_extension):
    """Extract basic metadata from a FictionBook 2 (FB2) file.

    The file is read as UTF-8 text, parsed with the module-level
    ``_safe_parser`` and queried with XPath for the authors, the book title
    and the description.

    Args:
        tmp_file_path: Path of the FB2 file to read; stored as ``file_path``
            in the result.
        original_file_extension: Stored unchanged as ``extension`` in the
            result.

    Returns:
        A ``BookMeta`` with ``title``, ``author`` and ``description`` filled
        in (each an empty string when the matching element is missing).
        ``cover`` is ``None``, ``identifiers`` is an empty list and all other
        fields are empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
        lxml.etree.XMLSyntaxError: If the content is not well-formed XML.
    """
    ns = {
        'fb': 'http://www.gribuser.ru/xml/fictionbook/2.0',
        'l': 'http://www.w3.org/1999/xlink',
    }

    # The context manager guarantees the handle is closed, even when
    # reading or decoding fails.
    with open(tmp_file_path, encoding="utf-8") as fb2_file:
        raw_xml = fb2_file.read().encode()
    tree = etree.fromstring(raw_xml, parser=_safe_parser)

    def first_text(element, path):
        """Return the first XPath match under *element* as str, or ''."""
        matches = element.xpath(path, namespaces=ns)
        return str(matches[0]) if matches else ''

    def get_author(element):
        """Build "first middle last" from an <author> element."""
        name_parts = (
            first_text(element, 'fb:first-name/text()'),
            first_text(element, 'fb:middle-name/text()'),
            first_text(element, 'fb:last-name/text()'),
        )
        # Join only the parts that are present so a missing middle name
        # does not leave a double space inside the author name.
        return ' '.join(part for part in name_parts if part)

    authors = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=ns)
    # Authors without any name part are skipped so they do not produce
    # empty entries in the comma-separated list.
    author_names = [name for name in map(get_author, authors) if name]
    author = ", ".join(author_names)

    title = first_text(tree, '/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()')
    # The description is taken from the publish-info book name.
    description = first_text(tree, '/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()')

    return BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title,
        author=author,
        cover=None,
        description=description,
        tags="",
        series="",
        series_id="",
        languages="",
        publisher="",
        pubdate="",
        identifiers=[])