mirror of
				https://github.com/janeczku/calibre-web
				synced 2025-10-30 23:03:02 +00:00 
			
		
		
		
	Add PyPDF2 to vendor
This commit is contained in:
		
							
								
								
									
										5
									
								
								vendor/PyPDF2/__init__.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										5
									
								
								vendor/PyPDF2/__init__.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| from .pdf import PdfFileReader, PdfFileWriter | ||||
| from .merger import PdfFileMerger | ||||
| from .pagerange import PageRange, parse_filename_page_ranges | ||||
| from ._version import __version__ | ||||
| __all__ = ["pdf", "PdfFileMerger"] | ||||
							
								
								
									
										1
									
								
								vendor/PyPDF2/_version.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										1
									
								
								vendor/PyPDF2/_version.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @@ -0,0 +1 @@ | ||||
| __version__ = '1.26.0' | ||||
							
								
								
									
										362
									
								
								vendor/PyPDF2/filters.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										362
									
								
								vendor/PyPDF2/filters.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @@ -0,0 +1,362 @@ | ||||
| # vim: sw=4:expandtab:foldmethod=marker | ||||
| # | ||||
| # Copyright (c) 2006, Mathieu Fenniak | ||||
| # All rights reserved. | ||||
| # | ||||
| # Redistribution and use in source and binary forms, with or without | ||||
| # modification, are permitted provided that the following conditions are | ||||
| # met: | ||||
| # | ||||
| # * Redistributions of source code must retain the above copyright notice, | ||||
| # this list of conditions and the following disclaimer. | ||||
| # * Redistributions in binary form must reproduce the above copyright notice, | ||||
| # this list of conditions and the following disclaimer in the documentation | ||||
| # and/or other materials provided with the distribution. | ||||
| # * The name of the author may not be used to endorse or promote products | ||||
| # derived from this software without specific prior written permission. | ||||
| # | ||||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||||
| # POSSIBILITY OF SUCH DAMAGE. | ||||
|  | ||||
|  | ||||
| """ | ||||
| Implementation of stream filters for PDF. | ||||
| """ | ||||
| __author__ = "Mathieu Fenniak" | ||||
| __author_email__ = "biziqe@mathieu.fenniak.net" | ||||
|  | ||||
| from .utils import PdfReadError, ord_, chr_ | ||||
| from sys import version_info | ||||
| if version_info < ( 3, 0 ): | ||||
|     from cStringIO import StringIO | ||||
| else: | ||||
|     from io import StringIO | ||||
|     import struct | ||||
|  | ||||
# zlib provides the Flate (deflate) codec used by /FlateDecode streams.
try:
    import zlib

    def decompress(data):
        # Inflate a zlib-compressed byte string.
        return zlib.decompress(data)

    def compress(data):
        # Deflate a byte string at zlib's default compression level.
        return zlib.compress(data)

except ImportError:
    # Unable to import zlib.  Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        # Convert a Python string to a .NET System.Byte array, one char at a time.
        retval = Array.CreateInstance(System.Byte, len(buf))
        for i in range(len(buf)):
            retval[i] = ord(buf[i])
        return retval

    def _bytearr_to_string(bytes):
        # Convert a .NET byte array back into a Python string.
        retval = ""
        for i in range(bytes.Length):
            retval += chr(bytes[i])
        return retval

    def _read_bytes(stream):
        # Drain a .NET stream into memory and return its contents as a byte array.
        ms = IO.MemoryStream()
        buf = Array.CreateInstance(System.Byte, 2048)
        while True:
            bytes = stream.Read(buf, 0, buf.Length)
            if bytes == 0:
                break
            else:
                ms.Write(buf, 0, bytes)
        retval = ms.ToArray()
        ms.Close()
        return retval

    def decompress(data):
        # Inflate via .NET DeflateStream (IronPython fallback path).
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        ms.Write(bytes, 0, bytes.Length)
        ms.Position = 0  # fseek 0
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
        bytes = _read_bytes(gz)
        retval = _bytearr_to_string(bytes)
        gz.Close()
        return retval

    def compress(data):
        # Deflate via .NET DeflateStream (IronPython fallback path).
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
        gz.Write(bytes, 0, bytes.Length)
        gz.Close()
        ms.Position = 0 # fseek 0
        bytes = ms.ToArray()
        retval = _bytearr_to_string(bytes)
        ms.Close()
        return retval
|  | ||||
|  | ||||
class FlateDecode(object):
    def decode(data, decodeParms):
        """
        Decompress deflate-encoded stream data and, when *decodeParms*
        requests one, reverse a PNG predictor.

        :param data: the compressed stream bytes.
        :param decodeParms: dictionary-like object that may carry
            "/Predictor" and "/Columns"; may also be None or an array
            holding a null object (tolerated below).
        :return: the decoded data.
        :raises PdfReadError: on an unsupported predictor or PNG filter type.
        """
        data = decompress(data)
        predictor = 1
        if decodeParms:
            try:
                predictor = decodeParms.get("/Predictor", 1)
            except AttributeError:
                pass    # usually an array with a null object was read

        # predictor 1 == no predictor
        if predictor != 1:
            columns = decodeParms["/Columns"]
            # PNG prediction:
            if predictor >= 10 and predictor <= 15:
                output = StringIO()
                # PNG prediction can vary from row to row
                # Each row is one filter-type byte plus `columns` data bytes.
                rowlength = columns + 1
                assert len(data) % rowlength == 0
                prev_rowdata = (0,) * rowlength
                for row in range(len(data) // rowlength):
                    rowdata = [ord_(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
                    filterByte = rowdata[0]
                    if filterByte == 0:
                        # PNG "None" filter: row stored verbatim.
                        pass
                    elif filterByte == 1:
                        # PNG "Sub" filter: byte is delta from the byte to its left.
                        for i in range(2, rowlength):
                            rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
                    elif filterByte == 2:
                        # PNG "Up" filter: byte is delta from the byte above it.
                        for i in range(1, rowlength):
                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                    else:
                        # unsupported PNG filter
                        raise PdfReadError("Unsupported PNG filter %r" % filterByte)
                    prev_rowdata = rowdata
                    # Emit the row without its leading filter-type byte.
                    output.write(''.join([chr(x) for x in rowdata[1:]]))
                data = output.getvalue()
            else:
                # unsupported predictor
                raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
        return data
    decode = staticmethod(decode)

    def encode(data):
        """Compress *data* with the deflate algorithm."""
        return compress(data)
    encode = staticmethod(encode)
|  | ||||
|  | ||||
class ASCIIHexDecode(object):
    """Decoder for the /ASCIIHexDecode filter: pairs of hexadecimal digits
    (whitespace ignored) terminated by a '>' marker."""

    @staticmethod
    def decode(data, decodeParms=None):
        """
        Translate hex-encoded *data* into its raw characters.

        :param data: string of hex digits, ended by '>'.
        :param decodeParms: unused; present for filter-interface symmetry.
        :return: the decoded string.
        """
        decoded = []
        pending = ""
        idx = 0
        while True:
            ch = data[idx]
            if ch == ">":
                # End-of-data marker reached.
                break
            if not ch.isspace():
                pending += ch
                if len(pending) == 2:
                    # Two hex digits make one output character.
                    decoded.append(chr(int(pending, base=16)))
                    pending = ""
            idx += 1
        # A dangling single digit means malformed input.
        assert pending == ""
        return "".join(decoded)
|  | ||||
|  | ||||
class LZWDecode(object):
    """Taken from:
    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
    """
    class decoder(object):
        # Stateful LZW bit-stream decoder; one instance per decode call.
        def __init__(self, data):
            # Special control codes used by the LZW variant in PDF streams.
            self.STOP=257
            self.CLEARDICT=256
            self.data=data
            # Current read position: byte index plus bit offset inside it.
            self.bytepos=0
            self.bitpos=0
            # Code table; entries 0-255 are the single characters, entries
            # from 258 upward are learned while decoding.
            self.dict=[""]*4096
            for i in range(256):
                self.dict[i]=chr(i)
            self.resetDict()

        def resetDict(self):
            # Drop all learned entries and return to 9-bit codes.
            self.dictlen=258
            self.bitspercode=9

        def nextCode(self):
            # Read the next variable-width code (MSB-first) from the bit
            # stream; returns -1 when the data is exhausted.
            fillbits=self.bitspercode
            value=0
            while fillbits>0 :
                if self.bytepos >= len(self.data):
                    return -1
                nextbits=ord(self.data[self.bytepos])
                bitsfromhere=8-self.bitpos
                if bitsfromhere>fillbits:
                    bitsfromhere=fillbits
                # Mask the bits still needed from this byte and merge them in.
                value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
                           (0xff >> (8-bitsfromhere))) <<
                          (fillbits-bitsfromhere))
                fillbits -= bitsfromhere
                self.bitpos += bitsfromhere
                if self.bitpos >=8:
                    self.bitpos=0
                    self.bytepos = self.bytepos+1
            return value

        def decode(self):
            """ algorithm derived from:
            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
            and the PDFReference
            """
            cW = self.CLEARDICT;
            baos=""
            while True:
                pW = cW;
                cW = self.nextCode();
                if cW == -1:
                    raise PdfReadError("Missed the stop code in LZWDecode!")
                if cW == self.STOP:
                    break;
                elif cW == self.CLEARDICT:
                    self.resetDict();
                elif pW == self.CLEARDICT:
                    # First code after a table reset: emit it verbatim.
                    baos+=self.dict[cW]
                else:
                    if cW < self.dictlen:
                        # Known code: emit it, learn pW + first char of cW.
                        baos += self.dict[cW]
                        p=self.dict[pW]+self.dict[cW][0]
                        self.dict[self.dictlen]=p
                        self.dictlen+=1
                    else:
                        # "KwKwK" case: the code is not yet in the table.
                        p=self.dict[pW]+self.dict[pW][0]
                        baos+=p
                        self.dict[self.dictlen] = p;
                        self.dictlen+=1
                    # Widen the code size (up to 12 bits) as the table fills.
                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
                        self.bitspercode < 12):
                        self.bitspercode+=1
            return baos

    @staticmethod
    def decode(data,decodeParams=None):
        # *decodeParams* is accepted for interface symmetry with the other
        # filter classes and is ignored.
        return LZWDecode.decoder(data).decode()
|  | ||||
|  | ||||
class ASCII85Decode(object):
    # Decoder for ASCII base-85 data: groups of five printable characters
    # ('!'..'u') encode four bytes; optional "<~" prefix, "~>" terminator.
    def decode(data, decodeParms=None):
        if version_info < ( 3, 0 ):
            # Python 2 branch: consumes and returns str.
            retval = ""
            group = []
            x = 0
            hitEod = False
            # remove all whitespace from data
            data = [y for y in data if not (y in ' \n\r\t')]
            while not hitEod:
                c = data[x]
                if len(retval) == 0 and c == "<" and data[x+1] == "~":
                    # Skip the optional "<~" start marker.
                    x += 2
                    continue
                #elif c.isspace():
                #    x += 1
                #    continue
                elif c == 'z':
                    # 'z' is shorthand for four zero bytes.
                    assert len(group) == 0
                    retval += '\x00\x00\x00\x00'
                    x += 1
                    continue
                elif c == "~" and data[x+1] == ">":
                    # End marker; pad and flush any partial final group.
                    if len(group) != 0:
                        # cannot have a final group of just 1 char
                        assert len(group) > 1
                        cnt = len(group) - 1
                        group += [ 85, 85, 85 ]
                        # hitEod doubles as the count of real bytes to keep.
                        hitEod = cnt
                    else:
                        break
                else:
                    c = ord(c) - 33
                    assert c >= 0 and c < 85
                    group += [ c ]
                if len(group) >= 5:
                    # Five base-85 digits form one 32-bit big-endian word.
                    b = group[0] * (85**4) + \
                        group[1] * (85**3) + \
                        group[2] * (85**2) + \
                        group[3] * 85 + \
                        group[4]
                    assert b < (2**32 - 1)
                    c4 = chr((b >> 0) % 256)
                    c3 = chr((b >> 8) % 256)
                    c2 = chr((b >> 16) % 256)
                    c1 = chr(b >> 24)
                    retval += (c1 + c2 + c3 + c4)
                    if hitEod:
                        # Trim the padding bytes of the final short group.
                        retval = retval[:-4+hitEod]
                    group = []
                x += 1
            return retval
        else:
            # Python 3 branch: consumes str or bytes, returns bytes.
            if isinstance(data, str):
                data = data.encode('ascii')
            n = b = 0
            out = bytearray()
            for c in data:
                if ord('!') <= c and c <= ord('u'):
                    # Regular base-85 digit; fold into the accumulator.
                    n += 1
                    b = b*85+(c-33)
                    if n == 5:
                        out += struct.pack(b'>L',b)
                        n = b = 0
                elif c == ord('z'):
                    # 'z' is shorthand for four zero bytes.
                    assert n == 0
                    out += b'\0\0\0\0'
                elif c == ord('~'):
                    # End marker; pad a partial group with the top digit (84)
                    # and keep only the n-1 real bytes.
                    if n:
                        for _ in range(5-n):
                            b = b*85+84
                        out += struct.pack(b'>L',b)[:n-1]
                    break
            return bytes(out)
    decode = staticmethod(decode)
|  | ||||
|  | ||||
def decodeStreamData(stream):
    """
    Run a stream object's raw data through every filter named in its
    /Filter entry, in order, and return the decoded result.

    :param stream: a stream object exposing ``get()`` dictionary access and
        a ``_data`` attribute holding the raw stream bytes.
    :return: the decoded data (or the raw data when ``_data`` is empty).
    :raises NotImplementedError: for an unsupported filter, or a /Crypt
        filter that specifies /Name or /Type.
    """
    from .generic import NameObject
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
    # If there is not data to decode we should not try to decode the data.
    if data:
        for filterType in filters:
            if filterType == "/FlateDecode" or filterType == "/Fl":
                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
                data = ASCIIHexDecode.decode(data)
            elif filterType == "/LZWDecode" or filterType == "/LZW":
                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCII85Decode" or filterType == "/A85":
                data = ASCII85Decode.decode(data)
            elif filterType == "/Crypt":
                # NOTE(review): this reads "/DecodeParams", unlike the
                # "/DecodeParms" key used above -- looks like an upstream
                # inconsistency; confirm against the PDF spec before changing.
                decodeParams = stream.get("/DecodeParams", {})
                if "/Name" not in decodeParams and "/Type" not in decodeParams:
                    # Identity crypt filter: data passes through unchanged.
                    pass
                else:
                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
            else:
                # unsupported filter
                raise NotImplementedError("unsupported filter %s" % filterType)
    return data
							
								
								
									
										1226
									
								
								vendor/PyPDF2/generic.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										1226
									
								
								vendor/PyPDF2/generic.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										553
									
								
								vendor/PyPDF2/merger.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										553
									
								
								vendor/PyPDF2/merger.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @@ -0,0 +1,553 @@ | ||||
| # vim: sw=4:expandtab:foldmethod=marker | ||||
| # | ||||
| # Copyright (c) 2006, Mathieu Fenniak | ||||
| # All rights reserved. | ||||
| # | ||||
| # Redistribution and use in source and binary forms, with or without | ||||
| # modification, are permitted provided that the following conditions are | ||||
| # met: | ||||
| # | ||||
| # * Redistributions of source code must retain the above copyright notice, | ||||
| # this list of conditions and the following disclaimer. | ||||
| # * Redistributions in binary form must reproduce the above copyright notice, | ||||
| # this list of conditions and the following disclaimer in the documentation | ||||
| # and/or other materials provided with the distribution. | ||||
| # * The name of the author may not be used to endorse or promote products | ||||
| # derived from this software without specific prior written permission. | ||||
| # | ||||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||||
| # POSSIBILITY OF SUCH DAMAGE. | ||||
|  | ||||
| from .generic import * | ||||
| from .utils import isString, str_ | ||||
| from .pdf import PdfFileReader, PdfFileWriter | ||||
| from .pagerange import PageRange | ||||
| from sys import version_info | ||||
| if version_info < ( 3, 0 ): | ||||
|     from cStringIO import StringIO | ||||
|     StreamIO = StringIO | ||||
| else: | ||||
|     from io import BytesIO | ||||
|     from io import FileIO as file | ||||
|     StreamIO = BytesIO | ||||
|  | ||||
|  | ||||
| class _MergedPage(object): | ||||
|     """ | ||||
|     _MergedPage is used internally by PdfFileMerger to collect necessary | ||||
|     information on each page that is being merged. | ||||
|     """ | ||||
|     def __init__(self, pagedata, src, id): | ||||
|         self.src = src | ||||
|         self.pagedata = pagedata | ||||
|         self.out_pagedata = None | ||||
|         self.id = id | ||||
|  | ||||
|  | ||||
| class PdfFileMerger(object): | ||||
|     """ | ||||
|     Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs | ||||
|     into a single PDF. It can concatenate, slice, insert, or any combination | ||||
|     of the above. | ||||
|  | ||||
|     See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`) | ||||
|     and :meth:`write()<write>` for usage information. | ||||
|  | ||||
|     :param bool strict: Determines whether user should be warned of all | ||||
|             problems and also causes some correctable problems to be fatal. | ||||
|             Defaults to ``True``. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, strict=True): | ||||
|         self.inputs = [] | ||||
|         self.pages = [] | ||||
|         self.output = PdfFileWriter() | ||||
|         self.bookmarks = [] | ||||
|         self.named_dests = [] | ||||
|         self.id_count = 0 | ||||
|         self.strict = strict | ||||
|  | ||||
|     def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True): | ||||
|         """ | ||||
|         Merges the pages from the given file into the output file at the | ||||
|         specified page number. | ||||
|  | ||||
|         :param int position: The *page number* to insert this file. File will | ||||
|             be inserted after the given number. | ||||
|  | ||||
|         :param fileobj: A File Object or an object that supports the standard read | ||||
|             and seek methods similar to a File Object. Could also be a | ||||
|             string representing a path to a PDF file. | ||||
|  | ||||
|         :param str bookmark: Optionally, you may specify a bookmark to be applied at | ||||
|             the beginning of the included file by supplying the text of the bookmark. | ||||
|  | ||||
|         :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple | ||||
|             to merge only the specified range of pages from the source | ||||
|             document into the output document. | ||||
|  | ||||
|         :param bool import_bookmarks: You may prevent the source document's bookmarks | ||||
|             from being imported by specifying this as ``False``. | ||||
|         """ | ||||
|  | ||||
|         # This parameter is passed to self.inputs.append and means | ||||
|         # that the stream used was created in this method. | ||||
|         my_file = False | ||||
|  | ||||
|         # If the fileobj parameter is a string, assume it is a path | ||||
|         # and create a file object at that location. If it is a file, | ||||
|         # copy the file's contents into a BytesIO (or StreamIO) stream object; if | ||||
|         # it is a PdfFileReader, copy that reader's stream into a | ||||
|         # BytesIO (or StreamIO) stream. | ||||
|         # If fileobj is none of the above types, it is not modified | ||||
|         decryption_key = None | ||||
|         if isString(fileobj): | ||||
|             fileobj = file(fileobj, 'rb') | ||||
|             my_file = True | ||||
|         elif isinstance(fileobj, file): | ||||
|             fileobj.seek(0) | ||||
|             filecontent = fileobj.read() | ||||
|             fileobj = StreamIO(filecontent) | ||||
|             my_file = True | ||||
|         elif isinstance(fileobj, PdfFileReader): | ||||
|             orig_tell = fileobj.stream.tell() | ||||
|             fileobj.stream.seek(0) | ||||
|             filecontent = StreamIO(fileobj.stream.read()) | ||||
|             fileobj.stream.seek(orig_tell) # reset the stream to its original location | ||||
|             fileobj = filecontent | ||||
|             if hasattr(fileobj, '_decryption_key'): | ||||
|                 decryption_key = fileobj._decryption_key | ||||
|             my_file = True | ||||
|  | ||||
|         # Create a new PdfFileReader instance using the stream | ||||
|         # (either file or BytesIO or StringIO) created above | ||||
|         pdfr = PdfFileReader(fileobj, strict=self.strict) | ||||
|         if decryption_key is not None: | ||||
|             pdfr._decryption_key = decryption_key | ||||
|  | ||||
|         # Find the range of pages to merge. | ||||
|         if pages == None: | ||||
|             pages = (0, pdfr.getNumPages()) | ||||
|         elif isinstance(pages, PageRange): | ||||
|             pages = pages.indices(pdfr.getNumPages()) | ||||
|         elif not isinstance(pages, tuple): | ||||
|             raise TypeError('"pages" must be a tuple of (start, stop[, step])') | ||||
|  | ||||
|         srcpages = [] | ||||
|         if bookmark: | ||||
|             bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit')) | ||||
|  | ||||
|         outline = [] | ||||
|         if import_bookmarks: | ||||
|             outline = pdfr.getOutlines() | ||||
|             outline = self._trim_outline(pdfr, outline, pages) | ||||
|  | ||||
|         if bookmark: | ||||
|             self.bookmarks += [bookmark, outline] | ||||
|         else: | ||||
|             self.bookmarks += outline | ||||
|  | ||||
|         dests = pdfr.namedDestinations | ||||
|         dests = self._trim_dests(pdfr, dests, pages) | ||||
|         self.named_dests += dests | ||||
|  | ||||
|         # Gather all the pages that are going to be merged | ||||
|         for i in range(*pages): | ||||
|             pg = pdfr.getPage(i) | ||||
|  | ||||
|             id = self.id_count | ||||
|             self.id_count += 1 | ||||
|  | ||||
|             mp = _MergedPage(pg, pdfr, id) | ||||
|  | ||||
|             srcpages.append(mp) | ||||
|  | ||||
|         self._associate_dests_to_pages(srcpages) | ||||
|         self._associate_bookmarks_to_pages(srcpages) | ||||
|  | ||||
|         # Slice to insert the pages at the specified position | ||||
|         self.pages[position:position] = srcpages | ||||
|  | ||||
|         # Keep track of our input files so we can close them later | ||||
|         self.inputs.append((fileobj, pdfr, my_file)) | ||||
|  | ||||
|     def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True): | ||||
|         """ | ||||
|         Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate | ||||
|         all pages onto the end of the file instead of specifying a position. | ||||
|  | ||||
|         :param fileobj: A File Object or an object that supports the standard read | ||||
|             and seek methods similar to a File Object. Could also be a | ||||
|             string representing a path to a PDF file. | ||||
|  | ||||
|         :param str bookmark: Optionally, you may specify a bookmark to be applied at | ||||
|             the beginning of the included file by supplying the text of the bookmark. | ||||
|  | ||||
|         :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple | ||||
|             to merge only the specified range of pages from the source | ||||
|             document into the output document. | ||||
|  | ||||
|         :param bool import_bookmarks: You may prevent the source document's bookmarks | ||||
|             from being imported by specifying this as ``False``. | ||||
|         """ | ||||
|  | ||||
|         self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks) | ||||
|  | ||||
    def write(self, fileobj):
        """
        Writes all data that has been merged to the given output file.

        :param fileobj: Output file. Can be a filename or any kind of
            file-like object.
        """
        # Open the target ourselves when a path was given, and remember to
        # close it when done.
        my_file = False
        if isString(fileobj):
            fileobj = file(fileobj, 'wb')
            my_file = True

        # Add pages to the PdfFileWriter
        # The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
        for page in self.pages:
            self.output.addPage(page.pagedata)
            # Record each page's indirect reference in the output writer so
            # destinations/bookmarks written below can point at it.
            page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())
            #idnum = self.output._objects.index(self.output._pages.getObject()["/Kids"][-1].getObject()) + 1
            #page.out_pagedata = IndirectObject(idnum, 0, self.output)

        # Once all pages are added, create bookmarks to point at those pages
        self._write_dests()
        self._write_bookmarks()

        # Write the output to the file
        self.output.write(fileobj)

        if my_file:
            fileobj.close()
|  | ||||
|     def close(self): | ||||
|         """ | ||||
|         Shuts all file descriptors (input and output) and clears all memory | ||||
|         usage. | ||||
|         """ | ||||
|         self.pages = [] | ||||
|         for fo, pdfr, mine in self.inputs: | ||||
|             if mine: | ||||
|                 fo.close() | ||||
|  | ||||
|         self.inputs = [] | ||||
|         self.output = None | ||||
|  | ||||
|     def addMetadata(self, infos): | ||||
|         """ | ||||
|         Add custom metadata to the output. | ||||
|  | ||||
|         :param dict infos: a Python dictionary where each key is a field | ||||
|             and each value is your new metadata. | ||||
|             Example: ``{u'/Title': u'My title'}`` | ||||
|         """ | ||||
|         self.output.addMetadata(infos) | ||||
|  | ||||
|     def setPageLayout(self, layout): | ||||
|         """ | ||||
|         Set the page layout | ||||
|  | ||||
|         :param str layout: The page layout to be used | ||||
|  | ||||
|         Valid layouts are: | ||||
|              /NoLayout        Layout explicitly not specified | ||||
|              /SinglePage      Show one page at a time | ||||
|              /OneColumn       Show one column at a time | ||||
|              /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left | ||||
|              /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right | ||||
|              /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left | ||||
|              /TwoPageRight    Show two pages at a time, odd-numbered pages on the right | ||||
|         """ | ||||
|         self.output.setPageLayout(layout) | ||||
|  | ||||
|     def setPageMode(self, mode): | ||||
|         """ | ||||
|         Set the page mode. | ||||
|  | ||||
|         :param str mode: The page mode to use. | ||||
|  | ||||
|         Valid modes are: | ||||
|             /UseNone         Do not show outlines or thumbnails panels | ||||
|             /UseOutlines     Show outlines (aka bookmarks) panel | ||||
|             /UseThumbs       Show page thumbnails panel | ||||
|             /FullScreen      Fullscreen view | ||||
|             /UseOC           Show Optional Content Group (OCG) panel | ||||
|             /UseAttachments  Show attachments panel | ||||
|         """ | ||||
|         self.output.setPageMode(mode) | ||||
|  | ||||
|     def _trim_dests(self, pdf, dests, pages): | ||||
|         """ | ||||
|         Removes any named destinations that are not a part of the specified | ||||
|         page set. | ||||
|         """ | ||||
|         new_dests = [] | ||||
|         prev_header_added = True | ||||
|         for k, o in list(dests.items()): | ||||
|             for j in range(*pages): | ||||
|                 if pdf.getPage(j).getObject() == o['/Page'].getObject(): | ||||
|                     o[NameObject('/Page')] = o['/Page'].getObject() | ||||
|                     assert str_(k) == str_(o['/Title']) | ||||
|                     new_dests.append(o) | ||||
|                     break | ||||
|         return new_dests | ||||
|  | ||||
    def _trim_outline(self, pdf, outline, pages):
        """
        Removes any outline/bookmark entries that are not a part of the
        specified page set.

        :param pdf: source PdfFileReader the outline belongs to.
        :param list outline: flat list of bookmark dicts; a nested list right
            after an entry holds that entry's children.
        :param pages: (start, stop[, step]) tuple fed to range().
        :return: trimmed outline in the same nested-list shape.
        """
        new_outline = []
        prev_header_added = True
        for i, o in enumerate(outline):
            if isinstance(o, list):
                # Nested sub-outline: keep whatever survives the trim.  If the
                # parent heading itself was dropped (prev_header_added False),
                # re-add it so the surviving children are not orphaned.
                sub = self._trim_outline(pdf, o, pages)
                if sub:
                    if not prev_header_added:
                        new_outline.append(outline[i-1])
                    new_outline.append(sub)
            else:
                prev_header_added = False
                for j in range(*pages):
                    if pdf.getPage(j).getObject() == o['/Page'].getObject():
                        # Resolve the indirect page reference in place.
                        o[NameObject('/Page')] = o['/Page'].getObject()
                        new_outline.append(o)
                        prev_header_added = True
                        break
        return new_outline
|  | ||||
|     def _write_dests(self): | ||||
|         dests = self.named_dests | ||||
|  | ||||
|         for v in dests: | ||||
|             pageno = None | ||||
|             pdf = None | ||||
|             if '/Page' in v: | ||||
|                 for i, p in enumerate(self.pages): | ||||
|                     if p.id == v['/Page']: | ||||
|                         v[NameObject('/Page')] = p.out_pagedata | ||||
|                         pageno = i | ||||
|                         pdf = p.src | ||||
|                         break | ||||
|             if pageno != None: | ||||
|                 self.output.addNamedDestinationObject(v) | ||||
|  | ||||
|     def _write_bookmarks(self, bookmarks=None, parent=None): | ||||
|  | ||||
|         if bookmarks == None: | ||||
|             bookmarks = self.bookmarks | ||||
|  | ||||
|         last_added = None | ||||
|         for b in bookmarks: | ||||
|             if isinstance(b, list): | ||||
|                 self._write_bookmarks(b, last_added) | ||||
|                 continue | ||||
|  | ||||
|             pageno = None | ||||
|             pdf = None | ||||
|             if '/Page' in b: | ||||
|                 for i, p in enumerate(self.pages): | ||||
|                     if p.id == b['/Page']: | ||||
|                         #b[NameObject('/Page')] = p.out_pagedata | ||||
|                         args = [NumberObject(p.id), NameObject(b['/Type'])] | ||||
|                         #nothing more to add | ||||
|                         #if b['/Type'] == '/Fit' or b['/Type'] == '/FitB' | ||||
|                         if b['/Type'] == '/FitH' or b['/Type'] == '/FitBH': | ||||
|                             if '/Top' in b and not isinstance(b['/Top'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Top'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             del b['/Top'] | ||||
|                         elif b['/Type'] == '/FitV' or b['/Type'] == '/FitBV': | ||||
|                             if '/Left' in b and not isinstance(b['/Left'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Left'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             del b['/Left'] | ||||
|                         elif b['/Type'] == '/XYZ': | ||||
|                             if '/Left' in b and not isinstance(b['/Left'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Left'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             if '/Top' in b and not isinstance(b['/Top'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Top'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             if '/Zoom' in b and not isinstance(b['/Zoom'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Zoom'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             del b['/Top'], b['/Zoom'], b['/Left'] | ||||
|                         elif b['/Type'] == '/FitR': | ||||
|                             if '/Left' in b and not isinstance(b['/Left'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Left'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             if '/Bottom' in b and not isinstance(b['/Bottom'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Bottom'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             if '/Right' in b and not isinstance(b['/Right'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Right'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             if '/Top' in b and not isinstance(b['/Top'], NullObject): | ||||
|                                 args.append(FloatObject(b['/Top'])) | ||||
|                             else: | ||||
|                                 args.append(FloatObject(0)) | ||||
|                             del b['/Left'], b['/Right'], b['/Bottom'], b['/Top'] | ||||
|  | ||||
|                         b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)}) | ||||
|  | ||||
|                         pageno = i | ||||
|                         pdf = p.src | ||||
|                         break | ||||
|             if pageno != None: | ||||
|                 del b['/Page'], b['/Type'] | ||||
|                 last_added = self.output.addBookmarkDict(b, parent) | ||||
|  | ||||
|     def _associate_dests_to_pages(self, pages): | ||||
|         for nd in self.named_dests: | ||||
|             pageno = None | ||||
|             np = nd['/Page'] | ||||
|  | ||||
|             if isinstance(np, NumberObject): | ||||
|                 continue | ||||
|  | ||||
|             for p in pages: | ||||
|                 if np.getObject() == p.pagedata.getObject(): | ||||
|                     pageno = p.id | ||||
|  | ||||
|             if pageno != None: | ||||
|                 nd[NameObject('/Page')] = NumberObject(pageno) | ||||
|             else: | ||||
|                 raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],)) | ||||
|  | ||||
|     def _associate_bookmarks_to_pages(self, pages, bookmarks=None): | ||||
|         if bookmarks == None: | ||||
|             bookmarks = self.bookmarks | ||||
|  | ||||
|         for b in bookmarks: | ||||
|             if isinstance(b, list): | ||||
|                 self._associate_bookmarks_to_pages(pages, b) | ||||
|                 continue | ||||
|  | ||||
|             pageno = None | ||||
|             bp = b['/Page'] | ||||
|  | ||||
|             if isinstance(bp, NumberObject): | ||||
|                 continue | ||||
|  | ||||
|             for p in pages: | ||||
|                 if bp.getObject() == p.pagedata.getObject(): | ||||
|                     pageno = p.id | ||||
|  | ||||
|             if pageno != None: | ||||
|                 b[NameObject('/Page')] = NumberObject(pageno) | ||||
|             else: | ||||
|                 raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],)) | ||||
|  | ||||
|     def findBookmark(self, bookmark, root=None): | ||||
|         if root == None: | ||||
|             root = self.bookmarks | ||||
|  | ||||
|         for i, b in enumerate(root): | ||||
|             if isinstance(b, list): | ||||
|                 res = self.findBookmark(bookmark, b) | ||||
|                 if res: | ||||
|                     return [i] + res | ||||
|             elif b == bookmark or b['/Title'] == bookmark: | ||||
|                 return [i] | ||||
|  | ||||
|         return None | ||||
|  | ||||
    def addBookmark(self, title, pagenum, parent=None):
        """
        Add a bookmark to this PDF file.

        :param str title: Title to use for this bookmark.
        :param int pagenum: Page number this bookmark will point to.
        :param parent: A reference to a parent bookmark to create nested
            bookmarks.  May be a bookmark object, its title, or an index
            path as returned by :meth:`findBookmark`.
        :return: the newly created Bookmark object.
        """
        # Work out the index path of the insertion point.
        if parent == None:
            iloc = [len(self.bookmarks)-1]
        elif isinstance(parent, list):
            iloc = parent
        else:
            iloc = self.findBookmark(parent)

        # /FitH destination with a fixed top offset of 826.
        # NOTE(review): 826 looks like an assumed page height in points --
        # confirm its behavior for non-A4/letter page sizes.
        dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))

        if parent == None:
            self.bookmarks.append(dest)
        else:
            # Descend to the list containing the parent, then append the new
            # bookmark to the parent's existing child list, or create one
            # right after the parent.
            bmparent = self.bookmarks
            for i in iloc[:-1]:
                bmparent = bmparent[i]
            npos = iloc[-1]+1
            if npos < len(bmparent) and isinstance(bmparent[npos], list):
                bmparent[npos].append(dest)
            else:
                bmparent.insert(npos, [dest])
        return dest
|  | ||||
    def addNamedDestination(self, title, pagenum):
        """
        Add a destination to the output.

        :param str title: Title to use
        :param int pagenum: Page number this destination points at.
        """

        # /FitH destination with a fixed top offset of 826, matching
        # addBookmark.  NOTE(review): 826 looks like an assumed page height
        # in points -- confirm for other page sizes.
        dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
        self.named_dests.append(dest)
|  | ||||
|  | ||||
class OutlinesObject(list):
    """
    A list of outline (bookmark) entries that mirrors a TreeObject inside a
    writer, keeping the two in sync as entries are added and removed.
    """
    def __init__(self, pdf, tree, parent=None):
        list.__init__(self)
        self.tree = tree      # the outline TreeObject this list mirrors
        self.pdf = pdf        # the owning writer
        self.parent = parent  # optional parent outline

    def remove(self, index):
        """Remove the entry at *index* from this list and from the tree."""
        obj = self[index]
        del self[index]
        self.tree.removeChild(obj)

    def add(self, title, pagenum):
        """Append a bookmark named *title* that jumps to page *pagenum*."""
        pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
        action = DictionaryObject()
        # /GoTo action targeting the page with a /FitH destination.
        # NOTE(review): 826 appears to assume a fixed page height -- confirm.
        action.update({
            NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
            NameObject('/S') : NameObject('/GoTo')
        })
        actionRef = self.pdf._addObject(action)
        bookmark = TreeObject()

        bookmark.update({
            NameObject('/A'): actionRef,
            NameObject('/Title'): createStringObject(title),
        })

        self.pdf._addObject(bookmark)

        self.tree.addChild(bookmark)

    def removeAll(self):
        """Empty both this list and the underlying outline tree."""
        # Iterate over a copy: removeChild mutates the tree while we loop.
        for child in [x for x in self.tree.children()]:
            self.tree.removeChild(child)
            self.pop()
							
								
								
									
										152
									
								
								vendor/PyPDF2/pagerange.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										152
									
								
								vendor/PyPDF2/pagerange.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @@ -0,0 +1,152 @@ | ||||
| #!/usr/bin/env python | ||||
| """ | ||||
| Representation and utils for ranges of PDF file pages. | ||||
|  | ||||
| Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>. | ||||
| All rights reserved. This software is available under a BSD license; | ||||
| see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE | ||||
| """ | ||||
|  | ||||
| import re | ||||
| from .utils import isString | ||||
|  | ||||
_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
# Matches either a single int, or a slice-like "start:stop:step" form where
# every field is optional.
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
# groups:         12     34     5 6     7 8


class ParseError(Exception):
    """Raised when a string cannot be parsed as a page-range expression."""
    pass


PAGE_RANGE_HELP = """Remember, page indices start with zero.
        Page range expression examples:
            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
"""
|  | ||||
|  | ||||
class PageRange(object):
    """
    A slice-like representation of a range of page indices,
        i.e. page numbers, only starting at zero.
    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.
      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            # Copy constructor: reuse the source's slice.
            self._slice = arg.to_slice()
            return

        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Groups 4, 6 and 8 hold the start/stop/step fields of the
            # colon-separated form; empty fields become None.
            self._slice = slice(*[int(g) if g else None
                                  for g in m.group(4, 6, 8)])

    # Just formatting this when there is __doc__ for __init__
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        return isinstance(input, slice)  or \
               isinstance(input, PageRange) or \
               (isString(input)
                and bool(re.match(PAGE_RANGE_RE, input)))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
        if s.step == None:
            if s.start != None  and  s.stop == s.start + 1:
                # A single page renders as a bare index, e.g. "5".
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ':'.join("" if i == None else str(i) for i in indices)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)
|  | ||||
| PAGE_RANGE_ALL = PageRange(":")  # The range of all pages. | ||||
|  | ||||
|  | ||||
def parse_filename_page_ranges(args):
    """
    Given a list of filenames and page ranges, return a list of
    (filename, page_range) pairs.
    First arg must be a filename; other ags are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.
    """
    pairs = []
    current_file = None
    saw_range = False
    # The trailing None sentinel flushes the final file's implicit range.
    for token in args + [None]:
        if PageRange.valid(token):
            if not current_file:
                raise ValueError("The first argument must be a filename, "
                                 "not a page range.")

            pairs.append((current_file, PageRange(token)))
            saw_range = True
        else:
            # New filename (or the sentinel): if the previous file had no
            # explicit range, take all of its pages.
            if current_file and not saw_range:
                pairs.append((current_file, PAGE_RANGE_ALL))

            current_file = token
            saw_range = False
    return pairs
							
								
								
									
										3004
									
								
								vendor/PyPDF2/pdf.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										3004
									
								
								vendor/PyPDF2/pdf.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										295
									
								
								vendor/PyPDF2/utils.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										295
									
								
								vendor/PyPDF2/utils.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @@ -0,0 +1,295 @@ | ||||
| # Copyright (c) 2006, Mathieu Fenniak | ||||
| # All rights reserved. | ||||
| # | ||||
| # Redistribution and use in source and binary forms, with or without | ||||
| # modification, are permitted provided that the following conditions are | ||||
| # met: | ||||
| # | ||||
| # * Redistributions of source code must retain the above copyright notice, | ||||
| # this list of conditions and the following disclaimer. | ||||
| # * Redistributions in binary form must reproduce the above copyright notice, | ||||
| # this list of conditions and the following disclaimer in the documentation | ||||
| # and/or other materials provided with the distribution. | ||||
| # * The name of the author may not be used to endorse or promote products | ||||
| # derived from this software without specific prior written permission. | ||||
| # | ||||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||||
| # POSSIBILITY OF SUCH DAMAGE. | ||||
|  | ||||
| """ | ||||
| Utility functions for PDF library. | ||||
| """ | ||||
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"


import sys

try:
    import __builtin__ as builtins  # Python 2
except ImportError:  # Py3
    import builtins


# Names that differ between Python 2 and 3, resolved once at import time.
xrange_fn = getattr(builtins, "xrange", range)      # xrange on Py2, range on Py3
_basestring = getattr(builtins, "basestring", str)  # base of all string types

bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
string_type = getattr(builtins, "unicode", str)     # the text (unicode) type
int_types = (int, long) if sys.version_info[0] < 3 else (int,)
|  | ||||
|  | ||||
# Make basic type tests more consistent
def isString(s):
    """Test if arg is a string. Compatible with Python 2 and 3."""
    return isinstance(s, _basestring)


def isInt(n):
    """Test if arg is an int. Compatible with Python 2 and 3."""
    # int_types includes long on Python 2.
    return isinstance(n, int_types)


def isBytes(b):
    """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
    return isinstance(b, bytes_type)
|  | ||||
|  | ||||
def formatWarning(message, category, filename, lineno, line=None):
    """
    Custom implementation of warnings.formatwarning: render the warning with
    only the bare file name (no directory) and the line number.

    :param line: unused; accepted for signature compatibility with
        warnings.formatwarning.
    """
    # [-1] keeps this working when the path contains no separator at all
    # (the original [1] raised IndexError for bare file names).
    file = filename.replace("/", "\\").rsplit("\\", 1)[-1]
    return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
|  | ||||
|  | ||||
def readUntilWhitespace(stream, maxchars=None):
    """
    Reads non-whitespace characters and returns them.
    Stops upon encountering whitespace or when maxchars is reached.

    :param stream: byte stream supporting read(1).
    :param maxchars: optional cap on the number of bytes returned.
    """
    txt = b_("")
    while True:
        tok = stream.read(1)
        # An empty read means end of stream; stop there too.
        if tok.isspace() or not tok:
            break
        txt += tok
        if len(txt) == maxchars:
            break
    return txt
|  | ||||
|  | ||||
def readNonWhitespace(stream):
    """
    Finds and reads the next non-whitespace character (ignores whitespace).
    Returns the empty byte string at end of stream.
    """
    # Seed with a known whitespace byte so the loop reads at least once.
    tok = WHITESPACES[0]
    while tok in WHITESPACES:
        tok = stream.read(1)
    return tok
|  | ||||
|  | ||||
def skipOverWhitespace(stream):
    """
    Similar to readNonWhitespace, but returns a Boolean if more than
    one whitespace character was read.

    The stream is left positioned just past the first non-whitespace byte.
    """
    # Seed with a known whitespace byte so the loop reads at least once.
    tok = WHITESPACES[0]
    cnt = 0
    while tok in WHITESPACES:
        tok = stream.read(1)
        cnt += 1
    # cnt counts the non-whitespace terminator too, hence "> 1".
    return cnt > 1
|  | ||||
|  | ||||
def skipOverComment(stream):
    """
    If the stream is positioned at a '%' comment marker, consume the comment
    through the end of its line; otherwise leave the position unchanged.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)  # peek: restore position before deciding
    if tok == b_('%'):
        while tok not in (b_('\n'), b_('\r')):
            tok = stream.read(1)
|  | ||||
|  | ||||
def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Reads until the regular expression pattern matched (ignore the match)
    Raise PdfStreamError on premature end-of-file.

    :param stream: byte stream supporting read() and relative seek().
    :param regex: compiled pattern whose first match terminates the read.
    :param bool ignore_eof: If true, ignore end-of-file and return
        immediately with whatever was read so far.
    :return: the bytes read up to (not including) the match.
    """
    name = b_('')
    while True:
        # Read in 16-byte chunks rather than one byte at a time.
        # NOTE(review): a match straddling a chunk boundary is not detected;
        # callers appear to rely on short, single-byte-class patterns.
        tok = stream.read(16)
        if not tok:
            # stream has truncated prematurely
            if ignore_eof:
                return name
            raise PdfStreamError("Stream has ended unexpectedly")
        m = regex.search(tok)
        if m is not None:
            # Keep only the bytes before the match and rewind the stream to
            # the match's start so the caller can consume it.
            name += tok[:m.start()]
            stream.seek(m.start() - len(tok), 1)
            break
        name += tok
    return name
|  | ||||
|  | ||||
class ConvertFunctionsToVirtualList(object):
    """
    Present a pair of functions (length getter, item getter) as a read-only,
    list-like object supporting len(), integer indexing, and slicing.
    """
    def __init__(self, lengthFunction, getFunction):
        # lengthFunction: () -> int, the current number of elements.
        # getFunction: (int) -> the element at that index.
        self.lengthFunction = lengthFunction
        self.getFunction = getFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        if isinstance(index, slice):
            # Resolve the slice's indices once, then expose another virtual
            # list that maps positions through them lazily.
            indices = xrange_fn(*index.indices(len(self)))
            cls = type(self)
            return cls(indices.__len__, lambda idx: self[indices[idx]])
        if not isInt(index):
            raise TypeError("sequence indices must be integers")
        len_self = len(self)
        if index < 0:
            # support negative indexes
            index = len_self + index
        if index < 0 or index >= len_self:
            raise IndexError("sequence index out of range")
        return self.getFunction(index)
|  | ||||
|  | ||||
def RC4_encrypt(key, plaintext):
    """
    Encrypt *plaintext* with the RC4 stream cipher under *key*.  RC4 is
    symmetric, so the same call also decrypts.  RC4 is cryptographically
    broken; this exists only for PDF standard-security compatibility.
    """
    # Key-scheduling algorithm (KSA): build the permutation S from the key.
    S = [i for i in range(256)]
    j = 0
    for i in range(256):
        j = (j + S[i] + ord_(key[i % len(key)])) % 256
        S[i], S[j] = S[j], S[i]
    # Pseudo-random generation (PRGA): XOR the keystream with the input.
    i, j = 0, 0
    retval = b_("")
    for x in range(len(plaintext)):
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        retval += b_(chr(ord_(plaintext[x]) ^ t))
    return retval
|  | ||||
|  | ||||
def matrixMultiply(a, b):
    """
    Multiply two matrices given as nested lists, returning a new nested
    list of floats (every element is coerced with float()).
    """
    result = []
    for row in a:
        out_row = []
        # zip(*b) iterates the columns of b.
        for col in zip(*b):
            out_row.append(sum(float(x) * float(y) for x, y in zip(row, col)))
        result.append(out_row)
    return result
|  | ||||
|  | ||||
def markLocation(stream):
    """Creates text file showing current location in context."""
    # Mainly for debugging
    RADIUS = 5000
    # Dump RADIUS bytes on either side of the current offset with a 'HERE'
    # marker in between, then restore the original stream position.
    stream.seek(-RADIUS, 1)
    outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
    outputDoc.write(stream.read(RADIUS))
    outputDoc.write('HERE')
    outputDoc.write(stream.read(RADIUS))
    outputDoc.close()
    stream.seek(-RADIUS, 1)
|  | ||||
|  | ||||
class PyPdfError(Exception):
    """Base class for every exception raised by this package."""
    pass


class PdfReadError(PyPdfError):
    """Raised when reading/parsing a PDF fails."""
    pass


class PageSizeNotDefinedError(PyPdfError):
    """Raised when a page size is required but has not been defined."""
    pass


class PdfReadWarning(UserWarning):
    """Warning category for recoverable problems found while reading."""
    pass


class PdfStreamError(PdfReadError):
    """Raised when a stream ends unexpectedly or is malformed."""
    pass
|  | ||||
|  | ||||
if sys.version_info[0] < 3:
    def b_(s):
        # Python 2: str already is a byte string; pass through unchanged.
        return s
else:
    # Cache of encoded results, populated only for very short strings.
    B_CACHE = {}

    def b_(s):
        # Python 3: coerce *s* to bytes, encoding text as Latin-1.
        bc = B_CACHE
        if s in bc:
            return bc[s]
        if type(s) == bytes:
            return s
        else:
            r = s.encode('latin-1')
            if len(s) < 2:
                # Only cache 0/1-char strings to bound the cache's growth.
                bc[s] = r
            return r
|  | ||||
|  | ||||
def u_(s):
    """Return *s* as text: decodes escape sequences under Python 2,
    passes the string through unchanged on Python 3."""
    if sys.version_info[0] >= 3:
        return s
    return unicode(s, 'unicode_escape')
|  | ||||
|  | ||||
def str_(b):
    """Return *b* as a str: bytes are decoded as Latin-1 on Python 3,
    everything else passes through unchanged."""
    if sys.version_info[0] < 3:
        return b
    return b.decode('latin-1') if type(b) == bytes else b
|  | ||||
|  | ||||
def ord_(b):
    """Return the integer value of a one-character string; integers
    (bytes elements on Python 3) pass through unchanged."""
    if type(b) == str or sys.version_info[0] < 3:
        return ord(b)
    return b
|  | ||||
|  | ||||
def chr_(c):
    """Return the character for code point *c* on Python 3; Python 2
    passes the value through unchanged."""
    return c if sys.version_info[0] < 3 else chr(c)
|  | ||||
|  | ||||
def barray(b):
    """Return *b* as a mutable ``bytearray`` on Python 3 (identity on Python 2)."""
    return b if sys.version_info[0] < 3 else bytearray(b)
|  | ||||
|  | ||||
def hexencode(b):
    """Return the lowercase hex encoding of the byte string *b*."""
    if sys.version_info[0] < 3:
        return b.encode('hex')
    import codecs
    return codecs.getencoder('hex_codec')(b)[0]
|  | ||||
|  | ||||
def hexStr(num):
    """Return ``hex(num)`` with the Python 2 long-integer 'L' suffix stripped."""
    text = hex(num)
    return text.replace('L', '')
|  | ||||
|  | ||||
# Byte values the tokenizer treats as whitespace, pre-encoded once.
WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
							
								
								
									
										358
									
								
								vendor/PyPDF2/xmp.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										358
									
								
								vendor/PyPDF2/xmp.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @@ -0,0 +1,358 @@ | ||||
| import re | ||||
| import datetime | ||||
| import decimal | ||||
| from .generic import PdfObject | ||||
| from xml.dom import getDOMImplementation | ||||
| from xml.dom.minidom import parseString | ||||
| from .utils import u_ | ||||
|  | ||||
# XML namespace URIs used when reading XMP metadata documents.
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"  # Dublin Core elements
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"  # XMP basic schema
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"  # Adobe PDF schema
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"  # XMP media management
|  | ||||
| # What is the PDFX namespace, you might ask?  I might ask that too.  It's | ||||
| # a completely undocumented namespace used to place "custom metadata" | ||||
| # properties, which are arbitrary metadata properties with no semantic or | ||||
| # documented meaning.  Elements in the namespace are key/value-style storage, | ||||
| # where the element name is the key and the content is the value.  The keys | ||||
| # are transformed into valid XML identifiers by substituting an invalid | ||||
| # identifier character with \u2182 followed by the unicode hex ID of the | ||||
| # original character.  A key like "my car" is therefore "my\u21820020car". | ||||
| # | ||||
| # \u2182, in case you're wondering, is the unicode character | ||||
| # \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for | ||||
| # escaping characters. | ||||
| # | ||||
| # Intentional users of the pdfx namespace should be shot on sight.  A | ||||
| # custom data schema and sensical XML elements could be used instead, as is | ||||
| # suggested by Adobe's own documentation on XMP (under "Extensibility of | ||||
| # Schemas"). | ||||
| # | ||||
| # Information presented here on the /pdfx/ schema is a result of limited | ||||
| # reverse engineering, and does not constitute a full specification. | ||||
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"  # undocumented "custom metadata" namespace (see note above)
|  | ||||
# Lenient ISO 8601 timestamp pattern: only the year is mandatory; month,
# day, and the time (with optional fractional seconds and a timezone
# designator) nest optionally inside it.
iso8601 = re.compile("""
        (?P<year>[0-9]{4})
        (-
            (?P<month>[0-9]{2})
            (-
                (?P<day>[0-9]+)
                (T
                    (?P<hour>[0-9]{2}):
                    (?P<minute>[0-9]{2})
                    (:(?P<second>[0-9]{2}(.[0-9]+)?))?
                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
                )?
            )?
        )?
        """, re.VERBOSE)
|  | ||||
|  | ||||
class XmpInformation(PdfObject):
    """
    An object that represents Adobe XMP metadata.
    Usually accessed by :meth:`getXmpMetadata()<PyPDF2.PdfFileReader.getXmpMetadata>`
    """

    def __init__(self, stream):
        # stream: a PDF stream object whose getData() returns the raw XMP XML.
        self.stream = stream
        docRoot = parseString(self.stream.getData())
        self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
        # Per-namespace cache of already-converted property values.
        self.cache = {}

    def writeToStream(self, stream, encryption_key):
        # Serialization is delegated to the underlying stream object.
        self.stream.writeToStream(stream, encryption_key)

    def getElement(self, aboutUri, namespace, name):
        """Yield the attribute node (if any) and all child elements named
        *name* in *namespace*, from each rdf:Description whose rdf:about
        attribute equals *aboutUri*."""
        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
                attr = desc.getAttributeNodeNS(namespace, name)
                if attr is not None:
                    yield attr
                for element in desc.getElementsByTagNameNS(namespace, name):
                    yield element

    def getNodesInNamespace(self, aboutUri, namespace):
        """Yield every attribute and child node that belongs to *namespace*
        from each matching rdf:Description."""
        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
                for i in range(desc.attributes.length):
                    attr = desc.attributes.item(i)
                    if attr.namespaceURI == namespace:
                        yield attr
                for child in desc.childNodes:
                    if child.namespaceURI == namespace:
                        yield child

    def _getText(self, element):
        """Concatenate and return the text-node children of *element*."""
        text = ""
        for child in element.childNodes:
            if child.nodeType == child.TEXT_NODE:
                text += child.data
        return text

    # NOTE: the converters and getter factories below are deliberately plain
    # functions (no self / @staticmethod): they are only called at class-body
    # scope while building the property objects further down.

    def _converter_string(value):
        return value

    def _converter_date(value):
        """Parse an ISO 8601 timestamp into a UTC-normalized naive datetime."""
        m = iso8601.match(value)
        year = int(m.group("year"))
        month = int(m.group("month") or "1")
        day = int(m.group("day") or "1")
        hour = int(m.group("hour") or "0")
        minute = int(m.group("minute") or "0")
        second = decimal.Decimal(m.group("second") or "0")
        # datetime() requires integer arguments; passing Decimal raises
        # TypeError on Python 3, so convert explicitly.
        seconds = int(second.to_integral(decimal.ROUND_FLOOR))
        microseconds = int((second - seconds) * 1000000)
        tzd = m.group("tzd") or "Z"
        dt = datetime.datetime(year, month, day, hour, minute, seconds, microseconds)
        if tzd != "Z":
            # Subtract the UTC offset (e.g. "+05:30") to normalize to UTC.
            tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
            tzd_hours *= -1
            if tzd_hours < 0:
                tzd_minutes *= -1
            dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
        return dt
    _test_converter_date = staticmethod(_converter_date)

    def _getter_bag(namespace, name, converter):
        """Build a property getter returning an unordered list (rdf:Bag)."""
        def get(self):
            # NOTE: empty results are cached but re-fetched (falsy check).
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
                if len(bags):
                    for bag in bags:
                        for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_seq(namespace, name, converter):
        """Build a property getter returning an ordered list (rdf:Seq),
        falling back to the element's own text when no Seq is present."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
                if len(seqs):
                    for seq in seqs:
                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
                else:
                    value = converter(self._getText(element))
                    retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_langalt(namespace, name, converter):
        """Build a property getter returning a language-keyed dict (rdf:Alt);
        a bare element becomes the "x-default" entry."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = {}
            for element in self.getElement("", namespace, name):
                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
                if len(alts):
                    for alt in alts:
                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval[item.getAttribute("xml:lang")] = value
                else:
                    retval["x-default"] = converter(self._getText(element))
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_single(namespace, name, converter):
        """Build a property getter returning the first matching attribute or
        element value, or None when absent."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            value = None
            for element in self.getElement("", namespace, name):
                if element.nodeType == element.ATTRIBUTE_NODE:
                    value = element.nodeValue
                else:
                    value = self._getText(element)
                break
            if value is not None:
                value = converter(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = value
            return value
        return get

    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
    """
    Contributors to the resource (other than the authors). An unsorted
    array of names.
    """

    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
    """
    Text describing the extent or scope of the resource.
    """

    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
    """
    A sorted array of names of the authors of the resource, listed in order
    of precedence.
    """

    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
    """
    A sorted array of dates (datetime.datetime instances) of significance to
    the resource.  The dates and times are in UTC.
    """

    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
    """
    A language-keyed dictionary of textual descriptions of the content of the
    resource.
    """

    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
    """
    The mime-type of the resource.
    """

    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
    """
    Unique identifier of the resource.
    """

    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
    """
    An unordered array specifying the languages used in the resource.
    """

    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
    """
    An unordered array of publisher names.
    """

    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
    """
    An unordered array of text descriptions of relationships to other
    documents.
    """

    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
    """
    A language-keyed dictionary of textual descriptions of the rights the
    user has to this resource.
    """

    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
    """
    Unique identifier of the work from which this resource was derived.
    """

    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
    """
    An unordered array of descriptive phrases or keywords that specify the
    topic of the content of the resource.
    """

    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
    """
    A language-keyed dictionary of the title of the resource.
    """

    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
    """
    An unordered array of textual descriptions of the document type.
    """

    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
    """
    An unformatted text string representing document keywords.
    """

    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
    """
    The PDF file version, for example 1.0, 1.3.
    """

    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
    """
    The name of the tool that created the PDF document.
    """

    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
    """
    The date and time the resource was originally created.  The date and
    time are returned as a UTC datetime.datetime object.
    """

    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
    """
    The date and time the resource was last modified.  The date and time
    are returned as a UTC datetime.datetime object.
    """

    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
    """
    The date and time that any metadata for this resource was last
    changed.  The date and time are returned as a UTC datetime.datetime
    object.
    """

    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
    """
    The name of the first known tool used to create the resource.
    """

    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
    """
    The common identifier for all versions and renditions of this resource.
    """

    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
    """
    An identifier for a specific incarnation of a document, updated each
    time a file is saved.
    """

    def custom_properties(self):
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
                key = node.localName
                while True:
                    # see documentation about PDFX_NAMESPACE earlier in file:
                    # "\u2182" + 4 hex digits encodes one escaped character.
                    idx = key.find(u_("\u2182"))
                    if idx == -1:
                        break
                    key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._getText(node)
                self._custom_properties[key] = value
        return self._custom_properties

    custom_properties = property(custom_properties)
    """
    Retrieves custom metadata properties defined in the undocumented pdfx
    metadata schema.

    :return: a dictionary of key/value items for custom metadata properties.
    :rtype: dict
    """
		Reference in New Issue
	
	Block a user
	 Cervinko Cera
					Cervinko Cera