calibre-web/vendor/PyPDF2/pagerange.py

#!/usr/bin/env python
"""
Representation and utils for ranges of PDF file pages.

Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
All rights reserved. This software is available under a BSD license;
see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
"""

import re
from .utils import isString

_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
# groups:         12     34     5 6     7 8


class ParseError(Exception):
    pass


PAGE_RANGE_HELP = """Remember, page indices start with zero.
        Page range expression examples:
            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
"""


class PageRange(object):
    """
    A slice-like representation of a range of page indices,
        i.e. page numbers, only starting at zero.
    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.
      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            self._slice = slice(*[int(g) if g else None
                                  for g in m.group(4, 6, 8)])

    # Just formatting this when there is __doc__ for __init__
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        return isinstance(input, slice)  or \
               isinstance(input, PageRange) or \
               (isString(input)
                and bool(re.match(PAGE_RANGE_RE, input)))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
        if s.step == None:
            if s.start != None  and  s.stop == s.start + 1:
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ':'.join("" if i == None else str(i) for i in indices)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)


PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.


def parse_filename_page_ranges(args):
    """
    Given a list of filenames and page ranges, return a list of
    (filename, page_range) pairs.
    First arg must be a filename; other ags are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.
    """
    pairs = []
    pdf_filename = None
    did_page_range = False
    for arg in args + [None]:
        if PageRange.valid(arg):
            if not pdf_filename:
                raise ValueError("The first argument must be a filename, " \
                                 "not a page range.")

            pairs.append( (pdf_filename, PageRange(arg)) )
            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )

            pdf_filename = arg
            did_page_range = False
    return pairs
Add PyPDF2 to vendor 2016-08-07 20:12:32 +00:00			`#!/usr/bin/env python`
			`"""`
			`Representation and utils for ranges of PDF file pages.`

			`Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.`
			`All rights reserved. This software is available under a BSD license;`
			`see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE`
			`"""`

			`import re`
			`from .utils import isString`

			`_INT_RE = r"(0\|-?[1-9]\d*)" # A decimal int, don't allow "-0".`
			`PAGE_RANGE_RE = "^({int}\|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)`
			`# groups: 12 34 5 6 7 8`


			`class ParseError(Exception):`
			`pass`


			`PAGE_RANGE_HELP = """Remember, page indices start with zero.`
			`Page range expression examples:`
			`: all pages. -1 last page.`
			`22 just the 23rd page. :-1 all but the last page.`
			`0:3 the first three pages. -2 second-to-last page.`
			`:3 the first three pages. -2: last two pages.`
			`5: from the sixth page onward. -3:-1 third & second to last.`
			`The third, "stride" or "step" number is also recognized.`
			`::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.`
			`1:10:2 1 3 5 7 9 2::-1 2 1 0.`
			`::-1 all pages in reverse order.`
			`"""`


			`class PageRange(object):`
			`"""`
			`A slice-like representation of a range of page indices,`
			`i.e. page numbers, only starting at zero.`
			`The syntax is like what you would put between brackets [ ].`
			`The slice is one of the few Python types that can't be subclassed,`
			`but this class converts to and from slices, and allows similar use.`
			`o PageRange(str) parses a string representing a page range.`
			`o PageRange(slice) directly "imports" a slice.`
			`o to_slice() gives the equivalent slice.`
			`o str() and repr() allow printing.`
			`o indices(n) is like slice.indices(n).`
			`"""`

			`def __init__(self, arg):`
			`"""`
			`Initialize with either a slice -- giving the equivalent page range,`
			`or a PageRange object -- making a copy,`
			`or a string like`
			`"int", "[int]:[int]" or "[int]:[int]:[int]",`
			`where the brackets indicate optional ints.`
			`{page_range_help}`
			`Note the difference between this notation and arguments to slice():`
			`slice(3) means the first three pages;`
			`PageRange("3") means the range of only the fourth page.`
			`However PageRange(slice(3)) means the first three pages.`
			`"""`
			`if isinstance(arg, slice):`
			`self._slice = arg`
			`return`

			`if isinstance(arg, PageRange):`
			`self._slice = arg.to_slice()`
			`return`

			`m = isString(arg) and re.match(PAGE_RANGE_RE, arg)`
			`if not m:`
			`raise ParseError(arg)`
			`elif m.group(2):`
			`# Special case: just an int means a range of one page.`
			`start = int(m.group(2))`
			`stop = start + 1 if start != -1 else None`
			`self._slice = slice(start, stop)`
			`else:`
			`self._slice = slice(*[int(g) if g else None`
			`for g in m.group(4, 6, 8)])`

			`# Just formatting this when there is __doc__ for __init__`
			`if __init__.__doc__:`
			`__init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)`

			`@staticmethod`
			`def valid(input):`
			`""" True if input is a valid initializer for a PageRange. """`
			`return isinstance(input, slice) or \`
			`isinstance(input, PageRange) or \`
			`(isString(input)`
			`and bool(re.match(PAGE_RANGE_RE, input)))`

			`def to_slice(self):`
			`""" Return the slice equivalent of this page range. """`
			`return self._slice`

			`def __str__(self):`
			`""" A string like "1:2:3". """`
			`s = self._slice`
			`if s.step == None:`
			`if s.start != None and s.stop == s.start + 1:`
			`return str(s.start)`

			`indices = s.start, s.stop`
			`else:`
			`indices = s.start, s.stop, s.step`
			`return ':'.join("" if i == None else str(i) for i in indices)`

			`def __repr__(self):`
			`""" A string like "PageRange('1:2:3')". """`
			`return "PageRange(" + repr(str(self)) + ")"`

			`def indices(self, n):`
			`"""`
			`n is the length of the list of pages to choose from.`
			`Returns arguments for range(). See help(slice.indices).`
			`"""`
			`return self._slice.indices(n)`


			`PAGE_RANGE_ALL = PageRange(":") # The range of all pages.`


			`def parse_filename_page_ranges(args):`
			`"""`
			`Given a list of filenames and page ranges, return a list of`
			`(filename, page_range) pairs.`
			`First arg must be a filename; other ags are filenames, page-range`
			`expressions, slice objects, or PageRange objects.`
			`A filename not followed by a page range indicates all pages of the file.`
			`"""`
			`pairs = []`
			`pdf_filename = None`
			`did_page_range = False`
			`for arg in args + [None]:`
			`if PageRange.valid(arg):`
			`if not pdf_filename:`
			`raise ValueError("The first argument must be a filename, " \`
			`"not a page range.")`

			`pairs.append( (pdf_filename, PageRange(arg)) )`
			`did_page_range = True`
			`else:`
			`# New filename or end of list--do all of the previous file?`
			`if pdf_filename and not did_page_range:`
			`pairs.append( (pdf_filename, PAGE_RANGE_ALL) )`

			`pdf_filename = arg`
			`did_page_range = False`
			`return pairs`