release 2013.10.07

[jeuxvideo] Improve code quality (fixes #1567 )
Restore warning when user forgets to quote URL (#1396 )
2013-10-07 14:34:19 +02:00 · 2013-10-07 14:33:23 +02:00 · 2013-10-07 12:21:24 +02:00 · 2013-10-06 21:56:23 +02:00 · 2013-10-06 16:39:35 +02:00 · 2013-10-06 16:30:26 +02:00
15 changed files with 270 additions and 32 deletions
--- a/README.md
+++ b/README.md
@@ -52,6 +52,9 @@ which means you can modify it, redistribute it or use it however you like.
    --datebefore DATE          download only videos uploaded before this date
    --dateafter DATE           download only videos uploaded after this date
    --no-playlist              download only the currently playing video
    --age-limit YEARS          download only videos suitable for the given age
    --download-archive FILE    Download only videos not present in the archive
                               file. Record all downloaded videos in it.
 ## Download Options:
    -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m)
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -0,0 +1,53 @@
 #!/usr/bin/env python
 import sys
 import unittest
 # Allow direct execution
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from youtube_dl import YoutubeDL
 from helper import try_rm
 def _download_restricted(url, filename, age):
    """Attempt a metadata-only download of url under the given age limit.

    Returns True iff the info JSON file (filename + '.info.json') exists
    after the download attempt. The file is removed before and after the
    attempt, so a leftover from an earlier run cannot fake a success.
    """
    downloader = YoutubeDL({
        'age_limit': age,
        'skip_download': True,
        'writeinfojson': True,
        'outtmpl': '%(id)s.%(ext)s',
    })
    downloader.add_default_info_extractors()
    info_path = filename + '.info.json'
    try_rm(info_path)
    downloader.download([url])
    was_written = os.path.exists(info_path)
    try_rm(info_path)
    return was_written
 class TestAgeRestriction(unittest.TestCase):
    # Each test checks one site: the video must be downloaded when the
    # age limit is old_age and must be skipped when the limit is age.
    def _assert_restricted(self, url, filename, age, old_age=None):
        # First run uses old_age (None by default) and has to succeed
        fetched_with_old_age = _download_restricted(url, filename, old_age)
        self.assertTrue(fetched_with_old_age)
        # Second run uses the restrictive age and has to be skipped
        fetched_with_age = _download_restricted(url, filename, age)
        self.assertFalse(fetched_with_age)
    def test_youtube(self):
        video_id = '07FYdnEawAQ'
        self._assert_restricted(video_id, '07FYdnEawAQ.mp4', 10)
    def test_youporn(self):
        page_url = 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/'
        self._assert_restricted(page_url, '505835.mp4', 2, old_age=25)
    def test_pornotube(self):
        page_url = 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing'
        self._assert_restricted(page_url, '1689755.flv', 13)
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -3,6 +3,7 @@
 from __future__ import absolute_import
 import errno
 import io
 import os
 import re
@@ -84,6 +85,11 @@ class YoutubeDL(object):
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    downloadarchive:   File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@@ -309,6 +315,13 @@ class YoutubeDL(object):
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return (u'%(title)s has already been recorded in archive'
                    % info_dict)
        return None
    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
@@ -578,6 +591,8 @@ class YoutubeDL(object):
                    self.report_error(u'postprocessing: %s' % str(err))
                    return
        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        if len(url_list) > 1 and self.fixed_template():
@@ -617,3 +632,26 @@ class YoutubeDL(object):
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')
    def in_download_archive(self, info_dict):
        """Return True iff the video in info_dict is listed in the archive.

        The archive is the file named by the 'download_archive' parameter;
        each line holds "<extractor> <id>". Returns False when no archive
        is configured or the archive file does not exist yet.
        """
        archive_path = self.params.get('download_archive')
        if archive_path is None:
            return False
        entry = info_dict['extractor'] + u' ' + info_dict['id']
        try:
            with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
                # any() stops at the first matching line
                return any(line.strip() == entry for line in archive_file)
        except IOError as ioe:
            # Only a missing archive file is tolerated
            if ioe.errno != errno.ENOENT:
                raise
        return False
    def record_download_archive(self, info_dict):
        """Append "<extractor> <id>" for info_dict to the archive file.

        Does nothing when the 'download_archive' parameter is not set.
        """
        archive_path = self.params.get('download_archive')
        if archive_path is not None:
            entry = info_dict['extractor'] + u' ' + info_dict['id']
            with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
                archive_file.write(entry + u'\n')
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -188,6 +188,12 @@ def parseOpts(overrideArguments=None):
    selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
    selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
                         help='download only videos suitable for the given age',
                         default=None, type=int)
    selection.add_option('--download-archive', metavar='FILE',
                         dest='download_archive',
                         help='Download only videos not present in the archive file. Record all downloaded videos in it.')
    authentication.add_option('-u', '--username',
@@ -478,6 +484,8 @@ def _real_main(argv=None):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            if desc is False:
                continue
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
                _COUNTS = (u'', u'5', u'10', u'all')
@@ -631,6 +639,8 @@ def _real_main(argv=None):
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': opts.download_archive,
        })
    if opts.verbose:
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -141,6 +141,7 @@ from .youtube import (
    YoutubeShowIE,
    YoutubeSubscriptionsIE,
    YoutubeRecommendedIE,
    YoutubeTruncatedURLIE,
    YoutubeWatchLaterIE,
    YoutubeFavouritesIE,
 )
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -54,6 +54,7 @@ class InfoExtractor(object):
    view_count:     How many users have watched the video on the platform.
    urlhandle:      [internal] The urlHandle to be used to download the file,
                    like returned by urllib.request.urlopen
    age_limit:      Age restriction for the video, as an integer (years)
    formats:        A list of dictionaries for each format available, it must
                    be ordered from worst to best quality. Potential fields:
                    * url       Mandatory. The URL of the video file
@@ -318,6 +319,15 @@ class InfoExtractor(object):
                                        self._og_regex('video')],
                                       html, name, **kargs)
    def _rta_search(self, html):
        # See http://www.rtalabel.org/index.php?content=howtofaq#single
        if re.search(r'(?ix)<meta\s+name="rating"\s+'
                     r'     content="RTA-5042-1996-1400-1577-RTA"',
                     html):
            return 18
        return 0
 class SearchInfoExtractor(InfoExtractor):
    """
    Base class for paged search queries extractors.
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -117,7 +117,7 @@ class GenericIE(InfoExtractor):
        except ValueError:
            # since this is the last-resort InfoExtractor, if
            # this error is thrown, it'll be thrown here
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Failed to download URL: %s' % url)
        self.report_extraction(video_id)
        # Look for BrightCove:
@@ -149,12 +149,12 @@ class GenericIE(InfoExtractor):
            # HTML5 video
            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Unsupported URL: %s' % url)
        # It's possible that one of the regexes
        # matched, but returned an empty group:
        if mobj.group(1) is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Did not find a valid video URL at %s' % url)
        video_url = mobj.group(1)
        video_url = compat_urlparse.urljoin(url, video_url)
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -6,6 +6,7 @@ import xml.etree.ElementTree
 from .common import InfoExtractor
 class JeuxVideoIE(InfoExtractor):
    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
@@ -23,25 +24,29 @@ class JeuxVideoIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
-        m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
+        xml_link = self._html_search_regex(
            r'<param name="flashvars" value="config=(.*?)" />',
            webpage, u'config URL')
-        xml_link = m_download.group(1)
+        video_id = self._search_regex(
            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
            xml_link, u'video ID')
-        id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
+        xml_config = self._download_webpage(
-
+            xml_link, title, u'Downloading XML config')
        xml_config = self._download_webpage(xml_link, title,
                                                  'Downloading XML config')
        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
-        info = re.search(r'<format\.json>(.*?)</format\.json>',
+        info_json = self._search_regex(
-                         xml_config, re.MULTILINE|re.DOTALL).group(1)
+            r'(?sm)<format\.json>(.*?)</format\.json>',
-        info = json.loads(info)['versions'][0]
+            xml_config, u'JSON information')
        info = json.loads(info_json)['versions'][0]
        video_url = 'http://video720.jeuxvideo.com/' + info['file']
-        return {'id': id,
+        return {
-                'title' : config.find('titre_video').text,
+            'id': video_id,
-                'ext' : 'mp4',
+            'title': config.find('titre_video').text,
-                'url' : video_url,
+            'ext': 'mp4',
-                'description': self._og_search_description(webpage),
+            'url': video_url,
-                'thumbnail': config.find('image').text,
+            'description': self._og_search_description(webpage),
-                }
+            'thumbnail': config.find('image').text,
        }
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor):
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
        if upload_date: upload_date = unified_strdate(upload_date)
        age_limit = self._rta_search(webpage)
        info = {'id': video_id,
                'url': video_url,
@@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor):
                'upload_date': upload_date,
                'title': video_title,
                'ext': 'flv',
-                'format': 'flv'}
+                'format': 'flv',
                'age_limit': age_limit}
        return [info]
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -30,9 +30,14 @@ class RedTubeIE(InfoExtractor):
            r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            webpage, u'title')
        # No self-labeling, but they describe themselves as
        # "Home of Videos Porno"
        age_limit = 18
        return {
-            'id':       video_id,
+            'id':        video_id,
-            'url':      video_url,
+            'url':       video_url,
-            'ext':      video_extension,
+            'ext':       video_extension,
-            'title':    video_title,
+            'title':     video_title,
            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -17,7 +17,7 @@ class VimeoIE(InfoExtractor):
    """Information extractor for vimeo.com."""
    # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$'
    _NETRC_MACHINE = 'vimeo'
    IE_NAME = u'vimeo'
    _TESTS = [
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor):
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)
        age_limit = self._rta_search(webpage)
        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
@@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor):
                'ext': extension,
                'format': format,
                'thumbnail': thumbnail,
-                'description': video_description
+                'description': video_description,
                'age_limit': age_limit,
            })
        if self._downloader.params.get('listformats', None):
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1250,9 +1250,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        return url_map
    def _real_extract(self, url):
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
@@ -1495,7 +1492,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'description':  video_description,
                'player_url':   player_url,
                'subtitles':    video_subtitles,
-                'duration':     video_duration
+                'duration':     video_duration,
                'age_limit':    18 if age_gate else 0,
            })
        return results
@@ -1636,7 +1634,7 @@ class YoutubeChannelIE(InfoExtractor):
 class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!watch(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1829,3 +1827,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
 class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch URLs that end right after "feature=..." and rejects
    them with a hint that the URL was probably cut off by the shell."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'
    def _real_extract(self, url):
        # Nothing can be extracted from such a URL; always fail with advice
        quoting_hint = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
            u' (or simply  youtube-dl BaW_jenozKc  ).')
        raise ExtractorError(quoting_hint, expected=True)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -830,3 +830,99 @@ def get_cachedir(params={}):
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
 # Cross-platform file locking
 #
 # Both branches below define the same pair of helpers:
 #   _lock_file(f, exclusive)  -- lock the open file object f
 #   _unlock_file(f)           -- release the lock taken by _lock_file
 if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt
    # ctypes layout of the OVERLAPPED argument taken by LockFileEx/UnlockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]
    kernel32 = ctypes.windll.kernel32
    # Declare the prototypes so ctypes converts arguments and results correctly
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high DWORD of the byte count handed to LockFileEx/UnlockFileEx;
    # presumably chosen large enough to cover the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff
    def _lock_file(f, exclusive):
        """Lock the open file f via LockFileEx; raise OSError on failure."""
        # The locked range starts at offset 0
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object because _unlock_file passes the very same
        # OVERLAPPED pointer to UnlockFileEx
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags is 0x2 for an exclusive lock and 0x0 otherwise
        # NOTE(review): 0x2 should be LOCKFILE_EXCLUSIVE_LOCK -- confirm
        # against the Win32 documentation
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())
    def _unlock_file(f):
        """Release the lock taken by _lock_file(f); raise OSError on failure."""
        # Only valid after _lock_file has stored the OVERLAPPED pointer on f
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
 else:
    import fcntl
    def _lock_file(f, exclusive):
        """Lock the open file f with fcntl.lockf (LOCK_EX or LOCK_SH)."""
        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
    def _unlock_file(f):
        """Release the fcntl.lockf lock held on f."""
        fcntl.lockf(f, fcntl.LOCK_UN)
 class locked_file(object):
    """Context manager around io.open that keeps the file locked.

    The file is opened in __init__; the lock is taken in __enter__ and
    released (and the file closed) in __exit__. Mode 'r' requests a
    non-exclusive lock, modes 'a' and 'w' request an exclusive one.
    Inside the with block the object supports iteration, read() and
    write(), all delegated to the underlying file.
    """
    def __init__(self, filename, mode, encoding=None):
        """Open filename with io.open; mode must be 'r', 'a' or 'w'."""
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode
    def __enter__(self):
        """Take the lock and return self; close the file if locking fails."""
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The win32 _lock_file raises OSError, which is not an IOError
            # on Python 2; catch both so the open file is never leaked
            # when the lock cannot be taken.
            self.f.close()
            raise
        return self
    def __exit__(self, etype, value, traceback):
        """Release the lock and always close the file."""
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()
    def __iter__(self):
        """Iterate over the lines of the underlying file."""
        return iter(self.f)
    def write(self, *args):
        """Delegate to the underlying file's write()."""
        return self.f.write(*args)
    def read(self, *args):
        """Delegate to the underlying file's read()."""
        return self.f.read(*args)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.10.06'
+__version__ = '2013.10.07'
Author	SHA1	Message	Date
Philipp Hagemeister	4481a754e4	release 2013.10.07	2013-10-07 14:34:19 +02:00
Philipp Hagemeister	faa6ef6bc8	[jeuxvideo] Improve code quality (fixes #1567 )	2013-10-07 14:33:23 +02:00
Philipp Hagemeister	15870e90b0	Restore warning when user forgets to quote URL (#1396 )	2013-10-07 12:21:24 +02:00
Jaime Marquínez Ferrándiz	387ae5f30b	[vimeo] Recognize urls ending in a slash (fixes #1242 )	2013-10-06 21:56:23 +02:00
Philipp Hagemeister	1310bf2474	[redtube] add age_limit	2013-10-06 16:39:35 +02:00
Philipp Hagemeister	b24f347190	Merge branch 'download-archive' Conflicts: youtube_dl/YoutubeDL.py youtube_dl/__init__.py	2013-10-06 16:30:26 +02:00
Philipp Hagemeister	ee6c9f95e1	Remove superfluous parenthesis	2013-10-06 16:28:36 +02:00
Philipp Hagemeister	2a69c6b879	Merge branch 'age_limit'	2013-10-06 16:23:18 +02:00
Philipp Hagemeister	cfadd183c4	Call extracted property age_limit everywhere	2013-10-06 16:23:06 +02:00
Philipp Hagemeister	e484c81f0c	[generic] Clarify error messages	2013-10-06 16:03:18 +02:00
Philipp Hagemeister	8dbe9899a9	Allow users to specify an age limit (fixes #1545 ) With these changes, users can now restrict what videos are downloaded by the intended audience, by specifying their age with --age-limit YEARS . Add rudimentary support in youtube, pornotube, and youporn.	2013-10-06 06:08:56 +02:00
Philipp Hagemeister	c1c9a79c49	Add basic --download-archive option Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the meantime. When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.	2013-10-06 04:27:10 +02:00
`@@ -1,2 +1,2 @@`

	`__version__ = '2013.10.06'`	`__version__ = '2013.10.07'`