release 2013.10.09

[youtube] Do not recognize attribution link as user (Fixes #1573 )
Merge pull request #1584 from wingsuit/master
2013-10-09 23:50:47 +02:00 · 2013-10-09 23:50:38 +02:00 · 2013-10-09 07:44:06 -07:00 · 2013-10-09 16:41:36 +02:00 · 2013-10-09 21:56:09 +08:00 · 2013-10-08 21:25:38 +02:00
18 changed files with 359 additions and 91 deletions
--- a/README.md
+++ b/README.md
@@ -52,6 +52,9 @@ which means you can modify it, redistribute it or use it however you like.
    --datebefore DATE          download only videos uploaded before this date
    --dateafter DATE           download only videos uploaded after this date
    --no-playlist              download only the currently playing video
+    --age-limit YEARS          download only videos suitable for the given age
+    --download-archive FILE    Download only videos not present in the archive
+                               file. Record all downloaded videos in it.

 ## Download Options:
    -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m)
--- a/test/helper.py
+++ b/test/helper.py
@@ -12,12 +12,7 @@ from youtube_dl.utils import (
    compat_urllib_request,
 )

-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
+youtube_dl._setup_opener(timeout=10)

 PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
 with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+import sys
+import unittest
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl import YoutubeDL
+from helper import try_rm
+
+
+def _download_restricted(url, filename, age):
+    """Return True iff the video at url is accepted for the given age.
+
+    The media itself is never fetched ('skip_download' is set); the
+    function checks whether '<filename>.info.json' exists after the
+    download run, which the tests expect only for videos that were not
+    skipped. The JSON file is removed again before returning.
+    """
+
+    params = {
+        'age_limit': age,
+        'skip_download': True,
+        'writeinfojson': True,
+        "outtmpl": "%(id)s.%(ext)s",
+    }
+    ydl = YoutubeDL(params)
+    ydl.add_default_info_extractors()
+    json_filename = filename + '.info.json'
+    # Remove leftovers of earlier runs so that they cannot fake a success
+    try_rm(json_filename)
+    try:
+        ydl.download([url])
+        return os.path.exists(json_filename)
+    finally:
+        # Clean up even when the download raises
+        try_rm(json_filename)
+
+
+class TestAgeRestriction(unittest.TestCase):
+    # Checks that the 'age_limit' parameter makes YoutubeDL skip videos
+    # that are restricted to an older audience.
+
+    def _assert_restricted(self, url, filename, age, old_age=None):
+        # With old_age the info JSON must be written; the default of None
+        # passes no age limit at all.
+        self.assertTrue(_download_restricted(url, filename, old_age))
+        # With the (younger) age the video must be skipped.
+        self.assertFalse(_download_restricted(url, filename, age))
+
+    def test_youtube(self):
+        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+
+    def test_youporn(self):
+        self._assert_restricted(
+            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+            '505835.mp4', 2, old_age=25)
+
+    def test_pornotube(self):
+        self._assert_restricted(
+            'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
+            '1689755.flv', 13)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -19,14 +19,6 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para

 RETRIES = 3

-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-socket.setdefaulttimeout(10)
-
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

 class YoutubeDL(youtube_dl.YoutubeDL):
@@ -45,6 +37,7 @@ def _file_md5(fn):
    with open(fn, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()

+import helper  # Set up remaining global configuration
 from helper import get_testcases, try_rm
 defs = get_testcases()

--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -3,6 +3,7 @@

 from __future__ import absolute_import

+import errno
 import io
 import os
 import re
@@ -84,6 +85,11 @@ class YoutubeDL(object):
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
+    age_limit:         An integer representing the user's age in years.
+                       Unsuitable videos for the given age are skipped.
+    download_archive:  File name of a file where all downloads are recorded.
+                       Videos already present in the file are not downloaded
+                       again.
    
    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@@ -113,7 +119,7 @@ class YoutubeDL(object):
                and not params['restrictfilenames']):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
-                u'Assuming --restrict-filenames isnce file system encoding '
+                u'Assuming --restrict-filenames since file system encoding '
                u'cannot encode all charactes. '
                u'Set the LC_ALL environment variable to fix this.')
            params['restrictfilenames'] = True
@@ -309,6 +315,13 @@ class YoutubeDL(object):
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+        age_limit = self.params.get('age_limit')
+        if age_limit is not None:
+            if age_limit < info_dict.get('age_limit', 0):
+                return u'Skipping "' + title + '" because it is age restricted'
+        if self.in_download_archive(info_dict):
+            return (u'%(title)s has already been recorded in archive'
+                    % info_dict)
        return None
        
    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
@@ -578,6 +591,8 @@ class YoutubeDL(object):
                    self.report_error(u'postprocessing: %s' % str(err))
                    return

+        self.record_download_archive(info_dict)
+
    def download(self, url_list):
        """Download a given list of URLs."""
        if len(url_list) > 1 and self.fixed_template():
@@ -617,3 +632,26 @@ class YoutubeDL(object):
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')
+
+    def in_download_archive(self, info_dict):
+        """Return True if the video is already listed in the archive file.
+
+        The archive holds one "<extractor> <id>" entry per line. Without
+        the 'download_archive' parameter, or when the archive file does
+        not exist yet, nothing counts as archived. Other I/O errors
+        propagate to the caller.
+        """
+        archive_path = self.params.get('download_archive')
+        if archive_path is None:
+            return False
+        entry = u' '.join((info_dict['extractor'], info_dict['id']))
+        try:
+            with locked_file(archive_path, 'r', encoding='utf-8') as archive:
+                return any(line.strip() == entry for line in archive)
+        except IOError as err:
+            # A missing archive simply means nothing was recorded so far
+            if err.errno == errno.ENOENT:
+                return False
+            raise
+
+    def record_download_archive(self, info_dict):
+        """Append the video's "<extractor> <id>" line to the archive file.
+
+        Does nothing when the 'download_archive' parameter is not set.
+        """
+        archive_path = self.params.get('download_archive')
+        if archive_path is not None:
+            entry = u' '.join((info_dict['extractor'], info_dict['id']))
+            with locked_file(archive_path, 'a', encoding='utf-8') as archive:
+                archive.write(entry + u'\n')
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -36,6 +36,7 @@ __authors__  = (
 __license__ = 'Public Domain'

 import codecs
+import collections
 import getpass
 import optparse
 import os
@@ -188,6 +189,12 @@ def parseOpts(overrideArguments=None):
    selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
    selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
+                         help='download only videos suitable for the given age',
+                         default=None, type=int)
+    selection.add_option('--download-archive', metavar='FILE',
+                         dest='download_archive',
+                         help='Download only videos not present in the archive file. Record all downloaded videos in it.')


    authentication.add_option('-u', '--username',
@@ -441,27 +448,7 @@ def _real_main(argv=None):
    all_urls = batchurls + args
    all_urls = [url.strip() for url in all_urls]

-    # General configuration
-    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-    if opts.proxy is not None:
-        if opts.proxy == '':
-            proxies = {}
-        else:
-            proxies = {'http': opts.proxy, 'https': opts.proxy}
-    else:
-        proxies = compat_urllib_request.getproxies()
-        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
-        if 'http' in proxies and 'https' not in proxies:
-            proxies['https'] = proxies['http']
-    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
-    https_handler = make_HTTPS_handler(opts)
-    opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
-    # Delete the default user-agent header, which would otherwise apply in
-    # cases where our custom HTTP handler doesn't come into play
-    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
-    opener.addheaders =[]
-    compat_urllib_request.install_opener(opener)
-    socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
+    opener = _setup_opener(jar=jar, opts=opts)

    extractors = gen_extractors()

@@ -478,6 +465,8 @@ def _real_main(argv=None):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+            if desc is False:
+                continue
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
                _COUNTS = (u'', u'5', u'10', u'all')
@@ -631,6 +620,8 @@ def _real_main(argv=None):
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
+        'age_limit': opts.age_limit,
+        'download_archive': opts.download_archive,
        })

    if opts.verbose:
@@ -650,7 +641,12 @@ def _real_main(argv=None):
            except:
                pass
        write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
-        write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+
+        proxy_map = {}
+        for handler in opener.handlers:
+            if hasattr(handler, 'proxies'):
+                proxy_map.update(handler.proxies)
+        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')

    ydl.add_default_info_extractors()

@@ -688,6 +684,37 @@ def _real_main(argv=None):

    sys.exit(retcode)

+
+def _setup_opener(jar=None, opts=None, timeout=300):
+    """Build the URL opener, install it globally and return it.
+
+    jar     -- cookie jar handed to the HTTPCookieProcessor
+    opts    -- object with 'proxy' and 'no_check_certificate' attributes;
+               None stands for no explicit proxy and certificate checks on
+    timeout -- default socket timeout in seconds
+    """
+    if opts is None:
+        FakeOptions = collections.namedtuple(
+            'FakeOptions', ['proxy', 'no_check_certificate'])
+        opts = FakeOptions(proxy=None, no_check_certificate=False)
+
+    if opts.proxy is None:
+        # No explicit proxy: fall back to the system configuration
+        proxies = compat_urllib_request.getproxies()
+        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+        if 'https' not in proxies and 'http' in proxies:
+            proxies['https'] = proxies['http']
+    elif opts.proxy == '':
+        # An empty proxy option disables proxies altogether
+        proxies = {}
+    else:
+        proxies = {'http': opts.proxy, 'https': opts.proxy}
+
+    handlers = [
+        make_HTTPS_handler(opts),
+        compat_urllib_request.ProxyHandler(proxies),
+        compat_urllib_request.HTTPCookieProcessor(jar),
+        YoutubeDLHandler(),
+    ]
+    opener = compat_urllib_request.build_opener(*handlers)
+    # Delete the default user-agent header, which would otherwise apply in
+    # cases where our custom HTTP handler doesn't come into play
+    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+    opener.addheaders = []
+    compat_urllib_request.install_opener(opener)
+    socket.setdefaulttimeout(timeout)
+    return opener
+
+
 def main(argv=None):
    try:
        _real_main(argv)
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -141,6 +141,7 @@ from .youtube import (
    YoutubeShowIE,
    YoutubeSubscriptionsIE,
    YoutubeRecommendedIE,
+    YoutubeTruncatedURLIE,
    YoutubeWatchLaterIE,
    YoutubeFavouritesIE,
 )
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -54,6 +54,7 @@ class InfoExtractor(object):
    view_count:     How many users have watched the video on the platform.
    urlhandle:      [internal] The urlHandle to be used to download the file,
                    like returned by urllib.request.urlopen
+    age_limit:      Age restriction for the video, as an integer (years)
    formats:        A list of dictionaries for each format available, it must
                    be ordered from worst to best quality. Potential fields:
                    * url       Mandatory. The URL of the video file
@@ -318,6 +319,15 @@ class InfoExtractor(object):
                                        self._og_regex('video')],
                                       html, name, **kargs)

+    def _rta_search(self, html):
+        """Return 18 if html carries the RTA rating meta tag, otherwise 0."""
+        # See http://www.rtalabel.org/index.php?content=howtofaq#single
+        rta_meta_re = (
+            r'(?ix)<meta\s+name="rating"\s+'
+            r'     content="RTA-5042-1996-1400-1577-RTA"')
+        is_labeled = re.search(rta_meta_re, html) is not None
+        return 18 if is_labeled else 0
+
+
 class SearchInfoExtractor(InfoExtractor):
    """
    Base class for paged search queries extractors.
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -117,7 +117,7 @@ class GenericIE(InfoExtractor):
        except ValueError:
            # since this is the last-resort InfoExtractor, if
            # this error is thrown, it'll be thrown here
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Failed to download URL: %s' % url)

        self.report_extraction(video_id)
        # Look for BrightCove:
@@ -149,12 +149,12 @@ class GenericIE(InfoExtractor):
            # HTML5 video
            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Unsupported URL: %s' % url)

        # It's possible that one of the regexes
        # matched, but returned an empty group:
        if mobj.group(1) is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Did not find a valid video URL at %s' % url)

        video_url = mobj.group(1)
        video_url = compat_urlparse.urljoin(url, video_url)
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -6,6 +6,7 @@ import xml.etree.ElementTree

 from .common import InfoExtractor

+
 class JeuxVideoIE(InfoExtractor):
    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'

@@ -23,25 +24,29 @@ class JeuxVideoIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
-        m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
-
-        xml_link = m_download.group(1)
+        xml_link = self._html_search_regex(
+            r'<param name="flashvars" value="config=(.*?)" />',
+            webpage, u'config URL')
        
-        id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
+        video_id = self._search_regex(
+            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
+            xml_link, u'video ID')

-        xml_config = self._download_webpage(xml_link, title,
-                                                  'Downloading XML config')
+        xml_config = self._download_webpage(
+            xml_link, title, u'Downloading XML config')
        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
-        info = re.search(r'<format\.json>(.*?)</format\.json>',
-                         xml_config, re.MULTILINE|re.DOTALL).group(1)
-        info = json.loads(info)['versions'][0]
+        info_json = self._search_regex(
+            r'(?sm)<format\.json>(.*?)</format\.json>',
+            xml_config, u'JSON information')
+        info = json.loads(info_json)['versions'][0]
        
        video_url = 'http://video720.jeuxvideo.com/' + info['file']

-        return {'id': id,
-                'title' : config.find('titre_video').text,
-                'ext' : 'mp4',
-                'url' : video_url,
-                'description': self._og_search_description(webpage),
-                'thumbnail': config.find('image').text,
-                }
+        return {
+            'id': video_id,
+            'title': config.find('titre_video').text,
+            'ext': 'mp4',
+            'url': video_url,
+            'description': self._og_search_description(webpage),
+            'thumbnail': config.find('image').text,
+        }
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor):
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
        if upload_date: upload_date = unified_strdate(upload_date)
+        age_limit = self._rta_search(webpage)

        info = {'id': video_id,
                'url': video_url,
@@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor):
                'upload_date': upload_date,
                'title': video_title,
                'ext': 'flv',
-                'format': 'flv'}
+                'format': 'flv',
+                'age_limit': age_limit}

        return [info]
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -30,9 +30,14 @@ class RedTubeIE(InfoExtractor):
            r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            webpage, u'title')

+        # No self-labeling, but they describe themselves as
+        # "Home of Videos Porno"
+        age_limit = 18
+
        return {
-            'id':       video_id,
-            'url':      video_url,
-            'ext':      video_extension,
-            'title':    video_title,
+            'id':        video_id,
+            'url':       video_url,
+            'ext':       video_extension,
+            'title':     video_title,
+            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -1,11 +1,15 @@
 import re
 import json
+import xml.etree.ElementTree
+import datetime

 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
    ExtractorError,
 )

+
 class VevoIE(InfoExtractor):
    """
    Accepts urls from vevo.com or in the format 'vevo:{id}'
@@ -15,11 +19,11 @@ class VevoIE(InfoExtractor):
    _TEST = {
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',
-        u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
        u'info_dict': {
            u"upload_date": u"20130624",
            u"uploader": u"Hurts",
-            u"title": u"Somebody to Die For"
+            u"title": u"Somebody to Die For",
+            u'duration': 230,
        }
    }

@@ -27,27 +31,47 @@ class VevoIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

-        json_url = 'http://www.vevo.com/data/video/%s' % video_id
-        base_url = 'http://smil.lvl3.vevo.com'
-        videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower())
+        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
-        links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls')

        self.report_extraction(video_id)
-        video_info = json.loads(info_json)
-        m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
-        if m_urls is None or len(m_urls) == 0:
-            raise ExtractorError(u'Unable to extract video url')
-        # They are sorted from worst to best quality
-        m_url = m_urls[-1]
-        video_url = base_url + '/' + m_url.group('url')
-        ext = m_url.group('ext')
+        video_info = json.loads(info_json)['video']
+        last_version = {'version': -1}
+        for version in video_info['videoVersions']:
+            # These are the HTTP downloads, other types are for different manifests
+            if version['sourceType'] == 2:
+                if version['version'] > last_version['version']:
+                    last_version = version
+        if last_version['version'] == -1:
+            raise ExtractorError(u'Unable to extract last version of the video')

-        return {'url': video_url,
-                'ext': ext,
-                'id': video_id,
-                'title': video_info['title'],
-                'thumbnail': video_info['img'],
-                'upload_date': video_info['launchDate'].replace('/',''),
-                'uploader': video_info['Artists'][0]['title'],
-                }
+        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
+        formats = []
+        # Already sorted from worst to best quality
+        for rend in renditions.findall('rendition'):
+            attr = rend.attrib
+            f_url = attr['url']
+            formats.append({
+                'url': f_url,
+                'ext': determine_ext(f_url),
+                'height': int(attr['frameheight']),
+                'width': int(attr['frameWidth']),
+            })
+
+        date_epoch = int(self._search_regex(
+            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
+        upload_date = datetime.datetime.fromtimestamp(date_epoch)
+        info = {
+            'id': video_id,
+            'title': video_info['title'],
+            'formats': formats,
+            'thumbnail': video_info['imageUrl'],
+            'upload_date': upload_date.strftime('%Y%m%d'),
+            'uploader': video_info['mainArtists'][0]['artistName'],
+            'duration': video_info['duration'],
+        }
+
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+
+        return info
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -17,7 +17,7 @@ class VimeoIE(InfoExtractor):
    """Information extractor for vimeo.com."""

    # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$'
    _NETRC_MACHINE = 'vimeo'
    IE_NAME = u'vimeo'
    _TESTS = [
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor):
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)
+        age_limit = self._rta_search(webpage)

        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
@@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor):
                'ext': extension,
                'format': format,
                'thumbnail': thumbnail,
-                'description': video_description
+                'description': video_description,
+                'age_limit': age_limit,
            })

        if self._downloader.params.get('listformats', None):
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1116,6 +1116,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'lang': lang,
                'v': video_id,
                'fmt': self._downloader.params.get('subtitlesformat'),
+                'name': l[0],
            })
            url = u'http://www.youtube.com/api/timedtext?' + params
            sub_lang_list[lang] = url
@@ -1250,9 +1251,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        return url_map

    def _real_extract(self, url):
-        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
-            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
-
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
@@ -1495,7 +1493,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'description':  video_description,
                'player_url':   player_url,
                'subtitles':    video_subtitles,
-                'duration':     video_duration
+                'duration':     video_duration,
+                'age_limit':    18 if age_gate else 0,
            })
        return results

@@ -1636,7 +1635,7 @@ class YoutubeChannelIE(InfoExtractor):

 class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1829,3 +1828,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
+
+
+# Matches watch URLs that end right after "watch?feature=<name>" and turns
+# them into an error that explains the missing shell quoting.
+class YoutubeTruncatedURLIE(InfoExtractor):
+    IE_NAME = 'youtube:truncated_url'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'
+
+    def _real_extract(self, url):
+        # Nothing is extracted; the URL carries no video id to work with.
+        raise ExtractorError(
+            u'Did you forget to quote the URL? Remember that & is a meta '
+            u'character in most shells, so you want to put the URL in quotes, '
+            u'like  youtube-dl '
+            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
+            u' (or simply  youtube-dl BaW_jenozKc  ).',
+            expected=True)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -830,3 +830,99 @@ def get_cachedir(params={}):
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
+
+
+# Cross-platform file locking
+if sys.platform == 'win32':
+    import ctypes.wintypes
+    import msvcrt
+
+    class OVERLAPPED(ctypes.Structure):
+        _fields_ = [
+            ('Internal', ctypes.wintypes.LPVOID),
+            ('InternalHigh', ctypes.wintypes.LPVOID),
+            ('Offset', ctypes.wintypes.DWORD),
+            ('OffsetHigh', ctypes.wintypes.DWORD),
+            ('hEvent', ctypes.wintypes.HANDLE),
+        ]
+
+    kernel32 = ctypes.windll.kernel32
+    LockFileEx = kernel32.LockFileEx
+    LockFileEx.argtypes = [
+        ctypes.wintypes.HANDLE,     # hFile
+        ctypes.wintypes.DWORD,      # dwFlags
+        ctypes.wintypes.DWORD,      # dwReserved
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+        ctypes.POINTER(OVERLAPPED)  # Overlapped
+    ]
+    LockFileEx.restype = ctypes.wintypes.BOOL
+    UnlockFileEx = kernel32.UnlockFileEx
+    UnlockFileEx.argtypes = [
+        ctypes.wintypes.HANDLE,     # hFile
+        ctypes.wintypes.DWORD,      # dwReserved
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+        ctypes.POINTER(OVERLAPPED)  # Overlapped
+    ]
+    UnlockFileEx.restype = ctypes.wintypes.BOOL
+    whole_low = 0xffffffff
+    whole_high = 0x7fffffff
+
+    def _lock_file(f, exclusive):
+        """Lock the open file f through LockFileEx.
+
+        The lock is exclusive if exclusive is true and shared otherwise.
+        Raises OSError when LockFileEx reports failure.
+        """
+        overlapped = OVERLAPPED()
+        overlapped.Offset = 0
+        overlapped.OffsetHigh = 0
+        overlapped.hEvent = 0
+        # Keep the pointer on the file object: _unlock_file passes the
+        # same structure to UnlockFileEx.
+        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+        handle = msvcrt.get_osfhandle(f.fileno())
+        # Flag 0x2 is LOCKFILE_EXCLUSIVE_LOCK in the Win32 API
+        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+                          whole_low, whole_high, f._lock_file_overlapped_p):
+            raise OSError('Locking file failed: %r' % ctypes.FormatError())
+
+    def _unlock_file(f):
+        """Release the lock taken by _lock_file on the open file f.
+
+        Raises OSError when UnlockFileEx reports failure.
+        """
+        # Set by _lock_file; unlocking a file that was never locked is a bug
+        assert f._lock_file_overlapped_p
+        handle = msvcrt.get_osfhandle(f.fileno())
+        if not UnlockFileEx(handle, 0,
+                            whole_low, whole_high, f._lock_file_overlapped_p):
+            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+    import fcntl
+
+    def _lock_file(f, exclusive):
+        """Lock the open file f with fcntl.lockf (exclusive or shared)."""
+        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+
+    def _unlock_file(f):
+        """Release the fcntl.lockf lock held on the open file f."""
+        fcntl.lockf(f, fcntl.LOCK_UN)
+
+
+class locked_file(object):
+    """Context manager around a file that is locked while in use.
+
+    The file is opened on construction. Entering the context takes a
+    shared lock for mode 'r' and an exclusive lock for 'a' and 'w';
+    leaving it unlocks and closes the file. Inside the context the object
+    supports iteration, read() and write(), all forwarded to the file.
+    """
+
+    def __init__(self, filename, mode, encoding=None):
+        assert mode in ['r', 'a', 'w']
+        self.f = io.open(filename, mode, encoding=encoding)
+        self.mode = mode
+
+    def __enter__(self):
+        exclusive = self.mode != 'r'
+        try:
+            _lock_file(self.f, exclusive)
+        except (IOError, OSError):
+            # The Windows implementation reports failures as OSError,
+            # which is not an IOError on Python 2. Close the file in both
+            # cases instead of leaking the handle.
+            self.f.close()
+            raise
+        return self
+
+    def __exit__(self, etype, value, traceback):
+        try:
+            _unlock_file(self.f)
+        finally:
+            # Close the file even if unlocking failed
+            self.f.close()
+
+    def __iter__(self):
+        return iter(self.f)
+
+    def write(self, *args):
+        return self.f.write(*args)
+
+    def read(self, *args):
+        return self.f.read(*args)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.10.06'
+__version__ = '2013.10.09'
Author	SHA1	Message	Date
Philipp Hagemeister	8f1ae18a18	release 2013.10.09	2013-10-09 23:50:47 +02:00
Philipp Hagemeister	57da92b7df	[youtube] Do not recognize attribution link as user (Fixes #1573 )	2013-10-09 23:50:38 +02:00
Jaime Marquínez Ferrándiz	df4f632dbc	Merge pull request #1584 from wingsuit/master Tiny tpo	2013-10-09 07:44:06 -07:00
Jaime Marquínez Ferrándiz	a34c2faae4	[youtube] set the 'name' parameter in the subtitles url (fixes #1577 )	2013-10-09 16:41:36 +02:00
Tom	1d368c7589	Tiny tpo	2013-10-09 21:56:09 +08:00
Jaime Marquínez Ferrándiz	88bd97e34c	[vevo] Some improvements (fixes #1580 ) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result	2013-10-08 21:25:38 +02:00
Jaime Marquínez Ferrándiz	2ae3edb1cf	Fix the printing of the proxy map in debug mode The proxies have to be extracted from the opener.handlers	2013-10-07 21:10:31 +02:00
Philipp Hagemeister	b2ad967e45	Simplify test setup	2013-10-07 19:06:36 +02:00
Philipp Hagemeister	a27b9e8bd5	Move opener setup into a separate helper function	2013-10-07 19:01:47 +02:00
Philipp Hagemeister	4481a754e4	release 2013.10.07	2013-10-07 14:34:19 +02:00
Philipp Hagemeister	faa6ef6bc8	[jeuxvideo] Improve code quality (fixes #1567 )	2013-10-07 14:33:23 +02:00
Philipp Hagemeister	15870e90b0	Restore warning when user forgets to quote URL (#1396 )	2013-10-07 12:21:24 +02:00
Jaime Marquínez Ferrándiz	387ae5f30b	[vimeo] Recognize urls ending in a slash (fixes #1242 )	2013-10-06 21:56:23 +02:00
Philipp Hagemeister	1310bf2474	[redtube] add age_limit	2013-10-06 16:39:35 +02:00
Philipp Hagemeister	b24f347190	Merge branch 'download-archive' Conflicts: youtube_dl/YoutubeDL.py youtube_dl/__init__.py	2013-10-06 16:30:26 +02:00
Philipp Hagemeister	ee6c9f95e1	Remove superfluous parenthesis	2013-10-06 16:28:36 +02:00
Philipp Hagemeister	2a69c6b879	Merge branch 'age_limit'	2013-10-06 16:23:18 +02:00
Philipp Hagemeister	cfadd183c4	Call extracted property age_limit everywhere	2013-10-06 16:23:06 +02:00
Philipp Hagemeister	e484c81f0c	[generic] Clarify error messages	2013-10-06 16:03:18 +02:00
Philipp Hagemeister	8dbe9899a9	Allow users to specify an age limit (fixes #1545 ) With these changes, users can now restrict what videos are downloaded by the intented audience, by specifying their age with --age-limit YEARS . Add rudimentary support in youtube, pornotube, and youporn.	2013-10-06 06:08:56 +02:00
Philipp Hagemeister	c1c9a79c49	Add basic --download-archive option Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time. When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.	2013-10-06 04:27:10 +02:00