release 2013.11.02

[youtube] Encode subtitle track name in request (Fixes #1700 )
Add an extractor for vk.com (closes #1635 )
2013-11-02 11:21:36 +01:00 · 2013-11-02 11:21:05 +01:00 · 2013-11-01 22:34:18 +01:00 · 2013-11-01 11:56:15 +01:00 · 2013-10-31 08:07:26 +01:00 · 2013-10-31 07:55:03 +01:00
8 changed files with 117 additions and 19 deletions
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -22,7 +22,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
        return info_dict
    def getSubtitles(self):
        info_dict = self.getInfoDict()
-        return info_dict[0]['subtitles']
+        return info_dict['subtitles']
    def test_no_writesubtitles(self):
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -142,6 +142,7 @@ from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
+from .vk import VKIE
 from .wat import WatIE
 from .websurg import WeBSurgIE
 from .weibo import WeiboIE
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
        """Build a request with the family filter disabled"""
        request = compat_urllib_request.Request(url)
        request.add_header('Cookie', 'family_filter=off')
+        request.add_header('Cookie', 'ff=off')
        return request

 class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -61,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            },
            u'skip': u'VEVO is only available in some countries',
        },
+        # age-restricted video
+        {
+            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+            u'file': u'xyh2zz.mp4',
+            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
+            u'info_dict': {
+                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+                u'uploader': 'HotWaves1012',
+                u'age_limit': 18,
+            }
+
+        }
    ]

    def _real_extract(self, url):
@@ -90,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
                                             # Looking for official user
                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
-                                            webpage, 'video uploader')
+                                            webpage, 'video uploader', fatal=False)
+        age_limit = self._rta_search(webpage)

        video_upload_date = None
        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@ -132,15 +146,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            self._list_available_subtitles(video_id)
            return

-        return [{
+        return {
            'id':       video_id,
            'formats': formats,
            'uploader': video_uploader,
            'upload_date':  video_upload_date,
            'title':    self._og_search_title(webpage),
            'subtitles':    video_subtitles,
-            'thumbnail': info['thumbnail_url']
-        }]
+            'thumbnail': info['thumbnail_url'],
+            'age_limit': age_limit,
+        }

    def _get_available_subtitles(self, video_id):
        try:
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor):

        if video_id is None:
            # This is an event page:
-            player = get_meta_content('twitter:player', webpage)
-            if player is None:
-                raise ExtractorError('Couldn\'t extract event api url')
-            api_url = player.replace('/player', '')
-            api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
-            info = json.loads(self._download_webpage(api_url, event_name,
-                                                     u'Downloading event info'))
+            config_json = self._search_regex(r'window.config = ({.*?});',
+                webpage, u'window config')
+            info = json.loads(config_json)['event']
            videos = [self._extract_video_info(video_data['data'])
                for video_data in info['feed']['data'] if video_data['type'] == u'video']
            return self.playlist_result(videos, info['id'], info['full_name'])
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -20,7 +20,9 @@ class MetacafeIE(InfoExtractor):
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    IE_NAME = u'metacafe'
-    _TESTS = [{
+    _TESTS = [
+    # Youtube video
+    {
        u"add_ie": ["Youtube"],
        u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
        u"file":  u"_aUehQsCQtM.mp4",
@@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor):
            u"uploader_id": u"PBS"
        }
    },
+    # Normal metacafe video
+    {
+        u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
+        u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
+        u'info_dict': {
+            u'id': u'11121940',
+            u'ext': u'mp4',
+            u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
+            u'uploader': u'ign',
+            u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+        },
+    },
+    # AnyClip video
    {
        u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
        u"file": u"an-dVVXnuY7Jh77J.mp4",
        u"info_dict": {
            u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
            u"uploader": u"anyclip",
-            u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
-        }
-    }]
+            u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
+        },
+    },
+    # age-restricted video
+    {
+        u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
+        u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
+        u'info_dict': {
+            u'id': u'5186653',
+            u'ext': u'mp4',
+            u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
+            u'uploader': u'Dwayne Pipe',
+            u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
+            u'age_limit': 18,
+        },
+    },
+    ]


    def report_disclaimer(self):
@@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor):
            'submit': "Continue - I'm over 18",
            }
        request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read()
@@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor):

        # Retrieve video webpage to extract further information
        req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
-        req.headers['Cookie'] = 'flashVersion=0;'
+
+        # AnyClip videos require the flashversion cookie so that we get the link
+        # to the mp4 file
+        mobj_an = re.match(r'^an-(.*?)$', video_id)
+        if mobj_an:
+            req.headers['Cookie'] = 'flashVersion=0;'
        webpage = self._download_webpage(req, video_id)

        # Extract URL, uploader and title from webpage
@@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor):
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                webpage, u'uploader nickname', fatal=False)

+        if re.search(r'"contentRating":"restricted"', webpage) is not None:
+            age_limit = 18
+        else:
+            age_limit = 0
+
        return {
            '_type':    'video',
            'id':       video_id,
@@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor):
            'upload_date':  None,
            'title':    video_title,
            'ext':      video_ext,
+            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -0,0 +1,45 @@
+# encoding: utf-8
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+    unescapeHTML,
+)
+
+
+class VKIE(InfoExtractor):
+    IE_NAME = u'vk.com'
+    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
+
+    _TEST = {
+        u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
+        u'md5': u'0deae91935c54e00003c2a00646315f0',
+        u'info_dict': {
+            u'id': u'162222515',
+            u'ext': u'flv',
+            u'title': u'ProtivoGunz - Хуёвая песня',
+            u'uploader': u'Noize MC',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
+        info_page = self._download_webpage(info_url, video_id)
+        m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
+        if m_yt is not None:
+            self.to_screen(u'Youtube video detected')
+            return self.url_result(m_yt.group(1), 'Youtube')
+        vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars')
+        vars = json.loads(vars_json)
+
+        return {
+            'id': compat_str(vars['vid']),
+            'url': vars['url240'],
+            'title': unescapeHTML(vars['md_title']),
+            'thumbnail': vars['jpg'],
+            'uploader': vars['md_author'],
+        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1111,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'lang': lang,
                'v': video_id,
                'fmt': self._downloader.params.get('subtitlesformat'),
-                'name': l[0],
+                'name': l[0].encode('utf-8'),
            })
            url = u'http://www.youtube.com/api/timedtext?' + params
            sub_lang_list[lang] = url
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.10.30'
+__version__ = '2013.11.02'
Author	SHA1	Message	Date
Philipp Hagemeister	aa2484e390	release 2013.11.02	2013-11-02 11:21:36 +01:00
Philipp Hagemeister	8eddf3e91d	[youtube] Encode subtitle track name in request (Fixes #1700 )	2013-11-02 11:21:05 +01:00
Jaime Marquínez Ferrándiz	60d142aa8d	Add an extractor for vk.com (closes #1635 )	2013-11-01 22:34:18 +01:00
Jaime Marquínez Ferrándiz	66cf3ac342	[metacafe] Fix support for age-restricted videos (fixes #1696 ) The 'Content-Type' header must be set for disabling the family filter. The 'flashversion' cookie is only needed for AnyClip videos. Added tests for standard metacafe videos and for age-restricted videos. Also set the 'age_limit' field.	2013-11-01 11:56:15 +01:00
Jaime Marquínez Ferrándiz	5f1ea943ab	[livestream] fix the extraction of events It now uses a json dictionary from the webpage.	2013-10-31 08:07:26 +01:00
Jaime Marquínez Ferrándiz	0ef7ad5cd4	Fix the test for dailymotion subtitles The extractor returns a single info_dict now.	2013-10-31 07:55:03 +01:00
Philipp Hagemeister	9f1109a564	[dailymotion] Fix support for age-restricted videos (Fixes #1688 )	2013-10-31 00:20:49 +01:00