release 2014.09.04.3

[arte.tv:+7] Add another pattern for json vp url
[nba] Modernize
2014-09-04 16:20:17 +02:00 · 2014-09-04 20:44:51 +07:00 · 2014-09-04 20:06:14 +07:00 · 2014-09-04 19:48:29 +07:00 · 2014-09-04 19:37:40 +07:00 · 2014-09-04 19:34:40 +07:00
6 changed files with 66 additions and 38 deletions
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):

    def _extract_from_webpage(self, webpage, video_id, lang):
        json_url = self._html_search_regex(
-            r'arte_vp_url="(.*?)"', webpage, 'json vp url')
+            [r'arte_vp_url="(.*?)"', r'data-url="([^"]+)"'],
+            webpage, 'json vp url')
        return self._extract_from_json_url(json_url, video_id, lang)

    def _extract_from_json_url(self, json_url, video_id, lang):
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -3,18 +3,23 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import (
+    remove_end,
+    parse_duration,
+)


 class NBAIE(InfoExtractor):
    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
+        'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
        'info_dict': {
            'id': '0021200253-okc-bkn-recap.nba',
            'ext': 'mp4',
-            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'title': 'Thunder vs. Nets',
+            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+            'duration': 181,
        },
    }

@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
        video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'

        shortened_video_id = video_id.rpartition('/')[2]
-        title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
+        title = remove_end(
+            self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
+
+        description = self._og_search_description(webpage)
+        duration = parse_duration(
+            self._html_search_meta('duration', webpage, 'duration', fatal=False))

-        description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)

        return {
            'id': shortened_video_id,
            'url': video_url,
            'title': title,
            'description': description,
+            'duration': duration,
        }
--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dl/extractor/techtalks.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'

    _TEST = {
-        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
-        u'playlist': [
+        'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
+        'info_dict': {
+            'id': '57758',
+            'title': 'Learning Topic Models --- Going beyond SVD',
+        },
+        'playlist': [
            {
-                u'file': u'57758.flv',
-                u'info_dict': {
-                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                'info_dict': {
+                    'id': '57758',
+                    'ext': 'flv',
+                    'title': 'Learning Topic Models --- Going beyond SVD',
                },
            },
            {
-                u'file': u'57758-slides.flv',
-                u'info_dict': {
-                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                'info_dict': {
+                    'id': '57758-slides',
+                    'ext': 'flv',
+                    'title': 'Learning Topic Models --- Going beyond SVD',
                },
            },
        ],
-        u'params': {
+        'params': {
            # rtmp download
-            u'skip_download': True,
+            'skip_download': True,
        },
    }

@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        talk_id = mobj.group('id')
        webpage = self._download_webpage(url, talk_id)
-        rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
-            u'rtmp url')
-        play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
-            webpage, u'presenter play path')
+        rtmp_url = self._search_regex(
+            r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
+        play_path = self._search_regex(
+            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
+            webpage, 'presenter play path')
        title = clean_html(get_element_by_attribute('class', 'title', webpage))
        video_info = {
-                'id': talk_id,
-                'title': title,
-                'url': rtmp_url,
-                'play_path': play_path,
-                'ext': 'flv',
-            }
+            'id': talk_id,
+            'title': title,
+            'url': rtmp_url,
+            'play_path': play_path,
+            'ext': 'flv',
+        }
        m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
        if m_slides is None:
            return video_info
        else:
-            return [
-                video_info,
-                # The slides video
-                {
-                    'id': talk_id + '-slides',
-                    'title': title,
-                    'url': rtmp_url,
-                    'play_path': m_slides.group(1),
-                    'ext': 'flv',
-                },
-            ]
+            return {
+                '_type': 'playlist',
+                'id': talk_id,
+                'title': title,
+                'entries': [
+                    video_info,
+                    # The slides video
+                    {
+                        'id': talk_id + '-slides',
+                        'title': title,
+                        'url': rtmp_url,
+                        'play_path': m_slides.group(1),
+                        'ext': 'flv',
+                    },
+                ],
+            }
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dl/extractor/toypics.py
@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
        'info_dict': {
            'id': 'Mikey',
        },
-        'playlist_mincount': 9917,
+        'playlist_mincount': 19,
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
+        'info_dict': {
+            'title': 'Sinkhole of bureaucracy',
+        },
        'playlist': [{
            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
            'info_dict': {
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.09.04.2'
+__version__ = '2014.09.04.3'
Author	SHA1	Message	Date
Philipp Hagemeister	16e6f396b4	release 2014.09.04.3	2014-09-04 16:20:17 +02:00
Sergey M․	c6ec6b2e8b	[arte.tv:+7] Add another pattern for json vp url	2014-09-04 20:44:51 +07:00
Sergey M․	7bbc6428b6	[nba] Modernize	2014-09-04 20:06:14 +07:00
Sergey M․	c1a3c9ddb2	[techtalks] Modernize	2014-09-04 19:48:29 +07:00
Sergey M․	feec0f56f5	[toypics:user] Update test playlist count	2014-09-04 19:37:40 +07:00
Sergey M․	8029857d27	[washingtonpost] Add playlist title to test	2014-09-04 19:34:40 +07:00