Compare commits

..

18 Commits

Author SHA1 Message Date
Philipp Hagemeister
a21420389e release 2015.02.19.3 2015-02-19 19:28:17 +01:00
Jaime Marquínez Ferrándiz
6140baf4e1 [nationalgeographic] Add extractor (closes #4960) 2015-02-19 18:17:31 +01:00
Sergey M․
8fc642eb5b [pornhub] Fix uploader regex 2015-02-19 22:15:49 +06:00
Sergey M․
e66e1a0046 [pornhub] Add support for playlists (Closes #4995) 2015-02-19 22:15:19 +06:00
Sergey M․
d5c69f1da4 [5min] Cover joystiq.com URLs (Closes #4962) 2015-02-19 21:47:11 +06:00
Jaime Marquínez Ferrándiz
5c8a3f862a [nbc] Use a test video that works outside the US 2015-02-19 15:00:39 +01:00
Jaime Marquínez Ferrándiz
a3b9157f49 [cbssports] Add extractor (closes #4996) 2015-02-19 13:06:53 +01:00
Philipp Hagemeister
b88ba05356 [imgur] Simplify 2015-02-19 05:53:09 +01:00
Philipp Hagemeister
b74d505577 Merge remote-tracking branch 'jbboehr/imgur-gifv-improvements' 2015-02-19 05:16:11 +01:00
John Boehr
9e2d7dca87 [imgur] improve error check for non-video URLs 2015-02-18 19:47:54 -08:00
John Boehr
d236b37ac9 [imgur] improve regex #4998 2015-02-18 19:28:19 -08:00
Philipp Hagemeister
e880c66bd8 [theonion] Modernize 2015-02-19 04:12:40 +01:00
Philipp Hagemeister
383456aa29 [Makefile] Also delete *.avi files in clean 2015-02-19 04:09:52 +01:00
John Boehr
1a13940c8d [imgur] support regular URL 2015-02-18 18:12:48 -08:00
Philipp Hagemeister
3d54788495 [webofstories] Fix extraction 2015-02-19 02:12:08 +01:00
Philipp Hagemeister
71d53ace2f [sockshare] Do not require thumbnail anymore
Thumbnail is not present on the website anymore.
2015-02-19 02:04:30 +01:00
Philipp Hagemeister
f37e3f99f0 [generic] Correct test case
Video has been reuploaded / edited
2015-02-19 02:00:52 +01:00
Philipp Hagemeister
bd03ffc16e [netzkino] Skip download in test case
Works fine from Germany, but fails from everywhere else
2015-02-19 01:58:54 +01:00
16 changed files with 155 additions and 40 deletions

View File: Makefile

@@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin

View File: docs/supportedsites.md

@@ -68,6 +68,7 @@
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
- **CBSNews**: CBS News
- **CBSSports**
- **CeskaTelevize**
- **channel9**: Channel 9
- **Chilloutzone**
@@ -264,6 +265,7 @@
- **myvideo**
- **MyVidster**
- **n-tv.de**
- **NationalGeographic**
- **Naver**
- **NBA**
- **NBC**
@@ -321,6 +323,7 @@
- **podomatic**
- **PornHd**
- **PornHub**
- **PornHubPlaylist**
- **Pornotube**
- **PornoXO**
- **PromptFile**

View File: youtube_dl/extractor/__init__.py

@@ -58,6 +58,7 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .cbssports import CBSSportsIE
from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
@@ -284,6 +285,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .nationalgeographic import NationalGeographicIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
@@ -352,7 +354,10 @@ from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
from .pornhub import (
    PornHubIE,
    PornHubPlaylistIE,
)
from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
from .promptfile import PromptFileIE

View File: youtube_dl/extractor/cbssports.py (new file)

@@ -0,0 +1,30 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class CBSSportsIE(InfoExtractor):
    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
        'info_dict': {
            'id': '_d5_GbO8p1sT',
            'ext': 'flv',
            'title': 'US Open flashbacks: 1990s',
            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        section = mobj.group('section')
        video_id = mobj.group('id')
        all_videos = self._download_json(
            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
            video_id)
        # The json file contains the info of all the videos in the section
        video_info = next(v for v in all_videos if v['pcid'] == video_id)
        return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
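
The extractor only resolves the player page to a ThePlatform ID; the actual formats come from ThePlatformIE. A minimal, illustrative sketch of driving it through the standard Python API (not part of this change):

```python
import youtube_dl

# Illustrative only: resolves a cbssports.com player URL via the new CBSSportsIE,
# which then hands off to ThePlatform for the actual formats.
url = 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s'
ydl = youtube_dl.YoutubeDL({'quiet': True})
info = ydl.extract_info(url, download=False)
print(info['id'], info['title'])
```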

View File: youtube_dl/extractor/fivemin.py

@@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
    IE_NAME = '5min'
    _VALID_URL = r'''(?x)
        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
            5min:)
        (?P<id>\d+)
        '''
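
The only functional change is the extra alternative in _VALID_URL, so joystiq.com (and massively.joystiq.com) video pages now resolve to the same numeric 5min ID as the other URL forms. A quick, illustrative check of the pattern (the URLs below are made up for the example):

```python
import re

FIVEMIN_VALID_URL = r'''(?x)
    (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
        https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
        5min:)
    (?P<id>\d+)
'''

for url in (
    'http://embed.5min.com/Scripts/PlayerSeed.js?playList=518013791',
    'http://massively.joystiq.com/video/518013791',
    '5min:518013791',
):
    m = re.match(FIVEMIN_VALID_URL, url)
    print(url, '->', m.group('id') if m else 'no match')
```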

View File: youtube_dl/extractor/generic.py

@@ -532,7 +532,7 @@ class GenericIE(InfoExtractor):
            'info_dict': {
                'id': 'Mrj4DVp2zeA',
                'ext': 'mp4',
                'upload_date': '20150204',
                'upload_date': '20150212',
                'uploader': 'The National Archives UK',
                'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
                'uploader_id': 'NationalArchives08',

View File: youtube_dl/extractor/imgur.py

@@ -7,11 +7,12 @@ from ..utils import (
    int_or_none,
    js_to_json,
    mimetype2ext,
    ExtractorError,
)


class ImgurIE(InfoExtractor):
    _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)'
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'

    _TESTS = [{
        'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -21,6 +22,14 @@ class ImgurIE(InfoExtractor):
            'title': 'MRW gifv is up and running without any bugs',
            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
        },
    }, {
        'url': 'https://imgur.com/A61SaA1',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 'MRW gifv is up and running without any bugs',
            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
        },
    }]

    def _real_extract(self, url):
@@ -34,10 +43,14 @@ class ImgurIE(InfoExtractor):
            r'<param name="height" value="([0-9]+)"',
            webpage, 'height', fatal=False))

        formats = []
        video_elements = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            webpage, 'video elements')
            webpage, 'video elements', default=None)
        if not video_elements:
            raise ExtractorError(
                'No sources found for video %s. Maybe an image?' % video_id,
                expected=True)

        formats = []
        for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
            formats.append({
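
Taken together, the imgur changes relax _VALID_URL so that plain imgur.com/<id> pages match alongside the direct i.imgur.com .mp4/.gifv links, and turn a missing video-elements block into an expected ExtractorError instead of a hard regex failure. An illustrative check of the new pattern:

```python
import re

IMGUR_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'

for url in ('https://i.imgur.com/A61SaA1.gifv',
            'https://i.imgur.com/A61SaA1.mp4',
            'https://imgur.com/A61SaA1'):
    print(url, '->', re.match(IMGUR_VALID_URL, url).group('id'))
```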

View File: youtube_dl/extractor/nationalgeographic.py (new file)

@@ -0,0 +1,38 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    smuggle_url,
    url_basename,
)


class NationalGeographicIE(InfoExtractor):
    _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'

    _TEST = {
        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
        'info_dict': {
            'id': '4DmDACA6Qtk_',
            'ext': 'flv',
            'title': 'Mating Crabs Busted by Sharks',
            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        name = url_basename(url)
        webpage = self._download_webpage(url, name)

        feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
        guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')

        feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
        content = feed.find('.//{http://search.yahoo.com/mrss/}content')
        theplatform_id = url_basename(content.attrib.get('url'))

        return self.url_result(smuggle_url(
            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
            # For some reason, the normal links don't work and we must force the use of f4m
            {'force_smil_url': True}))
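
The extractor reads only two data attributes from the page: it fetches the MRSS feed named by data-feed-url, filters it by data-video-guid, and takes the ThePlatform ID from the basename of the <media:content> URL. A rough standalone illustration of that last step (the feed snippet below is made up):

```python
import xml.etree.ElementTree as ET
from posixpath import basename  # stand-in for youtube_dl.utils.url_basename

# Hypothetical, minimal MRSS payload; the real one comes from the page's data-feed-url.
feed = ET.fromstring(
    '<rss xmlns:media="http://search.yahoo.com/mrss/"><channel><item>'
    '<media:content url="http://example.invalid/ngs/4DmDACA6Qtk_"/>'
    '</item></channel></rss>')
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
print(basename(content.attrib['url']))  # -> 4DmDACA6Qtk_
```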

View File: youtube_dl/extractor/nbc.py

@@ -18,13 +18,13 @@ class NBCIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
            'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
            # md5 checksum is not stable
            'info_dict': {
                'id': 'bTmnLCvIbaaH',
                'id': 'c9xnCo0YPOPH',
                'ext': 'flv',
                'title': 'I Am a Firefighter',
                'description': 'An emergency puts Dawson\'s firefighter skills to the ultimate test in this four-part digital series.',
                'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
                'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
            },
        },
        {

View File: youtube_dl/extractor/netzkino.py

@@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
            'timestamp': 1344858571,
            'age_limit': 12,
        },
        'params': {
            'skip_download': 'Download only works from Germany',
        }
    }

    def _real_extract(self, url):

View File: youtube_dl/extractor/pornhub.py

@@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
        video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(
            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
            webpage, 'uploader', fatal=False)
        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
        if thumbnail:
@@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor):
            'formats': formats,
            'age_limit': 18,
        }


class PornHubPlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.pornhub.com/playlist/6201671',
        'info_dict': {
            'id': '6201671',
            'title': 'P0p4',
        },
        'playlist_mincount': 35,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        entries = [
            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
            for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
        ]

        playlist = self._parse_json(
            self._search_regex(
                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
            playlist_id)

        return self.playlist_result(
            entries, playlist_id, playlist.get('title'), playlist.get('description'))
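
The playlist extractor is deliberately simple: it scrapes every view_video.php link from the playlist page, de-duplicates them, and hands each one back to PornHubIE, taking the title from the inline playlistObject JSON. A rough illustration of the link scraping on a made-up page fragment:

```python
import re

# Hypothetical markup; the real page is fetched with _download_webpage().
webpage = (
    '<a href="/view_video.php?viewkey=1111111111">one</a>'
    '<a href="/view_video.php?viewkey=2222222222&amp;pkey=6201671">two</a>'
    '<a href="/view_video.php?viewkey=1111111111">one, again</a>'
)
video_urls = set(re.findall(r'href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
print(sorted('http://www.pornhub.com/%s' % u for u in video_urls))
```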

View File: youtube_dl/extractor/sockshare.py

@@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor):
            'id': '437BE28B89D799D7',
            'title': 'big_buck_bunny_720p_surround.avi',
            'ext': 'avi',
            'thumbnail': 're:^http://.*\.jpg$',
        }
    }
@@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor):
            ''', webpage, 'hash')

        fields = {
            "hash": confirm_hash,
            "hash": confirm_hash.encode('utf-8'),
            "confirm": "Continue as Free User"
        }
@@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor):
            webpage, 'title', default=None)
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            webpage, 'thumbnail')
            webpage, 'thumbnail', default=None)
        formats = [{
            'format_id': 'sd',
View File: youtube_dl/extractor/theonion.py

@@ -4,11 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class TheOnionIE(InfoExtractor):
    _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'

    _TEST = {
        'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
        'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        article_id = mobj.group('article_id')
        webpage = self._download_webpage(url, article_id)
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(
            r'"videoId":\s(\d+),', webpage, 'video ID')
@@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
        thumbnail = self._og_search_thumbnail(webpage)

        sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
        if not sources:
            raise ExtractorError(
                'No sources found for video %s' % video_id, expected=True)

        formats = []
        for src, type_ in sources:
            if type_ == 'video/mp4':
@@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
                })
            elif type_ == 'application/x-mpegURL':
                formats.extend(
                    self._extract_m3u8_formats(src, video_id, preference=-1))
                    self._extract_m3u8_formats(src, display_id, preference=-1))
            else:
                self.report_warning(
                    'Encountered unexpected format: %s' % type_)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
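
The modernisation replaces the explicit re.match boilerplate with the _match_id() helper (which is why the capture group is renamed from article_id to id): the numeric ID from the URL becomes display_id, while the videoId scraped from the page stays the canonical id. _match_id() is roughly equivalent to:

```python
import re

THEONION_VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'

def match_id(url):
    # Simplified stand-in for InfoExtractor._match_id(): apply _VALID_URL
    # and return the 'id' group.
    return re.match(THEONION_VALID_URL, url).group('id')

print(match_id('http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/'))
# -> 36918
```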

View File: youtube_dl/extractor/theplatform.py

@@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor):
        if not provider_id:
            provider_id = 'dJ5BDC'

        if mobj.group('config'):
        if smuggled_data.get('force_smil_url', False):
            smil_url = url
        elif mobj.group('config'):
            config_url = url + '&form=json'
            config_url = config_url.replace('swf/', 'config/')
            config_url = config_url.replace('onsite/', 'onsite/config/')
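
This is the hook the new NationalGeographic extractor relies on: it smuggles {'force_smil_url': True} onto the ThePlatform URL so ThePlatformIE uses that URL as the SMIL feed directly instead of deriving a config URL from it. The smuggling helpers simply piggyback a JSON fragment on the URL; roughly:

```python
from youtube_dl.utils import smuggle_url, unsmuggle_url

url = ('http://link.theplatform.com/s/ngs/4DmDACA6Qtk_'
       '?format=SMIL&formats=MPEG4&manifest=f4m')
smuggled = smuggle_url(url, {'force_smil_url': True})
plain_url, data = unsmuggle_url(smuggled, default={})
print(plain_url == url, data)  # -> True {'force_smil_url': True}
```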

View File: youtube_dl/extractor/webofstories.py

@@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
        description = self._html_search_meta('description', webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        story_filename = self._search_regex(
            r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
        speaker_id = self._search_regex(
            r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
        story_id = self._search_regex(
            r'\.storyId\((\d+)\)', webpage, 'story ID')
        speaker_type = self._search_regex(
            r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
        great_life = self._search_regex(
            r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
        embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
            r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
            webpage, 'embed params').split(',')]
        (
            _, speaker_id, story_id, story_duration,
            speaker_type, great_life, _thumbnail, _has_subtitles,
            story_filename, _story_order) = embed_params

        is_great_life_series = great_life == 'true'

        duration = int_or_none(self._search_regex(
            r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
        duration = int_or_none(story_duration)

        # URL building, see: http://www.webofstories.com/scripts/player.js
        ms_prefix = ''
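
Instead of scraping each player parameter with its own regex, the extractor now pulls the single getEmbedCode(...) argument list out of the page and splits it, which is also where the duration comes from. A rough illustration of that parsing on a hypothetical embed call:

```python
# Hypothetical argument string, as it would appear inside
# $("#embedCode").html(getEmbedCode(...)) on a story page.
raw_params = "'', '9731', '53462', '637', 'NONE', 'false', 'thumb.jpg', 'false', 'story.mp4', '3'"

embed_params = [s.strip(" \r\n\t'") for s in raw_params.split(',')]
(
    _, speaker_id, story_id, story_duration,
    speaker_type, great_life, _thumbnail, _has_subtitles,
    story_filename, _story_order) = embed_params

print(speaker_id, story_id, story_duration, story_filename)
# -> 9731 53462 637 story.mp4
```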

View File: youtube_dl/version.py

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.02.19.2'
__version__ = '2015.02.19.3'