release 2013.09.20.1

Add an extractor for ebaumsworld.com (closes #1462 )
[youtube] add algo for length 91
2013-09-20 22:59:14 +02:00 · 2013-09-20 16:55:50 +02:00 · 2013-09-20 14:43:16 +02:00 · 2013-09-20 13:26:03 +02:00 · 2013-09-20 13:05:34 +02:00 · 2013-09-20 10:24:48 +02:00
14 changed files with 203 additions and 28 deletions
--- a/README.md
+++ b/README.md
@@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like.
    -U, --update               update this program to latest version. Make sure
                               that you have sufficient permissions (run with
                               sudo if needed)
-    -i, --ignore-errors        continue on download errors
+    -i, --ignore-errors        continue on download errors, for example to
+                               skip unavailable videos in a playlist
    --dump-user-agent          display the current browser identification
    --user-agent UA            specify a custom user agent
    --referer REF              specify a custom referer, use if the video access
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -1,13 +1,20 @@
 #!/usr/bin/env python
+# encoding: utf-8

 # Generate youtube signature algorithm from test cases

 import sys

 tests = [
+    # 93 - vfl79wBKW 2013/07/20
+    (u"qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"€",
+     u".>/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ098765'321mnbvcxzasdfghjklpoiu"),
    # 92 - vflQw-fB4 2013/07/17
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
     "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
+    # 91 - vfl79wBKW 2013/07/20 (sporadic)
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~",
+     "/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543.1mnbvcxzasdfghjklpoiu"),
    # 90
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
     "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -544,11 +544,11 @@ class YoutubeDL(object):
            else:
                try:
                    success = self.fd._do_download(filename, info_dict)
-                except (OSError, IOError) as err:
-                    raise UnavailableVideoError(err)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
+                except (OSError, IOError) as err:
+                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -30,6 +30,7 @@ __authors__  = (
    'Pierre Rudloff',
    'Huarong Huo',
    'Ismael Mejía',
+    'Steffan \'Ruirize\' James',
 )

 __license__ = 'Public Domain'
@@ -149,7 +150,7 @@ def parseOpts(overrideArguments=None):
    general.add_option('-U', '--update',
            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
    general.add_option('-i', '--ignore-errors',
-            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -24,11 +24,16 @@ from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
+from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .facebook import FacebookIE
+from .fktv import (
+    FKTVIE,
+    FKTVPosteckeIE,
+)
 from .flickr import FlickrIE
 from .francetv import (
    PluzzIE,
@@ -68,6 +73,7 @@ from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
+from .newgrounds import NewgroundsIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dl/extractor/ebaumsworld.py
@@ -0,0 +1,37 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class EbaumsWorldIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
+        u'file': u'83367677.mp4',
+        u'info_dict': {
+            u'title': u'A Giant Python Opens The Door',
+            u'description': u'This is how nightmares start...',
+            u'uploader': u'jihadpizza',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        config_xml = self._download_webpage(
+            'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
+        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
+        video_url = config.find('file').text
+
+        return {
+            'id': video_id,
+            'title': config.find('title').text,
+            'url': video_url,
+            'ext': determine_ext(video_url),
+            'description': config.find('description').text,
+            'thumbnail': config.find('image').text,
+            'uploader': config.find('username').text,
+        }
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@@ -0,0 +1,79 @@
+import re
+import random
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    get_element_by_id,
+    clean_html,
+)
+
+
+class FKTVIE(InfoExtractor):
+    IE_NAME = u'fernsehkritik.tv'
+    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
+
+    _TEST = {
+        u'url': u'http://fernsehkritik.tv/folge-1',
+        u'file': u'00011.flv',
+        u'info_dict': {
+            u'title': u'Folge 1 vom 10. April 2007',
+            u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        episode = int(mobj.group('ep'))
+
+        server = random.randint(2, 4)
+        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
+        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
+            episode)
+        playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
+            u'playlist', flags=re.DOTALL)
+        files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
+        # TODO: return a single multipart video
+        videos = []
+        for i, _ in enumerate(files, 1):
+            video_id = '%04d%d' % (episode, i)
+            video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
+            video_title = 'Fernsehkritik %d.%d' % (episode, i)
+            videos.append({
+                'id': video_id,
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'title': clean_html(get_element_by_id('eptitle', start_webpage)),
+                'description': clean_html(get_element_by_id('contentlist', start_webpage)),
+                'thumbnail': video_thumbnail
+            })
+        return videos
+
+
+class FKTVPosteckeIE(InfoExtractor):
+    IE_NAME = u'fernsehkritik.tv:postecke'
+    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
+    _TEST = {
+        u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
+        u'file': u'0120.flv',
+        u'md5': u'262f0adbac80317412f7e57b4808e5c4',
+        u'info_dict': {
+            u"title": u"Postecke 120"
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        episode = int(mobj.group('ep'))
+
+        server = random.randint(2, 4)
+        video_id = '%04d' % episode
+        video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
+        video_title = 'Postecke %d' % episode
+        return {
+            'id':       video_id,
+            'url':      video_url,
+            'ext':      determine_ext(video_url),
+            'title':    video_title,
+        }
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -34,17 +34,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
    IE_NAME = u'pluzz.francetv.fr'
    _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'

-    _TEST = {
-        u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
-        u'file': u'88439064.mp4',
-        u'info_dict': {
-            u'title': u'Allô Rufo',
-            u'description': u'md5:d909f1ebdf963814b65772aea250400e',
-        },
-        u'params': {
-            u'skip_download': True,
-        },
-    }
+    # Can't use tests, videos expire in 7 days

    def _real_extract(self, url):
        title = re.match(self._VALID_URL, url).group(1)
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -21,7 +21,8 @@ class FunnyOrDieIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

-        video_url = self._search_regex(r'type="video/mp4" src="(.*?)"',
+        video_url = self._search_regex(
+            [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
            webpage, u'video URL', flags=re.DOTALL)

        info = {
--- a/youtube_dl/extractor/hotnewhiphop.py
+++ b/youtube_dl/extractor/hotnewhiphop.py
@@ -7,11 +7,11 @@ from .common import InfoExtractor
 class HotNewHipHopIE(InfoExtractor):
    _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'
    _TEST = {
-        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'",
+        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
        u'file': u'1435540.mp3',
        u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
        u'info_dict': {
-            u"title": u"Freddie Gibbs Songs - Lay It Down"
+            u"title": u"Freddie Gibbs - Lay It Down"
        }
    }

--- a/youtube_dl/extractor/newgrounds.py
+++ b/youtube_dl/extractor/newgrounds.py
@@ -0,0 +1,38 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class NewgroundsIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
+    _TEST = {
+        u'url': u'http://www.newgrounds.com/audio/listen/549479',
+        u'file': u'549479.mp3',
+        u'md5': u'fe6033d297591288fa1c1f780386f07a',
+        u'info_dict': {
+            u"title": u"B7 - BusMode",
+            u"uploader": u"Burn7",
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        music_id = mobj.group('id')
+        webpage = self._download_webpage(url, music_id)
+        
+        title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title')
+        uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader')
+        
+        music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}'
+        music_url_json = json.loads(music_url_json_string)
+        music_url = music_url_json['url']
+
+        return {
+            'id':       music_id,
+            'title':    title,
+            'url':      music_url,
+            'uploader': uploader,
+            'ext':      determine_ext(music_url),
+        }
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -11,8 +11,8 @@ from ..utils import (

 class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
-    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
-    _TEST = {
+    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _TESTS = [{
        u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
        u'file': u'1509445.flv',
        u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
@@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor):
            u"uploader_id": u"Ruseful2011", 
            u"title": u"FemaleAgent Shy beauty takes the bait"
        }
-    }
+    },
+    {
+        u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+        u'file': u'2221348.flv',
+        u'md5': u'e767b9475de189320f691f49c679c4c7',
+        u'info_dict': {
+            u"upload_date": u"20130914", 
+            u"uploader_id": u"jojo747400", 
+            u"title": u"Britney Spears  Sexy Booty"
+        }
+    }]

    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
-        mrss_url = 'http://xhamster.com/movies/%s/.html?hd' % video_id
+        seo = mobj.group('seo')
+        mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
        webpage = self._download_webpage(mrss_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -416,8 +416,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""

-        if len(s) == 92:
+        if len(s) == 93:
+            return s[86:29:-1] + s[88] + s[28:5:-1]
+        elif len(s) == 92:
            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
+        elif len(s) == 91:
+            return s[84:27:-1] + s[86] + s[26:5:-1]
        elif len(s) == 90:
            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
        elif len(s) == 89:
@@ -783,10 +787,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                        if self._downloader.params.get('verbose'):
                            s = url_data['s'][0]
                            if age_gate:
-                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
-                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
-                                    'flash player', fatal=False)
-                                player = 'flash player %s' % player_version
+                                player = 'flash player'
                            else:
                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
@@ -1008,6 +1009,9 @@ class YoutubeUserIE(InfoExtractor):
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+            if 'entry' not in response['feed']:
+                # Number of videos is a multiple of self._MAX_RESULTS
+                break

            # Extract video identifiers
            ids_in_page = []
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.09.17'
+__version__ = '2013.09.20.1'
Author	SHA1	Message	Date
Philipp Hagemeister	58f289d013	release 2013.09.20.1	2013-09-20 22:59:14 +02:00
Jaime Marquínez Ferrándiz	3d60bb96e1	Add an extractor for ebaumsworld.com (closes #1462 )	2013-09-20 16:55:50 +02:00
Jaime Marquínez Ferrándiz	38d025b3f0	[youtube] add algo for length 91	2013-09-20 14:43:16 +02:00
Jaime Marquínez Ferrándiz	c40c6aaaaa	Catch socket.error before IOError Since python 2.6 it's a child class.	2013-09-20 13:26:03 +02:00
Jaime Marquínez Ferrándiz	1a810f0d4e	[funnyordie] Fix video url extraction	2013-09-20 13:05:34 +02:00
Philipp Hagemeister	63037593c0	release 2013.09.20	2013-09-20 10:24:48 +02:00
Jaime Marquínez Ferrándiz	7a878d47fa	Merge pull request #1464 from patrickslin/patch-7 Unable to decrypt signature length 93 (fixes #1461)	2013-09-20 08:25:10 +02:00
patrickslin	bc4b900898	Unable to decrypt signature length 93 (fixes #1461 )	2013-09-19 21:49:06 -07:00
Jaime Marquínez Ferrándiz	c5e743f66f	[fktv] support videos splitted in any number of parts and some style changes	2013-09-18 23:32:37 +02:00
Jaime Marquínez Ferrándiz	6c36d8d6fb	Merge pull request #1438 from rzhxeo/fktv Add support for http://fernsehkritik.tv	2013-09-18 23:05:56 +02:00
Jaime Marquínez Ferrándiz	71c82637e7	[youtube] apply the fix for lists with number of videos multiple of _MAX_RESULTS to user extraction Copied from the playlist extractor.	2013-09-18 23:00:32 +02:00
Philipp Hagemeister	2dad310e2c	Credit @Ruirize for newgrounds	2013-09-18 22:30:22 +02:00
Philipp Hagemeister	d0ae9e3a8d	[newgrounds] simplify	2013-09-18 22:14:43 +02:00
Ruirize	a19413c311	Changed file hash.	2013-09-18 17:17:12 +01:00
Ruirize	1ef80b55dd	Fixes test fail Was unaware of --id being passed to test.	2013-09-18 16:23:38 +01:00
Ruirize	eb03f4dad3	Added Newgrounds support	2013-09-18 15:54:45 +01:00
Philipp Hagemeister	830dd1944a	Clarify -i help (#1453 )	2013-09-18 13:23:04 +02:00
rzhxeo	1237c9a3a5	XHamsterIE: Fix support for new HD video url format and add test (closes PR #1443 )	2013-09-17 23:08:01 +02:00
Jaime Marquínez Ferrándiz	5d13df79a5	[francetv] Remove Pluzz test Videos expire in 7 days	2013-09-17 22:49:43 +02:00
Jaime Marquínez Ferrándiz	6523223a4c	[hotnewhiphop] Fix test case title	2013-09-17 21:10:57 +02:00
Jaime Marquínez Ferrándiz	4a67aafb7e	[youtube] Don't search the flash player version for videos with age gate activated	2013-09-17 20:59:55 +02:00
rzhxeo	0761d02b0b	Add FKTV extractor	2013-09-16 14:46:19 +02:00
rzhxeo	71c107fc57	Add FKTV extractor Support for Fernsehkritik-TV (incl. Postecke)	2013-09-16 14:45:14 +02:00