release 2014.03.21.4

[videolectures] (New extractor)
[daum] Modernize
2014-03-21 14:38:55 +01:00 · 2014-03-21 14:38:41 +01:00 · 2014-03-21 14:38:41 +01:00 · 2014-03-21 08:01:20 +01:00 · 2014-03-21 02:10:35 +01:00 · 2014-03-21 02:10:24 +01:00
7 changed files with 161 additions and 18 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -174,6 +174,7 @@ from .nowness import NownessIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
+from .parliamentliveuk import ParliamentLiveUKIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .playvid import PlayvidIE
@@ -259,6 +260,7 @@ from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
+from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .vimeo import (
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -10,9 +10,9 @@ from ..utils import (


 class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
    IE_DESC = 'C-SPAN'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
@@ -22,13 +22,24 @@ class CSpanIE(InfoExtractor):
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
-    }
+    }, {
+        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
+        # For whatever reason, the served video alternates between
+        # two different ones
+        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
+        'info_dict': {
+            'id': '340723',
+            'ext': 'mp4',
+            'title': 'International Health Care Models',
+            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
+        }
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_id = mobj.group('id')
        webpage = self._download_webpage(url, page_id)
-        video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
+        video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')

        description = self._html_search_regex(
            [
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -1,25 +1,28 @@
 # encoding: utf-8
+
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
-    determine_ext,
 )


 class DaumIE(InfoExtractor):
    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
-    IE_NAME = u'daum.net'
+    IE_NAME = 'daum.net'

    _TEST = {
-        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
-        u'file': u'52554690.mp4',
-        u'info_dict': {
-            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
-            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
-            u'upload_date': u'20130831',
-            u'duration': 3868,
+        'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+        'info_dict': {
+            'id': '52554690',
+            'ext': 'mp4',
+            'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
+            'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
+            'upload_date': '20130831',
+            'duration': 3868,
        },
    }

@@ -30,14 +33,14 @@ class DaumIE(InfoExtractor):
        webpage = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(
            r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
-            webpage, u'full id')
+            webpage, 'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info = self._download_xml(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
-            u'Downloading video info')
+            'Downloading video info')
        urls = self._download_xml(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
-            video_id, u'Downloading video formats info')
+            video_id, 'Downloading video formats info')

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
@@ -53,7 +56,6 @@ class DaumIE(InfoExtractor):
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
-                'ext': determine_ext(format_url),
                'format_id': profile,
            })

--- a/youtube_dl/extractor/parliamentliveuk.py
+++ b/youtube_dl/extractor/parliamentliveuk.py
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+)
+
+
+class ParliamentLiveUKIE(InfoExtractor):
+    IE_NAME = 'parliamentlive.tv'
+    IE_DESC = 'UK parliament videos'
+    _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
+        'info_dict': {
+            'id': '15121',
+            'ext': 'asf',
+            'title': 'hoc home affairs committee, 18 mar 2014.pm',
+            'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
+        },
+        'params': {
+            'skip_download': True,  # Requires mplayer (mms)
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        asx_url = self._html_search_regex(
+            r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
+            'metadata URL')
+        asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
+        video_url = asx.find('.//REF').attrib['HREF']
+
+        title = self._search_regex(
+            r'''(?x)player\.setClipDetails\(
+                (?:(?:[0-9]+|"[^"]+"),\s*){2}
+                "([^"]+",\s*"[^"]+)"
+                ''',
+            webpage, 'title').replace('", "', ', ')
+        description = self._html_search_regex(
+            r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
+            webpage, 'description')
+
+        return {
+            'id': video_id,
+            'ext': 'asf',
+            'url': video_url,
+            'title': title,
+            'description': description,
+        }
--- a/youtube_dl/extractor/videolecturesnet.py
+++ b/youtube_dl/extractor/videolecturesnet.py
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
+)
+
+
+class VideoLecturesNetIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+    IE_NAME = 'videolectures.net'
+
+    _TEST = {
+        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
+        'info_dict': {
+            'id': 'promogram_igor_mekjavic_eng',
+            'ext': 'mp4',
+            'title': 'Automatics, robotics and biocybernetics',
+            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+            'upload_date': '20130627',
+            'duration': 565,
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
+        smil = self._download_xml(smil_url, video_id)
+
+        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
+        description = find_xpath_attr(smil, './/meta', 'name', 'abstract').attrib['content']
+        upload_date = unified_strdate(
+            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
+
+        switch = smil.find('.//switch')
+        duration = parse_duration(switch.attrib.get('dur'))
+        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
+        thumbnail = (
+            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
+
+        formats = [{
+            'url': v.attrib['src'],
+            'width': int_or_none(v.attrib.get('width')),
+            'height': int_or_none(v.attrib.get('height')),
+            'filesize': int_or_none(v.attrib.get('size')),
+            'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
+            'ext': v.attrib.get('ext'),
+        } for v in switch.findall('./video')
+            if v.attrib.get('proto') == 'http']
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'upload_date': upload_date,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -763,6 +763,10 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):

 def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
+
+    if date_str is None:
+        return None
+
    upload_date = None
    #Replace commas
    date_str = date_str.replace(',', ' ')
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.03.21.1'
+__version__ = '2014.03.21.4'
Author	SHA1	Message	Date
Philipp Hagemeister	3e50c29984	release 2014.03.21.4	2014-03-21 14:38:55 +01:00
Philipp Hagemeister	64e7ad6045	[videolectures] (New extractor)	2014-03-21 14:38:41 +01:00
Philipp Hagemeister	23f4a93bb4	[daum] Modernize	2014-03-21 14:38:41 +01:00
Jaime Marquínez Ferrándiz	6f13b055f1	[cspan] Fix typo in a comment	2014-03-21 08:01:20 +01:00
Philipp Hagemeister	1f91bd15c3	release 2014.03.21.3	2014-03-21 02:10:35 +01:00
Philipp Hagemeister	11a15be4ce	[cspan] Add support for newer videos (Fixes #2577)	2014-03-21 02:10:24 +01:00
Philipp Hagemeister	14e17e18cb	release 2014.03.21.2	2014-03-21 01:42:45 +01:00
Philipp Hagemeister	1b124d1942	[parliamentliveuk] Add extractor	2014-03-21 01:42:28 +01:00