[comcarcoff] (#4454)

2014-12-12 20:32:02 +01:00
parent 46d9760f5e
commit dc5596ff54
3 changed files with 53 additions and 47 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -48,7 +48,6 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
-from .ccc import ComCarCoffIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
@@ -66,6 +65,7 @@ from .cnn import (
 )
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+from .comcarcoff import ComCarCoffIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -1,46 +0,0 @@
-# encoding: utf-8
-import re
-import json
-
-from .common import InfoExtractor
-from ..utils import (
-    unified_strdate,
-)
-
-class ComCarCoffIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]+)/?'
-    _TESTS = [
-        {
-            'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
-            'info_dict': {
-                'id': 'miranda-sings-happy-thanksgiving-miranda',
-                'upload_date': '20141127',
-                'title': 'Happy Thanksgiving Miranda',
-                'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
-                'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
-            },
-        }
-    ]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        full_data = json.loads(self._search_regex(
-            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
-            webpage, 'json'))
-
-        video_id = full_data['activeVideo']['video']
-        video_data = full_data['videos'][video_id]
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': video_data['title'],
-            'description': video_data['description'],
-            # XXX: the original datum is a full ISO timestamp... why convert it to a worse format?
-            'upload_date': unified_strdate(video_data['pubDate']),
-            'thumbnail': video_data['images']['thumb'],
-            # XXX: what do we do with video_data['images']['poster']?
-            'formats': self._extract_m3u8_formats(video_data['mediaUrl'], video_id),
-        }
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -0,0 +1,52 @@
+# encoding: utf-8
+import json
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class ComCarCoffIE(InfoExtractor):
+    """Information extractor for comediansincarsgettingcoffee.com pages."""
+    _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]+)/?'
+    _TESTS = [{
+        'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
+        'info_dict': {
+            'id': 'miranda-sings-happy-thanksgiving-miranda',
+            'ext': 'mp4',
+            'upload_date': '20141127',
+            'timestamp': 1417107600,
+            'title': 'Happy Thanksgiving Miranda',
+            'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
+            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
+        },
+        'params': {
+            'skip_download': 'requires ffmpeg',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict from the JSON in the page's videoData script tag."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        # All metadata comes from a JSON blob embedded in the downloaded page.
+        full_data = json.loads(self._search_regex(
+            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
+            webpage, 'full data json'))
+        video_id = full_data['activeVideo']['video']
+        video_data = full_data['videos'][video_id]
+        # Thumbnails are optional: skip absent images instead of raising KeyError.
+        images = video_data.get('images') or {}
+        thumbnails = [
+            {'url': images[key]} for key in ('thumb', 'poster') if images.get(key)]
+        formats = self._extract_m3u8_formats(
+            video_data['mediaUrl'], video_id, ext='mp4')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': video_data['title'],
+            'description': video_data.get('description'),
+            'timestamp': parse_iso8601(video_data.get('pubDate')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }