Compare commits
15 Commits
2014.10.15 ... 2014.10.18
- 0f175a932f
- 849b269273
- 95fa5fb569
- 77c3c5c5ed
- 159444a668
- f9befee1f5
- 9471c44405
- 013bfdd84c
- 46fd0dd5a5
- 4698f0d858
- 355d074ff9
- 7da224c907
- 1723edb1a5
- 4740864508
- 09a42738fc
youtube_dl/extractor/__init__.py

```diff
@@ -173,7 +173,6 @@ from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
 from .jukebox import JukeboxIE
-from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
 from .keezmovies import KeezMoviesIE
@@ -316,6 +315,7 @@ from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
+from .sexykarma import SexyKarmaIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
 from .sina import SinaIE
@@ -395,6 +395,7 @@ from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
 from .tvplay import TVPlayIE
+from .twitch import TwitchIE
 from .ubu import UbuIE
 from .udemy import (
     UdemyIE,
```
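Registering `TwitchIE` (and `SexyKarmaIE`) in this list is what makes the new extractors reachable: youtube-dl walks the registry and asks each class whether it wants a URL via the `suitable()` classmethod derived from `_VALID_URL`. A minimal sketch of that dispatch, assuming the era's public `gen_extractors()` helper; the URLs and expected output are illustrative, not captured program output:

```python
# Sketch of URL dispatch against the registry edited above. gen_extractors()
# and IE.suitable() are real youtube-dl APIs of this period.
from youtube_dl.extractor import gen_extractors

def find_ie_name(url):
    for ie in gen_extractors():  # instances, in registry order, Generic last
        if ie.suitable(url):
            return ie.IE_NAME
    return None

print(find_ie_name('http://www.twitch.tv/vanillatv'))
# 'Twitch' once TwitchIE is registered; justin.tv URLs, whose dedicated
# extractor was just deleted, now fall through to the generic extractor.
```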
youtube_dl/extractor/arte.py

```diff
@@ -160,6 +160,9 @@ class ArteTVPlus7IE(InfoExtractor):
                 format_id = '%s-%s' % (quality, format_info['versionCode'])
             else:
                 format_id = quality
+            media_type = format_info.get('mediaType')
+            if media_type is not None:
+                format_id += '-%s' % media_type
             info = {
                 'format_id': format_id,
                 'format_note': format_info.get('versionLibelle'),
```
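The three added lines fold the media type into the format identifier, so two renditions that differ only in container no longer collide. A worked example with invented input values:

```python
# Worked example of the format_id construction above; format_info values are
# invented for illustration, not taken from a real ARTE API response.
format_info = {'versionCode': 'VF', 'mediaType': 'mp4'}
quality = 'HQ'

format_id = '%s-%s' % (quality, format_info['versionCode'])  # 'HQ-VF'
media_type = format_info.get('mediaType')
if media_type is not None:
    format_id += '-%s' % media_type
print(format_id)  # HQ-VF-mp4
```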
youtube_dl/extractor/generic.py

```diff
@@ -609,13 +609,13 @@ class GenericIE(InfoExtractor):
         if mobj:
             player_url = unescapeHTML(mobj.group('url'))
             surl = smuggle_url(player_url, {'Referer': url})
-            return self.url_result(surl, 'Vimeo')
+            return self.url_result(surl)
 
         # Look for embedded (swf embed) Vimeo player
         mobj = re.search(
-            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
         if mobj:
-            return self.url_result(mobj.group(1), 'Vimeo')
+            return self.url_result(mobj.group(1))
 
         # Look for embedded YouTube player
         matches = re.findall(r'''(?x)
```
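Two things happen in this hunk: the hard-coded `'Vimeo'` extractor hint is dropped so the resulting URL is re-matched against `_VALID_URL` patterns instead of being forced to one extractor, and the `Referer` still travels with the URL via `smuggle_url`. A simplified sketch of the smuggling round trip; the real helpers live in `youtube_dl/utils.py` and encode the payload slightly differently:

```python
# Simplified sketch of smuggle_url/unsmuggle_url: extra request context rides
# in a reserved URL fragment until the target extractor unpacks it.
import json
try:
    from urllib.parse import quote, unquote  # Python 3
except ImportError:
    from urllib import quote, unquote  # Python 2

def smuggle_url(url, data):
    # Append the JSON payload behind a marker fragment.
    return url + '#__youtubedl_smuggle=' + quote(json.dumps(data))

def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle=' not in smug_url:
        return smug_url, default
    url, _, payload = smug_url.rpartition('#__youtubedl_smuggle=')
    return url, json.loads(unquote(payload))

surl = smuggle_url('http://player.vimeo.com/video/123', {'Referer': 'http://example.com/'})
print(unsmuggle_url(surl))
# ('http://player.vimeo.com/video/123', {'Referer': 'http://example.com/'})
```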
youtube_dl/extractor/generic.py (continued)

```diff
@@ -654,15 +654,17 @@ class GenericIE(InfoExtractor):
         match = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
         if match:
+            embed_url = self._proto_relative_url(
+                unescapeHTML(match.group('url')))
             return {
                 '_type': 'url_transparent',
-                'url': unescapeHTML(match.group('url')),
+                'url': embed_url,
                 'ie_key': 'Wistia',
                 'uploader': video_uploader,
                 'title': video_title,
                 'id': video_id,
             }
-        match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
+        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
         if match:
             return {
                 '_type': 'url_transparent',
```
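The Wistia iframe regex already accepts scheme-less `//fast.wistia.net/...` embeds, and the new `_proto_relative_url` call pins an explicit scheme back on before the URL is handed to the Wistia extractor. The helper behaves roughly like this sketch:

```python
# Sketch of the protocol-relative URL fix-up used above; youtube-dl's
# InfoExtractor._proto_relative_url behaves like this (default scheme 'http:').
def proto_relative_url(url, scheme='http:'):
    if url is None:
        return None
    if url.startswith('//'):
        return scheme + url
    return url

print(proto_relative_url('//fast.wistia.net/embed/iframe/abc123'))
# http://fast.wistia.net/embed/iframe/abc123
print(proto_relative_url('https://fast.wistia.net/embed/iframe/abc123'))
# https://fast.wistia.net/embed/iframe/abc123  (already absolute, unchanged)
```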
youtube_dl/extractor/justintv.py (deleted file, 155 lines removed)

```python
from __future__ import unicode_literals

import itertools
import json
import os
import re

from .common import InfoExtractor
from ..utils import (
    compat_str,
    ExtractorError,
    formatSeconds,
)


class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?

    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = 'justin.tv'
    IE_DESC = 'justin.tv and twitch.tv'
    _TEST = {
        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
        'md5': 'ecaa8a790c22a40770901460af191c9a',
        'info_dict': {
            'id': '296128360',
            'ext': 'flv',
            'upload_date': '20110927',
            'uploader_id': 25114803,
            'uploader': 'thegamedevhub',
            'title': 'Beginner Series - Scripting With Python Pt.1'
        }
    }

    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id, counter):
        info_json = self._download_webpage(
            url, video_id,
            'Downloading video info JSON on page %d' % counter,
            'Unable to download video info JSON %d' % counter)

        response = json.loads(info_json)
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
            raise ExtractorError('Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
            if video_url:
                video_extension = os.path.splitext(video_url)[1][1:]
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
                    'id': compat_str(video_id),
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
                    'ext': video_extension,
                })
        return (len(response), info)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

        api_base = 'http://api.justin.tv'
        paged = False
        if mobj.group('channelid'):
            paged = True
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            chapter_id = mobj.group('chapterid')

            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
                raise ExtractorError('Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            doc = self._download_xml(
                api, chapter_id,
                note='Downloading chapter information',
                errnote='Chapter information download failed')
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError('Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or 'flv'

            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info = self._download_json(
                chapter_api_url, 'c' + chapter_id,
                note='Downloading chapter metadata',
                errnote='Download of chapter metadata failed')

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += '?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': 'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return info
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        entries = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        for counter in itertools.count(1):
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(
                page_url, video_id, counter)
            entries.extend(page_info)
            if not paged or page_count != limit:
                break
            offset += limit
        return {
            '_type': 'playlist',
            'id': video_id,
            'entries': entries,
        }
```
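Worth noting before it disappears: the channel branch pages through the archive with an offset/limit loop and stops at the first short page; the new `twitch.py` below keeps the same pattern, stopping on an empty page instead. A standalone sketch of that loop, with `fetch_page` standing in for the paged API call:

```python
# Standalone sketch of the offset/limit pagination used by the old JustinTVIE
# and the new TwitchIE. fetch_page is a stand-in for the API request.
import itertools

def fetch_all(fetch_page, limit=100):
    entries = []
    offset = 0
    for counter in itertools.count(1):  # counter only matters for progress notes
        page = fetch_page(offset, limit)  # e.g. GET ...?offset=N&limit=M
        entries.extend(page)
        if len(page) < limit:  # a short (or empty) page means we are done
            break
        offset += limit
    return entries

# Fake backend holding 250 items, purely for demonstration:
DATA = list(range(250))
print(len(fetch_all(lambda off, lim: DATA[off:off + lim])))  # 250, in 3 requests
```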
youtube_dl/extractor/sexykarma.py (new file, 106 lines)

```python
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    unified_strdate,
    parse_duration,
    int_or_none,
)


class SexyKarmaIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
    _TESTS = [{
        'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
        'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
        'info_dict': {
            'id': 'yHI70cOyIHt',
            'display_id': 'taking-a-quick-pee',
            'ext': 'mp4',
            'title': 'Taking a quick pee.',
            'description': '',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'wildginger7',
            'upload_date': '20141007',
            'duration': 81,
            'view_count': int,
            'comment_count': int,
            'categories': list,
        }
    }, {
        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
        'md5': 'dd216c68d29b49b12842b9babe762a5d',
        'info_dict': {
            'id': '8Id6EZPbuHf',
            'display_id': 'pot-pixie-tribute',
            'ext': 'mp4',
            'title': 'pot_pixie tribute',
            'description': 'tribute',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'banffite',
            'upload_date': '20141013',
            'duration': 16,
            'view_count': int,
            'comment_count': int,
            'categories': list,
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        video_url = self._html_search_regex(
            r'<p>Save this video to your computer: </p><p><a href="([^"]+)"',
            webpage, 'url')

        title = self._html_search_regex(
            r'<h2 class="he2"><span>(.*?)</span>',
            webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description', fatal=False, default='')
        thumbnail = self._html_search_regex(
            r'<span id="container"><img\s+src="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        uploader = self._html_search_regex(
            r'class="aupa">\s*(.*?)</a>',
            webpage, 'uploader')
        upload_date = unified_strdate(self._html_search_regex(
            r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))

        duration = parse_duration(self._search_regex(
            r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
            webpage, 'duration', fatal=False))

        view_count = int_or_none(self._search_regex(
            r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
            webpage, 'view count', fatal=False))
        comment_count = int_or_none(self._search_regex(
            r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
            webpage, 'comment count', fatal=False))

        categories = self._html_search_meta(
            'keywords', webpage, 'categories',
            fatal=False, default='').split(',')

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'categories': categories,
        }
```
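Every non-essential field above is fetched with `fatal=False` and pushed through `int_or_none`/`parse_duration`, so a markup change on the site degrades the metadata to `None` instead of aborting extraction. Minimal sketches of those two helpers; the real versions in `youtube_dl/utils.py` cover more edge cases:

```python
# Minimal sketches of the defensive conversion helpers this extractor uses;
# simplified relative to youtube_dl/utils.py.
import re

def int_or_none(v, default=None):
    # Missing or unparsable values become None instead of raising.
    try:
        return int(v)
    except (TypeError, ValueError):
        return default

def parse_duration(s):
    # Accepts 'SS', 'MM:SS' or 'HH:MM:SS'; returns seconds, else None.
    if s is None:
        return None
    m = re.match(r'^(?:(?:(\d+):)?(\d+):)?(\d+)$', s.strip())
    if not m:
        return None
    hours, minutes, seconds = (int(g) if g else 0 for g in m.groups())
    return hours * 3600 + minutes * 60 + seconds

print(int_or_none('81'), int_or_none(None))           # 81 None
print(parse_duration('1:21'), parse_duration('bad'))  # 81 None
```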
youtube_dl/extractor/twitch.py (new file, 187 lines)

```python
from __future__ import unicode_literals

import itertools
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    parse_iso8601,
)


class TwitchIE(InfoExtractor):
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
    _PAGE_LIMIT = 100
    _API_BASE = 'https://api.twitch.tv'
    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/b/577357806',
        'info_dict': {
            'id': 'a577357806',
            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
        'info_dict': {
            'id': 'c5285812',
            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'http://www.twitch.tv/vanillatv',
        'info_dict': {
            'id': 'vanillatv',
            'title': 'VanillaTV',
        },
        'playlist_mincount': 412,
    }]

    def _handle_error(self, response):
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
                expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
        response = super(TwitchIE, self)._download_json(url, video_id, note)
        self._handle_error(response)
        return response

    def _extract_media(self, item, item_id):
        ITEMS = {
            'a': 'video',
            'c': 'chapter',
        }
        info = self._extract_info(self._download_json(
            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s info JSON' % ITEMS[item]))
        response = self._download_json(
            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s playlist JSON' % ITEMS[item])
        entries = []
        chunks = response['chunks']
        qualities = list(chunks.keys())
        for num, fragment in enumerate(zip(*chunks.values()), start=1):
            formats = []
            for fmt_num, fragment_fmt in enumerate(fragment):
                format_id = qualities[fmt_num]
                fmt = {
                    'url': fragment_fmt['url'],
                    'format_id': format_id,
                    'quality': 1 if format_id == 'live' else 0,
                }
                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
                if m:
                    fmt['height'] = int(m.group('height'))
                formats.append(fmt)
            self._sort_formats(formats)
            entry = dict(info)
            entry['id'] = '%s_%d' % (entry['id'], num)
            entry['title'] = '%s part %d' % (entry['title'], num)
            entry['formats'] = formats
            entries.append(entry)
        return self.playlist_result(entries, info['id'], info['title'])

    def _extract_info(self, info):
        return {
            'id': info['_id'],
            'title': info['title'],
            'description': info['description'],
            'duration': info['length'],
            'thumbnail': info['preview'],
            'uploader': info['channel']['display_name'],
            'uploader_id': info['channel']['name'],
            'timestamp': parse_iso8601(info['recorded_at']),
            'view_count': info['views'],
        }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj.group('chapterid'):
            return self._extract_media('c', mobj.group('chapterid'))

            """
            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
                raise ExtractorError('Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            doc = self._download_xml(
                api, chapter_id,
                note='Downloading chapter information',
                errnote='Chapter information download failed')
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError('Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or 'flv'

            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info = self._download_json(
                chapter_api_url, 'c' + chapter_id,
                note='Downloading chapter metadata',
                errnote='Download of chapter metadata failed')

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += '?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': 'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return info
            """
        elif mobj.group('videoid'):
            return self._extract_media('a', mobj.group('videoid'))
        elif mobj.group('channelid'):
            channel_id = mobj.group('channelid')
            info = self._download_json(
                '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
                channel_id, 'Downloading channel info JSON')
            channel_name = info.get('display_name') or info.get('name')
            entries = []
            offset = 0
            limit = self._PAGE_LIMIT
            for counter in itertools.count(1):
                response = self._download_json(
                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
                    % (self._API_BASE, channel_id, offset, limit),
                    channel_id, 'Downloading channel videos JSON page %d' % counter)
                videos = response['videos']
                if not videos:
                    break
                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
                offset += limit
            return self.playlist_result(entries, channel_id, channel_name)
```
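The one non-obvious move in `_extract_media` is `zip(*chunks.values())`: the API returns one fragment list per quality, and zipping transposes that into per-part groups, each holding the same broadcast part in every quality. `chunks.keys()` and `chunks.values()` iterate in matching order, which is what makes indexing into `qualities` valid. A toy transpose with an invented payload shape:

```python
# Toy demonstration of the zip(*chunks.values()) transpose in _extract_media;
# the chunk payload shape here is invented for illustration.
chunks = {
    'live': [{'url': 'live-part1'}, {'url': 'live-part2'}],
    '240p': [{'url': '240p-part1'}, {'url': '240p-part2'}],
}
qualities = list(chunks.keys())

for num, fragment in enumerate(zip(*chunks.values()), start=1):
    # fragment holds part <num> in every quality, aligned with `qualities`
    formats = [{'format_id': qualities[i], 'url': f['url']}
               for i, f in enumerate(fragment)]
    print(num, formats)
# 1 [{'format_id': 'live', 'url': 'live-part1'}, {'format_id': '240p', 'url': '240p-part1'}]
# 2 [{'format_id': 'live', 'url': 'live-part2'}, {'format_id': '240p', 'url': '240p-part2'}]
```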
youtube_dl/extractor/vimeo.py

```diff
@@ -157,6 +157,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 'duration': 62,
             }
         },
+        {
+            # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
+            'url': 'https://player.vimeo.com/video/98044508',
+            'note': 'The js code contains assignments to the same variable as the config',
+            'info_dict': {
+                'id': '98044508',
+                'ext': 'mp4',
+                'title': 'Pier Solar OUYA Official Trailer',
+                'uploader': 'Tulio Gonçalves',
+                'uploader_id': 'user28849593',
+            },
+        },
     ]
 
     def _verify_video_password(self, url, video_id, webpage):
@@ -244,7 +256,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         # We try to find out to which variable is assigned the config dic
         m_variable_name = re.search('(\w)\.video\.id', webpage)
         if m_variable_name is not None:
-            config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1))
+            config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
         else:
             config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
         config = self._search_regex(config_re, webpage, 'info section',
```
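The tightened config regex requires the first character inside the braces not to be `}`, which keeps a preceding empty assignment to the same variable (the situation the new Pier Solar test exercises) from derailing the lazy match. A quick demonstration on a synthetic page snippet:

```python
# Why '{[^}]' was added: on a page that assigns an empty object to the same
# variable first, the old lazy pattern captures garbage spanning both
# assignments. The webpage string below is synthetic.
import re

webpage = 'e={};e={"config":{"video":{"id":98044508}}};'
var = 'e'

old = re.search(r'%s=({.+?});' % re.escape(var), webpage)
new = re.search(r'%s=({[^}].+?});' % re.escape(var), webpage)
print(old.group(1))  # {};e={"config":{"video":{"id":98044508}}}  -- not JSON
print(new.group(1))  # {"config":{"video":{"id":98044508}}}
```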
youtube_dl/version.py

```diff
@@ -1,2 +1,2 @@
 
-__version__ = '2014.10.15'
+__version__ = '2014.10.18'
```