Compare commits
68 Commits
2014.06.02
...
2014.06.09
Author | SHA1 | Date | |
---|---|---|---|
![]() |
9706f3f802 | ||
![]() |
d5e944359e | ||
![]() |
826ec77fb2 | ||
![]() |
2656f4eb6a | ||
![]() |
2b88feedf7 | ||
![]() |
23566e0d78 | ||
![]() |
828553b614 | ||
![]() |
3048e82a94 | ||
![]() |
09ffa08ba1 | ||
![]() |
e0b4cc489f | ||
![]() |
15e423407f | ||
![]() |
702e522044 | ||
![]() |
814d4257df | ||
![]() |
23ae281b31 | ||
![]() |
94128d6b0d | ||
![]() |
059009c592 | ||
![]() |
9cc977f104 | ||
![]() |
1c0ade7afa | ||
![]() |
f2741c8d3a | ||
![]() |
6ab8f3584a | ||
![]() |
8ae5ce1726 | ||
![]() |
eb92077720 | ||
![]() |
90e0fd4bad | ||
![]() |
05741e05d9 | ||
![]() |
9aa6637644 | ||
![]() |
d30d28156d | ||
![]() |
be6d722904 | ||
![]() |
d551980823 | ||
![]() |
f0a6c3d2bc | ||
![]() |
4e0fb1280a | ||
![]() |
24f5251cce | ||
![]() |
ac1390eee8 | ||
![]() |
4a5b4d34dc | ||
![]() |
63adb0cc61 | ||
![]() |
3c80377b69 | ||
![]() |
24577db241 | ||
![]() |
566bd96da8 | ||
![]() |
ebdb64d605 | ||
![]() |
a6ffb92f0b | ||
![]() |
3217377b3c | ||
![]() |
24da5893fc | ||
![]() |
087ca2cb07 | ||
![]() |
b4e7447458 | ||
![]() |
a45e6aadd7 | ||
![]() |
70e322695d | ||
![]() |
6a15923b77 | ||
![]() |
7ffad0af5a | ||
![]() |
0e3ae92441 | ||
![]() |
b3ae826f7a | ||
![]() |
dede691aca | ||
![]() |
fb6a5b965b | ||
![]() |
6340716b3a | ||
![]() |
b675b32e6b | ||
![]() |
6a3fa81ffb | ||
![]() |
df53a98f2b | ||
![]() |
db23d8d2a2 | ||
![]() |
0d69795014 | ||
![]() |
3374f3fdc2 | ||
![]() |
4bf0727b1f | ||
![]() |
263bd4ec50 | ||
![]() |
9c7b79acd9 | ||
![]() |
14470ac87b | ||
![]() |
0cdf576d86 | ||
![]() |
4ffeca4ea2 | ||
![]() |
211fd6c674 | ||
![]() |
8ae980807a | ||
![]() |
34d863f3fc | ||
![]() |
3ee4b60d56 |
@@ -13,7 +13,7 @@ from youtube_dl import YoutubeDL
|
||||
|
||||
|
||||
def _download_restricted(url, filename, age):
|
||||
""" Returns true iff the file has been downloaded """
|
||||
""" Returns true if the file has been downloaded """
|
||||
|
||||
params = {
|
||||
'age_limit': age,
|
||||
|
@@ -28,6 +28,7 @@ from youtube_dl.extractor import (
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE,
|
||||
TeacherTubeClassroomIE,
|
||||
LivestreamIE,
|
||||
NHLVideocenterIE,
|
||||
BambuserChannelIE,
|
||||
@@ -360,5 +361,13 @@ class TestPlaylists(unittest.TestCase):
|
||||
result['title'], 'Brace Yourself - Today\'s Weirdest News')
|
||||
self.assertTrue(len(result['entries']) >= 10)
|
||||
|
||||
def test_TeacherTubeClassroom(self):
|
||||
dl = FakeYDL()
|
||||
ie = TeacherTubeClassroomIE(dl)
|
||||
result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'rbhagwati2')
|
||||
self.assertTrue(len(result['entries']) >= 20)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -112,11 +112,11 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_youtube_mix(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
|
||||
result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 20)
|
||||
original_video = entries[0]
|
||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||
self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
|
||||
|
||||
def test_youtube_toptracks(self):
|
||||
print('Skipping: The playlist page gives error 500')
|
||||
|
@@ -717,6 +717,17 @@ class YoutubeDL(object):
|
||||
info_dict['playlist'] = None
|
||||
info_dict['playlist_index'] = None
|
||||
|
||||
thumbnails = info_dict.get('thumbnails')
|
||||
if thumbnails:
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('width'), t.get('height'), t.get('url')))
|
||||
for t in thumbnails:
|
||||
if 'width' in t and 'height' in t:
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
|
||||
if thumbnails and 'thumbnail' not in info_dict:
|
||||
info_dict['thumbnail'] = thumbnails[-1]['url']
|
||||
|
||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
|
@@ -57,6 +57,8 @@ __authors__ = (
|
||||
'Jason Normore',
|
||||
'Hoje Lee',
|
||||
'Adam Thalhammer',
|
||||
'Georg Jähnig',
|
||||
'Ralf Haring',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@@ -25,7 +25,7 @@ class HlsFD(FileDownloader):
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
|
@@ -96,6 +96,7 @@ class RtmpFD(FileDownloader):
|
||||
flash_version = info_dict.get('flash_version', None)
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
protocol = info_dict.get('rtmp_protocol', None)
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
@@ -105,7 +106,7 @@ class RtmpFD(FileDownloader):
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run')
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
@@ -133,6 +134,8 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--conn', entry]
|
||||
elif isinstance(conn, compat_str):
|
||||
basic_args += ['--conn', conn]
|
||||
if protocol is not None:
|
||||
basic_args += ['--protocol', protocol]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
|
@@ -142,6 +142,7 @@ from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .ku6 import Ku6IE
|
||||
from .la7 import LA7IE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .liveleak import LiveLeakIE
|
||||
@@ -258,6 +259,7 @@ from .southparkstudios import (
|
||||
from .space import SpaceIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
@@ -266,6 +268,12 @@ from .streamcz import StreamCZIE
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeClassroomIE,
|
||||
)
|
||||
from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
@@ -298,6 +306,7 @@ from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vh1 import VH1IE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
@@ -323,6 +332,7 @@ from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wdr import (
|
||||
|
@@ -39,16 +39,18 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
streams = [
|
||||
mo.groupdict()
|
||||
for mo in re.finditer(
|
||||
r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
|
||||
|
||||
media_info = self._download_json(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
|
||||
# The second element of the _mediaArray contains the standard http urls
|
||||
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
|
||||
if not streams:
|
||||
if '"fsk"' in webpage:
|
||||
raise ExtractorError('This video is only available after 20:00')
|
||||
@@ -56,21 +58,12 @@ class ARDIE(InfoExtractor):
|
||||
formats = []
|
||||
for s in streams:
|
||||
format = {
|
||||
'quality': int(s['quality']),
|
||||
'quality': s['_quality'],
|
||||
'url': s['_stream'],
|
||||
}
|
||||
if s.get('rtmp_url'):
|
||||
format['protocol'] = 'rtmp'
|
||||
format['url'] = s['rtmp_url']
|
||||
format['playpath'] = s['video_url']
|
||||
else:
|
||||
format['url'] = s['video_url']
|
||||
|
||||
quality_name = self._search_regex(
|
||||
r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
|
||||
'quality name', default='NA')
|
||||
format['format_id'] = '%s-%s-%s-%s' % (
|
||||
determine_ext(format['url']), quality_name, s['media_type'],
|
||||
s['quality'])
|
||||
format['format_id'] = '%s-%s' % (
|
||||
determine_ext(format['url']), format['quality'])
|
||||
|
||||
formats.append(format)
|
||||
|
||||
|
@@ -1,19 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
from .mtv import MTVIE
|
||||
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = u'cmt.com'
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
|
||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
||||
u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
|
||||
u'info_dict': {
|
||||
u'id': u'989124',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
u'description': u'Blame It All On My Roots',
|
||||
},
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
||||
'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2',
|
||||
'info_dict': {
|
||||
'id': '989124',
|
||||
'ext': 'mp4',
|
||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
'description': 'Blame It All On My Roots',
|
||||
},
|
||||
]
|
||||
}]
|
||||
|
@@ -79,8 +79,11 @@ class CNNIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
|
||||
thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
|
||||
thumbnails = [{
|
||||
'height': int(t.attrib['height']),
|
||||
'width': int(t.attrib['width']),
|
||||
'url': t.text,
|
||||
} for t in info.findall('images/image')]
|
||||
|
||||
metas_el = info.find('metas')
|
||||
upload_date = (
|
||||
@@ -93,8 +96,7 @@ class CNNIE(InfoExtractor):
|
||||
'id': info.attrib['id'],
|
||||
'title': info.find('headline').text,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnails[-1][1],
|
||||
'thumbnails': thumbs_dict,
|
||||
'thumbnails': thumbnails,
|
||||
'description': info.find('description').text,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
|
@@ -92,8 +92,12 @@ class InfoExtractor(object):
|
||||
unique, but available before title. Typically, id is
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
and display_id "dancing-naked-mole-rats"
|
||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||
"url") for the varying thumbnails
|
||||
thumbnails: A list of dictionaries, with the following entries:
|
||||
* "url"
|
||||
* "width" (optional, int)
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height"},
|
||||
deprecated)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: One-line video description.
|
||||
uploader: Full name of the video uploader.
|
||||
|
@@ -1,39 +1,37 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
||||
u'file': u'36983.mp4',
|
||||
u'md5': u'9dcfe344732808dbfcc901537973c922',
|
||||
u'info_dict': {
|
||||
u"title": u"Kaffeeland Schweiz",
|
||||
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
|
||||
u"uploader": u"3sat",
|
||||
u"upload_date": u"20130622"
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
|
||||
'md5': '9dcfe344732808dbfcc901537973c922',
|
||||
'info_dict': {
|
||||
'id': '36983',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kaffeeland Schweiz',
|
||||
'description': 'md5:cc4424b18b75ae9948b13929a0814033',
|
||||
'uploader': '3sat',
|
||||
'upload_date': '20130622'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
|
||||
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
|
||||
|
||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||
thumbnails = [{
|
||||
'width': te.attrib['key'].partition('x')[0],
|
||||
'height': te.attrib['key'].partition('x')[2],
|
||||
'width': int(te.attrib['key'].partition('x')[0]),
|
||||
'height': int(te.attrib['key'].partition('x')[2]),
|
||||
'url': te.text,
|
||||
} for te in thumbnail_els]
|
||||
|
||||
|
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
uploader = self._html_search_regex(
|
||||
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
|
||||
fatal=False)
|
||||
|
@@ -50,10 +50,13 @@ class FC2IE(InfoExtractor):
|
||||
raise ExtractorError('Error code: %s' % info['err_code'][0])
|
||||
|
||||
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
|
||||
title_info = info.get('title')
|
||||
if title_info:
|
||||
title = title_info[0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'][0],
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -260,7 +260,24 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'Spi0n',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}
|
||||
},
|
||||
# YouTube embed
|
||||
{
|
||||
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
|
||||
'info_dict': {
|
||||
'id': 'FXRb4ykk4S0',
|
||||
'ext': 'mp4',
|
||||
'title': 'The NBL Auction 2014',
|
||||
'uploader': 'BADMINTON England',
|
||||
'uploader_id': 'BADMINTONEvents',
|
||||
'upload_date': '20140603',
|
||||
'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -478,8 +495,13 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
(?:<iframe[^>]+?src=|embedSWF\(\s*)
|
||||
(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?:
|
||||
<iframe[^>]+?src=|
|
||||
<embed[^>]+?src=|
|
||||
embedSWF\(?:\s*
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?:embed|v)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
@@ -646,6 +668,14 @@ class GenericIE(InfoExtractor):
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url)
|
||||
|
||||
# Look for embedded vulture.com player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='Vulture')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if not found:
|
||||
|
@@ -1,10 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
@@ -13,59 +14,55 @@ from ..utils import (
|
||||
|
||||
|
||||
class HypemIE(InfoExtractor):
|
||||
"""Information Extractor for hypem"""
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
|
||||
u'file': u'1v6ga.mp3',
|
||||
u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
|
||||
u'info_dict': {
|
||||
u"title": u"Tame"
|
||||
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
|
||||
'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
|
||||
'info_dict': {
|
||||
'id': '1v6ga',
|
||||
'ext': 'mp3',
|
||||
'title': 'Tame',
|
||||
'uploader': 'BODYWORK',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
track_id = mobj.group(1)
|
||||
|
||||
data = {'ax': 1, 'ts': time.time()}
|
||||
data_encoded = compat_urllib_parse.urlencode(data)
|
||||
complete_url = url + "?" + data_encoded
|
||||
request = compat_urllib_request.Request(complete_url)
|
||||
response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
|
||||
response, urlh = self._download_webpage_handle(
|
||||
request, track_id, 'Downloading webpage with the url')
|
||||
cookie = urlh.headers.get('Set-Cookie', '')
|
||||
|
||||
self.report_extraction(track_id)
|
||||
|
||||
html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
|
||||
response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
|
||||
html_tracks = self._html_search_regex(
|
||||
r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',
|
||||
response, 'tracks')
|
||||
try:
|
||||
track_list = json.loads(html_tracks)
|
||||
track = track_list[u'tracks'][0]
|
||||
track = track_list['tracks'][0]
|
||||
except ValueError:
|
||||
raise ExtractorError(u'Hypemachine contained invalid JSON.')
|
||||
raise ExtractorError('Hypemachine contained invalid JSON.')
|
||||
|
||||
key = track[u"key"]
|
||||
track_id = track[u"id"]
|
||||
artist = track[u"artist"]
|
||||
title = track[u"song"]
|
||||
key = track['key']
|
||||
track_id = track['id']
|
||||
artist = track['artist']
|
||||
title = track['song']
|
||||
|
||||
serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
|
||||
request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
|
||||
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
|
||||
request = compat_urllib_request.Request(
|
||||
serve_url, '', {'Content-Type': 'application/json'})
|
||||
request.add_header('cookie', cookie)
|
||||
song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
|
||||
try:
|
||||
song_data = json.loads(song_data_json)
|
||||
except ValueError:
|
||||
raise ExtractorError(u'Hypemachine contained invalid JSON.')
|
||||
final_url = song_data[u"url"]
|
||||
song_data = self._download_json(request, track_id, 'Downloading metadata')
|
||||
final_url = song_data["url"]
|
||||
|
||||
return [{
|
||||
'id': track_id,
|
||||
'url': final_url,
|
||||
'ext': "mp3",
|
||||
'title': title,
|
||||
'artist': artist,
|
||||
}]
|
||||
return {
|
||||
'id': track_id,
|
||||
'url': final_url,
|
||||
'ext': 'mp3',
|
||||
'title': title,
|
||||
'uploader': artist,
|
||||
}
|
||||
|
35
youtube_dl/extractor/ku6.py
Normal file
35
youtube_dl/extractor/ku6.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Ku6IE(InfoExtractor):
|
||||
_VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
|
||||
_TEST = {
|
||||
'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
|
||||
'md5': '01203549b9efbb45f4b87d55bdea1ed1',
|
||||
'info_dict': {
|
||||
'id': 'JG-8yS14xzBr4bCn1pu0xw',
|
||||
'ext': 'f4v',
|
||||
'title': 'techniques test',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._search_regex(r'<h1 title=.*>(.*?)</h1>', webpage, 'title')
|
||||
dataUrl = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id
|
||||
jsonData = self._download_json(dataUrl, video_id)
|
||||
downloadUrl = jsonData['data']['f']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': downloadUrl
|
||||
}
|
||||
|
@@ -80,6 +80,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
})
|
||||
except (KeyError, TypeError):
|
||||
raise ExtractorError('Invalid rendition field.')
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _get_video_info(self, itemdoc):
|
||||
|
@@ -1,4 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -12,12 +14,13 @@ class NaverIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://tvcast.naver.com/v/81652',
|
||||
u'file': u'81652.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
|
||||
u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
|
||||
u'upload_date': u'20130903',
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'info_dict': {
|
||||
'id': '81652',
|
||||
'ext': 'mp4',
|
||||
'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
|
||||
'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
|
||||
'upload_date': '20130903',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -28,7 +31,7 @@ class NaverIE(InfoExtractor):
|
||||
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
webpage)
|
||||
if m_id is None:
|
||||
raise ExtractorError(u'couldn\'t extract vid and key')
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
vid = m_id.group(1)
|
||||
key = m_id.group(2)
|
||||
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
|
||||
@@ -39,22 +42,27 @@ class NaverIE(InfoExtractor):
|
||||
})
|
||||
info = self._download_xml(
|
||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
||||
video_id, u'Downloading video info')
|
||||
video_id, 'Downloading video info')
|
||||
urls = self._download_xml(
|
||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
||||
video_id, u'Downloading video formats info')
|
||||
video_id, 'Downloading video formats info')
|
||||
|
||||
formats = []
|
||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||
domain = format_el.find('Domain').text
|
||||
if domain.startswith('rtmp'):
|
||||
continue
|
||||
formats.append({
|
||||
f = {
|
||||
'url': domain + format_el.find('uri').text,
|
||||
'ext': 'mp4',
|
||||
'width': int(format_el.find('width').text),
|
||||
'height': int(format_el.find('height').text),
|
||||
})
|
||||
}
|
||||
if domain.startswith('rtmp'):
|
||||
f.update({
|
||||
'ext': 'flv',
|
||||
'rtmp_protocol': '1', # rtmpt
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -6,7 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -89,7 +89,7 @@ class NRKTVIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
||||
'md5': 'af01795a31f1cf7265c8657534d8077b',
|
||||
'info_dict': {
|
||||
'id': 'mdfp15000514',
|
||||
'ext': 'flv',
|
||||
@@ -111,9 +111,8 @@ class NRKTVIE(InfoExtractor):
|
||||
description = self._html_search_meta('description', page, 'description')
|
||||
thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
|
||||
duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)
|
||||
if duration:
|
||||
duration = float(duration)
|
||||
duration = float_or_none(
|
||||
self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
|
||||
|
||||
formats = []
|
||||
|
||||
|
@@ -5,7 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML
|
||||
)
|
||||
|
||||
|
@@ -3,6 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class NuvidIE(InfoExtractor):
|
||||
@@ -13,8 +18,10 @@ class NuvidIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1310741',
|
||||
'ext': 'mp4',
|
||||
"title": "Horny babes show their awesome bodeis and",
|
||||
"age_limit": 18,
|
||||
'title': 'Horny babes show their awesome bodeis and',
|
||||
'duration': 129,
|
||||
'upload_date': '20140508',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,27 +29,41 @@ class NuvidIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
murl = url.replace('://www.', '://m.')
|
||||
webpage = self._download_webpage(murl, video_id)
|
||||
formats = []
|
||||
|
||||
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
|
||||
request = compat_urllib_request.Request(
|
||||
'http://m.nuvid.com/play/%s' % video_id)
|
||||
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
|
||||
webpage = self._download_webpage(
|
||||
request, video_id, 'Downloading %s page' % format_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
|
||||
if not video_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
||||
title = self._html_search_regex(
|
||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
|
||||
webpage, 'title').strip()
|
||||
|
||||
url_end = self._html_search_regex(
|
||||
r'href="(/[^"]+)"[^>]*data-link_type="mp4"',
|
||||
webpage, 'video_url')
|
||||
video_url = 'http://m.nuvid.com' + url_end
|
||||
|
||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
|
||||
thumbnail = self._html_search_regex(
|
||||
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
|
||||
webpage, 'thumbnail URL', fatal=False)
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
}
|
||||
'formats': formats,
|
||||
}
|
@@ -3,9 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class SlutloadIE(InfoExtractor):
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
|
||||
@@ -12,6 +11,7 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -44,7 +44,8 @@ class SoundcloudIE(InfoExtractor):
|
||||
"upload_date": "20121011",
|
||||
"description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
|
||||
"uploader": "E.T. ExTerrestrial Music",
|
||||
"title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
|
||||
"title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1",
|
||||
"duration": 143,
|
||||
}
|
||||
},
|
||||
# not streamable song
|
||||
@@ -57,6 +58,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||
'uploader': 'The Royal Concept',
|
||||
'upload_date': '20120521',
|
||||
'duration': 227,
|
||||
},
|
||||
'params': {
|
||||
# rtmp
|
||||
@@ -74,6 +76,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'uploader': 'jaimeMF',
|
||||
'description': 'test chars: \"\'/\\ä↭',
|
||||
'upload_date': '20131209',
|
||||
'duration': 9,
|
||||
},
|
||||
},
|
||||
# downloadable song
|
||||
@@ -87,6 +90,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'description': 'Vocals',
|
||||
'uploader': 'Sim Gretina',
|
||||
'upload_date': '20130815',
|
||||
#'duration': 42,
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -119,6 +123,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'title': info['title'],
|
||||
'description': info['description'],
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(info.get('duration'), 1000),
|
||||
}
|
||||
formats = []
|
||||
if info.get('downloadable', False):
|
||||
|
81
youtube_dl/extractor/spiegeltv.py
Normal file
81
youtube_dl/extractor/spiegeltv.py
Normal file
@@ -0,0 +1,81 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SpiegeltvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.spiegel.tv/filme/flug-mh370/',
|
||||
'info_dict': {
|
||||
'id': 'flug-mh370',
|
||||
'ext': 'm4v',
|
||||
'title': 'Flug MH370',
|
||||
'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
|
||||
'thumbnail': 're:http://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
|
||||
version_json = self._download_json(
|
||||
'%s/version.json' % apihost, video_id,
|
||||
note='Downloading version information')
|
||||
version_name = version_json['version_name']
|
||||
|
||||
slug_json = self._download_json(
|
||||
'%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
|
||||
video_id,
|
||||
note='Downloading object information')
|
||||
oid = slug_json['object_id']
|
||||
|
||||
media_json = self._download_json(
|
||||
'%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
|
||||
video_id, note='Downloading media information')
|
||||
uuid = media_json['uuid']
|
||||
is_wide = media_json['is_wide']
|
||||
|
||||
server_json = self._download_json(
|
||||
'http://www.spiegel.tv/streaming_servers/', video_id,
|
||||
note='Downloading server information')
|
||||
server = server_json[0]['endpoint']
|
||||
|
||||
thumbnails = []
|
||||
for image in media_json['images']:
|
||||
thumbnails.append({
|
||||
'url': image['url'],
|
||||
'width': image['width'],
|
||||
'height': image['height'],
|
||||
})
|
||||
|
||||
description = media_json['subtitle']
|
||||
duration = media_json['duration_in_ms'] / 1000.
|
||||
|
||||
if is_wide:
|
||||
format = '16x9'
|
||||
else:
|
||||
format = '4x3'
|
||||
|
||||
url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v'
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
'ext': 'm4v',
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails
|
||||
}
|
79
youtube_dl/extractor/tagesschau.py
Normal file
79
youtube_dl/extractor/tagesschau.py
Normal file
@@ -0,0 +1,79 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TagesschauIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
|
||||
'md5': 'bcdeac2194fb296d599ce7929dfa4009',
|
||||
'info_dict': {
|
||||
'id': '1399128',
|
||||
'ext': 'mp4',
|
||||
'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
|
||||
'description': 'md5:69da3c61275b426426d711bde96463ab',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
|
||||
'md5': '8aaa8bf3ae1ca2652309718c03019128',
|
||||
'info_dict': {
|
||||
'id': '196',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt',
|
||||
'description': 'md5:f22e4af75821d174fa6c977349682691',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
's': {'width': 256, 'height': 144, 'quality': 1},
|
||||
'm': {'width': 512, 'height': 288, 'quality': 2},
|
||||
'l': {'width': 960, 'height': 544, 'quality': 3},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if video_id.startswith('-'):
|
||||
display_id = video_id.strip('-')
|
||||
else:
|
||||
display_id = video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playerpage = self._download_webpage(
|
||||
'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
|
||||
display_id, 'Downloading player page')
|
||||
|
||||
medias = re.findall(
|
||||
r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
|
||||
playerpage)
|
||||
|
||||
formats = []
|
||||
for url, ext, res in medias:
|
||||
f = {
|
||||
'format_id': res + '_' + ext,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
}
|
||||
f.update(self._FORMATS.get(res, {}))
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': self._og_search_title(webpage).strip(),
|
||||
'thumbnail': 'http://www.tagesschau.de' + thumbnail,
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage).strip(),
|
||||
}
|
95
youtube_dl/extractor/teachertube.py
Normal file
95
youtube_dl/extractor/teachertube.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
qualities,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class TeacherTubeIE(InfoExtractor):
|
||||
IE_NAME = 'teachertube'
|
||||
IE_DESC = 'teachertube.com videos'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
|
||||
'md5': 'f9434ef992fd65936d72999951ee254c',
|
||||
'info_dict': {
|
||||
'id': '339997',
|
||||
'ext': 'mp4',
|
||||
'title': 'Measures of dispersion from a frequency table_x264',
|
||||
'description': 'md5:a3e9853487185e9fcd7181a07164650b',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
|
||||
'md5': '0d625ec6bc9bf50f70170942ad580676',
|
||||
'info_dict': {
|
||||
'id': '340064',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Make Paper Dolls _ Paper Art Projects',
|
||||
'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.teachertube.com/music.php?music_id=8805',
|
||||
'md5': '01e8352006c65757caf7b961f6050e21',
|
||||
'info_dict': {
|
||||
'id': '8805',
|
||||
'ext': 'mp3',
|
||||
'title': 'PER ASPERA AD ASTRA',
|
||||
'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE PER ASPERA AD ASTRA',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
quality = qualities(['mp3', 'flv', 'mp4'])
|
||||
|
||||
_, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': media_url,
|
||||
'quality': quality(determine_ext(media_url))
|
||||
} for media_url in set(media_urls)
|
||||
]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
||||
|
||||
class TeacherTubeClassroomIE(InfoExtractor):
|
||||
IE_NAME = 'teachertube:classroom'
|
||||
IE_DESC = 'teachertube.com online classrooms'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('user')
|
||||
|
||||
rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
|
||||
user_id, 'Downloading classroom RSS')
|
||||
|
||||
entries = []
|
||||
for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'):
|
||||
entries.append(self.url_result(url.attrib['url'], 'TeacherTube'))
|
||||
|
||||
return self.playlist_result(entries, user_id)
|
33
youtube_dl/extractor/teachingchannel.py
Normal file
33
youtube_dl/extractor/teachingchannel.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class TeachingChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
|
||||
'info_dict': {
|
||||
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
|
||||
'ext': 'mp4',
|
||||
'title': 'A History of Teaming',
|
||||
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code')
|
||||
|
||||
return OoyalaIE._build_url_result(ooyala_code)
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -94,8 +95,12 @@ class VeohIE(InfoExtractor):
|
||||
if video_id.startswith('v'):
|
||||
rsp = self._download_xml(
|
||||
r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
|
||||
if rsp.get('stat') == 'ok':
|
||||
stat = rsp.get('stat')
|
||||
if stat == 'ok':
|
||||
return self._extract_video(rsp.find('./videoList/video'))
|
||||
elif stat == 'fail':
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, rsp.find('./errorList/error').get('errorMessage')), expected=True)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
age_limit = 0
|
||||
|
124
youtube_dl/extractor/vh1.py
Normal file
124
youtube_dl/extractor/vh1.py
Normal file
@@ -0,0 +1,124 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVIE
|
||||
|
||||
import re
|
||||
from ..utils import fix_xml_ampersands
|
||||
|
||||
|
||||
class VH1IE(MTVIE):
|
||||
IE_NAME = 'vh1.com'
|
||||
_FEED_URL = 'http://www.vh1.com/player/embed/AS3/fullepisode/rss/'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vh1.com/video/metal-evolution/full-episodes/progressive-metal/1678612/playlist.jhtml',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '7827a7505f59633983165bbd2c119b52',
|
||||
'info_dict': {
|
||||
'id': '731565',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 1',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 12 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '34fb4b7321c546b54deda2102a61821f',
|
||||
'info_dict': {
|
||||
'id': '731567',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 2',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '813f38dba4c1b8647196135ebbf7e048',
|
||||
'info_dict': {
|
||||
'id': '731568',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 3',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '51adb72439dfaed11c799115d76e497f',
|
||||
'info_dict': {
|
||||
'id': '731569',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 4',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '93d554aaf79320703b73a95288c76a6e',
|
||||
'info_dict': {
|
||||
'id': '731570',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 5',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
}
|
||||
],
|
||||
'skip': 'Blocked outside the US',
|
||||
}, {
|
||||
# Clip
|
||||
'url': 'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118',
|
||||
'md5': '7d67cf6d9cdc6b4f3d3ac97a55403844',
|
||||
'info_dict': {
|
||||
'id': '706675',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Episode 1 Pre-Metal Show Clip',
|
||||
'description': 'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.'
|
||||
},
|
||||
'skip': 'Blocked outside the US',
|
||||
}, {
|
||||
# Short link
|
||||
'url': 'http://www.vh1.com/video/play.jhtml?id=1678353',
|
||||
'md5': '853192b87ad978732b67dd8e549b266a',
|
||||
'info_dict': {
|
||||
'id': '730355',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Episode 11 Progressive Metal Sneak',
|
||||
'description': 'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.'
|
||||
},
|
||||
'skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml',
|
||||
'md5': 'b1bcb5b4380c9d7f544065589432dee7',
|
||||
'info_dict': {
|
||||
'id': '900535',
|
||||
'ext': 'mp4',
|
||||
'title': 'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"',
|
||||
'description': 'The Heist'
|
||||
},
|
||||
'skip': 'Blocked outside the US',
|
||||
}]
|
||||
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://www\.vh1\.com/video/
|
||||
(?:
|
||||
.+?/full-episodes/.+?/(?P<playlist_id>[^/]+)/playlist\.jhtml
|
||||
|
|
||||
(?:
|
||||
play.jhtml\?id=|
|
||||
misc/.+?/.+?\.jhtml\#id=
|
||||
)
|
||||
(?P<video_id>[0-9]+)$
|
||||
|
|
||||
[^/]+/(?P<music_id>[0-9]+)/[^/]+?
|
||||
)
|
||||
'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj.group('music_id'):
|
||||
id_field = 'vid'
|
||||
video_id = mobj.group('music_id')
|
||||
else:
|
||||
video_id = mobj.group('playlist_id') or mobj.group('video_id')
|
||||
id_field = 'id'
|
||||
doc_url = '%s?%s=%s' % (self._FEED_URL, id_field, video_id)
|
||||
|
||||
idoc = self._download_xml(
|
||||
doc_url, video_id,
|
||||
'Downloading info', transform_source=fix_xml_ampersands)
|
||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
69
youtube_dl/extractor/vulture.py
Normal file
69
youtube_dl/extractor/vulture.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class VultureIE(InfoExtractor):
|
||||
IE_NAME = 'vulture.com'
|
||||
_VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1',
|
||||
'md5': '8d997845642a2b5152820f7257871bc8',
|
||||
'info_dict': {
|
||||
'id': '6GHRQL3RV7MSD1H4',
|
||||
'ext': 'mp4',
|
||||
'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED',
|
||||
'uploader_id': 'Sarah',
|
||||
'thumbnail': 're:^http://.*\.jpg$',
|
||||
'timestamp': 1401288564,
|
||||
'upload_date': '20140528',
|
||||
'description': 'Uplifting and witty, as predicted.',
|
||||
'duration': 1015,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
query_string = self._search_regex(
|
||||
r"queryString\s*=\s*'([^']+)'", webpage, 'query string')
|
||||
video_id = self._search_regex(
|
||||
r'content=([^&]+)', query_string, 'video ID')
|
||||
query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string
|
||||
|
||||
query_webpage = self._download_webpage(
|
||||
query_url, display_id, note='Downloading query page')
|
||||
params_json = self._search_regex(
|
||||
r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
|
||||
query_webpage,
|
||||
'player params')
|
||||
params = json.loads(params_json)
|
||||
|
||||
upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T'))
|
||||
uploader_id = params.get('user', {}).get('handle')
|
||||
|
||||
media_item = params['media_item']
|
||||
title = os.path.splitext(media_item['title'])[0]
|
||||
duration = int_or_none(media_item.get('duration_seconds'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': media_item['pipeline_xid'],
|
||||
'title': title,
|
||||
'timestamp': upload_timestamp,
|
||||
'thumbnail': params.get('thumbnail_url'),
|
||||
'uploader_id': uploader_id,
|
||||
'description': params.get('description'),
|
||||
'duration': duration,
|
||||
}
|
@@ -5,18 +5,21 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class XVideosIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
|
||||
_TEST = {
|
||||
'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
|
||||
'file': '939581.flv',
|
||||
'md5': '1d0c835822f0a71a7bf011855db929d0',
|
||||
'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
|
||||
'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
|
||||
'info_dict': {
|
||||
"title": "Funny Porns By >>>>S<<<<<< -1",
|
||||
"age_limit": 18,
|
||||
'id': '4588838',
|
||||
'ext': 'flv',
|
||||
'title': 'Biker Takes his Girl',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +31,10 @@ class XVideosIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
||||
if mobj:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
|
||||
|
||||
# Extract video URL
|
||||
video_url = compat_urllib_parse.unquote(
|
||||
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
|
||||
|
@@ -21,7 +21,7 @@ class YahooIE(InfoExtractor):
|
||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||
'md5': '4962b075c08be8690a922ee026d05e69',
|
||||
'info_dict': {
|
||||
'id': '214727115',
|
||||
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Julian Smith & Travis Legg Watch Julian Smith',
|
||||
'description': 'Julian and Travis watch Julian Smith',
|
||||
@@ -31,7 +31,7 @@ class YahooIE(InfoExtractor):
|
||||
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
||||
'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
|
||||
'info_dict': {
|
||||
'id': '103000935',
|
||||
'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Codefellas - The Cougar Lies with Spanish Moss',
|
||||
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
|
||||
@@ -58,9 +58,11 @@ class YahooIE(InfoExtractor):
|
||||
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
||||
default=None)
|
||||
if items_json is None:
|
||||
long_id = self._search_regex(
|
||||
CONTENT_ID_REGEXES = [
|
||||
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
|
||||
webpage, 'content ID')
|
||||
r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"'
|
||||
]
|
||||
long_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
|
||||
video_id = long_id
|
||||
else:
|
||||
items = json.loads(items_json)
|
||||
@@ -68,9 +70,9 @@ class YahooIE(InfoExtractor):
|
||||
# The 'meta' field is not always in the video webpage, we request it
|
||||
# from another page
|
||||
long_id = info['id']
|
||||
return self._get_info(long_id, video_id)
|
||||
return self._get_info(long_id, video_id, webpage)
|
||||
|
||||
def _get_info(self, long_id, video_id):
|
||||
def _get_info(self, long_id, video_id, webpage):
|
||||
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
|
||||
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
|
||||
' AND protocol="http"' % long_id)
|
||||
@@ -113,7 +115,7 @@ class YahooIE(InfoExtractor):
|
||||
'title': meta['title'],
|
||||
'formats': formats,
|
||||
'description': clean_html(meta['description']),
|
||||
'thumbnail': meta['thumbnail'],
|
||||
'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
|
||||
@@ -137,7 +139,7 @@ class YahooNewsIE(YahooIE):
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, 'long id')
|
||||
return self._get_info(long_id, video_id)
|
||||
return self._get_info(long_id, video_id, webpage)
|
||||
|
||||
|
||||
class YahooSearchIE(SearchInfoExtractor):
|
||||
|
@@ -223,6 +223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
|
||||
@@ -1140,7 +1141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
|
||||
if mobj is None:
|
||||
mobj = re.search(
|
||||
r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded) on (.*?)</strong>',
|
||||
r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
|
||||
video_webpage)
|
||||
if mobj is not None:
|
||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||
@@ -1414,11 +1415,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
title_span = (search_title('playlist-title') or
|
||||
search_title('title long-title') or search_title('title'))
|
||||
title = clean_html(title_span)
|
||||
video_re = r'''(?x)data-video-username="(.*?)".*?
|
||||
video_re = r'''(?x)data-video-username=".*?".*?
|
||||
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
|
||||
matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
|
||||
# Some of the videos may have been deleted, their username field is empty
|
||||
ids = [video_id for (username, video_id) in matches if username]
|
||||
ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
|
||||
url_results = self._ids_to_results(ids)
|
||||
|
||||
return self.playlist_result(url_results, playlist_id, title)
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.06.02'
|
||||
__version__ = '2014.06.09'
|
||||
|
Reference in New Issue
Block a user