Compare commits

..

31 Commits

Author SHA1 Message Date
Philipp Hagemeister
7575d52a73 release 2014.06.24 2014-06-24 08:59:40 +02:00
Sergey M․
9a2dc4f7ac [teachertube] Fix extraction 2014-06-23 03:07:10 +07:00
Jaime Marquínez Ferrándiz
c5cd249e41 [generic] Extract mtvservices embedded videos 2014-06-22 21:39:36 +02:00
Jaime Marquínez Ferrándiz
8940c1c058 [mtv] Add an extractor for the mtvservices embedded player (closes #2995) 2014-06-22 21:39:27 +02:00
Petr Půlpán
27ec04b232 [BR] replace test 2014-06-22 17:33:27 +02:00
Sergey M․
d2824416aa [firstpost] Fix title extraction and add description 2014-06-22 01:20:40 +07:00
Petr Půlpán
18061bbab0 [Youtube] add DASH format 272 (fixes #3128) 2014-06-21 12:03:27 +02:00
Sergey M․
4ecbbcbcea Merge branch 'eliasp-spiegel' 2014-06-21 16:32:01 +07:00
Sergey M․
55c97a03e1 [spiegel] Add description and modernize 2014-06-21 16:31:18 +07:00
Elias Probst
98aeac6ea9 Use the 'base_url' for building the resulting 'url' as well. 2014-06-21 01:10:10 +02:00
Elias Probst
8bfb6723cb Extract the base_url for the XML download from the JS snippet's 'server' variable. 2014-06-21 01:00:48 +02:00
Elias Probst
a20575e8ae Make debug message useful and also report, which URL failed to download. 2014-06-21 00:35:12 +02:00
Sergey M․
7724572519 [noco] Switch to HTTPS (Closes #3116) 2014-06-20 18:40:47 +07:00
Philipp Hagemeister
d763637f6a release 2014.06.19 2014-06-19 17:13:50 +02:00
Jaime Marquínez Ferrándiz
c26e9ac4b2 [youtube] Recognize signature functions that contain '$' (fixes #3104) 2014-06-19 16:42:49 +02:00
Petr Půlpán
896bf55352 [LifeNews] update thumbnail in test 2014-06-19 16:34:48 +02:00
Petr Půlpán
a23ba9b53c [Steam] update description in test 2014-06-19 16:32:11 +02:00
Sergey M․
38a9339baf [prosiebensat1] Update some regexes 2014-06-19 19:51:49 +07:00
Sergey M․
def8b4039f [bilibili] Fix extraction 2014-06-18 18:53:25 +07:00
Petr Půlpán
a14e1538fe [ustream:channel] replace test for an updated channel 2014-06-17 16:03:03 +02:00
Petr Půlpán
5f28a1acad [GorillaVid] improve extractor 2014-06-17 15:18:46 +02:00
pulpe
25e9953c6f Merge pull request #3059 from marcwebbie/gorillavid
* marcwebbie/gorillavid:
  Changed video url to a public video
  [GorillaVid] Added GorillaVid extractor
2014-06-17 15:14:18 +02:00
Petr Půlpán
f9df094ca5 Merge pull request #3089 from pulpe/ard_fix
[ARDIE] fix formats extraction (fixes #3087)
2014-06-17 14:53:51 +02:00
Sergey M.
b60a469023 Merge pull request #3090 from Kagee/patch-1
tv.nrk.no urls mostly contain capital characters
2014-06-17 02:21:10 +07:00
Anders Einar Hilden
7012631257 Fix test
Didn't use .lower() as planned, so update test with new ID.
2014-06-16 19:37:59 +02:00
Anders Einar Hilden
e6c9f80c48 tv.nrk.no urls mostly contain capital characters
Updated regexp and one of the test cases to reflect this.
tv.nrksuper.no mostly uses lowercase, so that is still there.
2014-06-16 19:29:23 +02:00
pulpe
895ce482b1 [ARDIE] adjustments suggested by @jaimeMF 2014-06-16 18:15:41 +02:00
pulpe
e5da4021eb [ARDIE] fix formats extraction (fixes #3087) 2014-06-16 16:17:49 +02:00
Sergey M․
2371053565 [rai] Skip test 2014-06-16 18:50:15 +07:00
marcwebbie
77abae55df Changed video url to a public video 2014-06-08 03:13:45 -03:00
marcwebbie
617c0b2239 [GorillaVid] Added GorillaVid extractor 2014-06-07 23:09:45 -03:00
19 changed files with 229 additions and 51 deletions

View File

@@ -114,10 +114,10 @@ class TestPlaylists(unittest.TestCase):
def test_ustream_channel(self):
dl = FakeYDL()
ie = UstreamChannelIE(dl)
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
result = ie.extract('http://www.ustream.tv/channel/channeljapan')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '5124905')
self.assertTrue(len(result['entries']) >= 6)
self.assertEqual(result['id'], '10874166')
self.assertTrue(len(result['entries']) >= 54)
def test_soundcloud_set(self):
dl = FakeYDL()

View File

@@ -109,6 +109,7 @@ from .gdcvault import GDCVaultIE
from .generic import GenericIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
from .hark import HarkIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
@@ -169,6 +170,7 @@ from .moviezine import MoviezineIE
from .movshare import MovShareIE
from .mtv import (
MTVIE,
MTVServicesEmbeddedIE,
MTVIggyIE,
)
from .musicplayon import MusicPlayOnIE

View File

@@ -56,7 +56,18 @@ class ARDIE(InfoExtractor):
raise ExtractorError('This video is only available after 20:00')
formats = []
for s in streams:
if type(s['_stream']) == list:
for index, url in enumerate(s['_stream'][::-1]):
quality = s['_quality'] + index
formats.append({
'quality': quality,
'url': url,
'format_id': '%s-%s' % (determine_ext(url), quality)
})
continue
format = {
'quality': s['_quality'],
'url': s['_stream'],

View File

@@ -13,7 +13,7 @@ from ..utils import (
class BiliBiliIE(InfoExtractor):
_VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/'
_TEST = {
'url': 'http://www.bilibili.tv/video/av1074402/',
@@ -56,7 +56,7 @@ class BiliBiliIE(InfoExtractor):
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
player_params = compat_parse_qs(self._html_search_regex(
r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
webpage, 'player params'))
if 'cid' in player_params:

View File

@@ -17,15 +17,13 @@ class BRIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html',
'md5': '93556dd2bcb2948d9259f8670c516d59',
'info_dict': {
'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
'ext': 'mp4',
'title': 'Feiern und Verzichten',
'description': 'Anselm Grün: Feiern und Verzichten',
'uploader': 'BR/Birgit Baier',
'upload_date': '20140301',
'title': 'Am 1. und 2. August in Oberammergau',
'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
}
},
{

View File

@@ -15,6 +15,7 @@ class FirstpostIE(InfoExtractor):
'id': '1025403',
'ext': 'mp4',
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
'description': 'md5:feef3041cb09724e0bdc02843348f5f4',
}
}
@@ -22,13 +23,16 @@ class FirstpostIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id)
title = self._html_search_meta('twitter:title', page, 'title')
description = self._html_search_meta('twitter:description', page, 'title')
data = self._download_xml(
'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
'Downloading video XML')
item = data.find('./playlist/item')
thumbnail = item.find('./image').text
title = item.find('./title').text
formats = [
{
@@ -42,6 +46,7 @@ class FirstpostIE(InfoExtractor):
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -278,6 +278,17 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
# MTVServices embed
{
'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
'md5': '35727f82f58c76d996fc188f9755b0d5',
'info_dict': {
'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
'ext': 'mp4',
'title': 'Review',
'description': 'Mario\'s life in the fast lane has never looked so good.',
},
},
]
def report_download_webpage(self, video_id):
@@ -676,6 +687,14 @@ class GenericIE(InfoExtractor):
url = unescapeHTML(mobj.group('url'))
return self.url_result(url, ie='Vulture')
# Look for embedded mtvservices player
mobj = re.search(
r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
webpage)
if mobj is not None:
url = unescapeHTML(mobj.group('url'))
return self.url_result(url, ie='MTVServicesEmbedded')
# Start with something easy: JW Player in SWFObject
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if not found:

View File

@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
compat_urllib_parse,
compat_urllib_request,
)
class GorillaVidIE(InfoExtractor):
    """Information extractor for videos hosted on gorillavid.in.

    Both the plain video page URL and the ``embed-<id>-<W>x<H>.html`` form
    are accepted; the embed form is normalized to the plain page URL before
    downloading.
    """

    _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?'

    _TESTS = [{
        'url': 'http://gorillavid.in/06y9juieqpmi',
        'md5': '5ae4a3580620380619678ee4875893ba',
        'info_dict': {
            'id': '06y9juieqpmi',
            'ext': 'flv',
            'title': 'Rebecca Black My Moment Official Music Video Reaction',
            # Raw string: same value as before, but '\.' is no longer an
            # invalid escape sequence in a normal string literal.
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
        'md5': 'c9e293ca74d46cad638e199c3f3fe604',
        'info_dict': {
            'id': 'z08zf8le23c6',
            'ext': 'mp4',
            'title': 'Say something nice',
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, formats) for *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Always work on the canonical page, even when given an embed URL.
        url = 'http://gorillavid.in/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        # Collect the hidden form inputs (name -> value) of the page.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        # The page may show an intermediate "download1" form that has to be
        # re-submitted to reach the real video page.  Use .get() so a page
        # without any hidden 'op' input does not raise KeyError.
        if fields.get('op') == 'download1':
            # Request data must be bytes on Python 3; urlencode output is
            # pure ASCII, so this is a no-op conversion on Python 2.
            post = compat_urllib_parse.urlencode(fields).encode('ascii')
            req = compat_urllib_request.Request(url, post)
            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            webpage = self._download_webpage(
                req, video_id, 'Downloading video page')

        title = self._search_regex(
            r'style="z-index: [0-9]+;">([0-9a-zA-Z ]+)(?:-.+)?</span>',
            webpage, 'title')
        # The thumbnail is optional metadata: do not abort the extraction
        # when it cannot be found.
        thumbnail = self._search_regex(
            r'image:\'(http[^\']+)\',', webpage, 'thumbnail', fatal=False)
        url = self._search_regex(
            r'file: \'(http[^\']+)\',', webpage, 'file url')

        formats = [{
            'format_id': 'sd',
            'url': url,
            'ext': determine_ext(url),
            'quality': 1,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@@ -24,7 +24,7 @@ class LifeNewsIE(InfoExtractor):
'ext': 'mp4',
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
'thumbnail': 're:http://.*\.jpg',
'upload_date': '20140130',
}
}

View File

@@ -22,6 +22,7 @@ def _media_xml_tag(tag):
class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
@staticmethod
def _id_from_uri(uri):
return uri.split(':')[-1]
@@ -35,6 +36,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid')
def _get_feed_url(self, uri):
return self._FEED_URL
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
@@ -136,10 +140,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
idoc = self._download_xml(
self._FEED_URL + '?' + data, video_id,
feed_url + '?' + data, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return [self._get_video_info(item) for item in idoc.findall('.//item')]
@@ -160,6 +164,37 @@ class MTVServicesInfoExtractor(InfoExtractor):
return self._get_videos_info(mgid)
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
    """Extractor for the embeddable player at media.mtvnservices.com.

    The feed URL is not fixed for this player: it is read from the player's
    ``config.xml``, which is looked up using the site part of the mgid.
    """

    IE_NAME = 'mtvservices:embedded'
    _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'

    _TEST = {
        # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
        'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
        'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
        'info_dict': {
            'id': '1043906',
            'ext': 'mp4',
            'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
            'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
        },
    }

    def _get_feed_url(self, uri):
        """Return the feed URL declared in the player config for *uri*.

        The query string of the configured feed URL is dropped; the caller
        appends its own ``uri`` parameter.
        """
        video_id = self._id_from_uri(uri)
        # Strip only the trailing id.  str.replace() would also remove any
        # earlier occurrence of the same characters inside the mgid and
        # produce a wrong config URL.
        site_id = uri[:len(uri) - len(video_id)]
        config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id)
        config_doc = self._download_xml(config_url, video_id)
        feed_node = config_doc.find('.//feed')
        feed_url = feed_node.text.strip().split('?')[0]
        return feed_url

    def _real_extract(self, url):
        """Extract the mgid from the embed URL and return its videos' info."""
        mobj = re.match(self._VALID_URL, url)
        mgid = mobj.group('mgid')
        return self._get_videos_info(mgid)
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)^https?://
(?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|

View File

@@ -35,7 +35,7 @@ class NocoIE(InfoExtractor):
video_id = mobj.group('id')
medias = self._download_json(
'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
formats = []
@@ -43,7 +43,7 @@ class NocoIE(InfoExtractor):
format_id = fmt['quality_key']
file = self._download_json(
'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
video_id, 'Downloading %s video JSON' % format_id)
file_url = file['file']
@@ -71,7 +71,7 @@ class NocoIE(InfoExtractor):
self._sort_formats(formats)
show = self._download_json(
'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
upload_date = unified_strdate(show['indexed'])
uploader = show['partner_name']

View File

@@ -72,14 +72,14 @@ class NRKIE(InfoExtractor):
class NRKTVIE(InfoExtractor):
_VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
_VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
_TESTS = [
{
'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014',
'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
'info_dict': {
'id': 'muhh48000314',
'id': 'MUHH48000314',
'ext': 'flv',
'title': '20 spørsmål',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
@@ -141,4 +141,4 @@ class NRKTVIE(InfoExtractor):
'upload_date': upload_date,
'duration': duration,
'formats': formats,
}
}

View File

@@ -158,19 +158,19 @@ class ProSiebenSat1IE(InfoExtractor):
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clipId=(\d+)',
r'clip[iI]d=(\d+)',
]
_TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
r'<header class="clearfix">\s*<h3>(.+?)</h3>',
r'<!-- start video -->\s*<h1>(.+?)</h1>',
r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>',
r'<h1 class="att-name">\s*(.+?)</h1>',
]
_DESCRIPTION_REGEXES = [
r'<p itemprop="description">\s*(.+?)</p>',
r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">',
r'<p class="att-description">\s*(.+?)\s*</p>',
]
_UPLOAD_DATE_REGEXES = [
r'<meta property="og:published_time" content="(.+?)">',

View File

@@ -46,7 +46,8 @@ class RaiIE(SubtitlesInfoExtractor):
'title': 'State of the Net, Antonella La Carpia: regole virali',
'description': 'md5:b0ba04a324126903e3da7763272ae63c',
'upload_date': '20140613',
}
},
'skip': 'Error 404',
},
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',

View File

@@ -1,3 +1,4 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
@@ -9,18 +10,33 @@ class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'file': '1259285.mp4',
'md5': '2c2754212136f35fb4b19767d242f66e',
'info_dict': {
'id': '1259285',
'ext': 'mp4',
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
'duration': 49,
},
},
{
}, {
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
'file': '1309159.mp4',
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
'info_dict': {
'id': '1309159',
'ext': 'mp4',
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983,
},
}, {
'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html',
'md5': '54f58ba0e752e3c07bc2a26222dd0acf',
'info_dict': {
'id': '1502367',
'ext': 'mp4',
'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern',
'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91',
'duration': 42,
},
}]
@@ -30,18 +46,20 @@ class SpiegelIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(
title = self._html_search_regex(
r'<div class="module-title">(.*?)</div>', webpage, 'title')
description = self._html_search_meta('description', webpage, 'description')
xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
idoc = self._download_xml(
xml_url, video_id,
note='Downloading XML', errnote='Failed to download XML')
base_url = self._search_regex(
r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL')
xml_url = base_url + video_id + '.xml'
idoc = self._download_xml(xml_url, video_id)
formats = [
{
'format_id': n.tag.rpartition('type')[2],
'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'url': base_url + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
@@ -59,7 +77,8 @@ class SpiegelIE(InfoExtractor):
return {
'id': video_id,
'title': video_title,
'title': title,
'description': description,
'duration': duration,
'formats': formats,
}

View File

@@ -53,7 +53,7 @@ class SteamIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20140329',
'title': 'FRONTIERS - Final Greenlight Trailer',
'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
'description': 'md5:6df4fe8dd494ae811869672b0767e025',
'uploader': 'AAD Productions',
'uploader_id': 'AtomicAgeDogGames',
}

View File

@@ -22,8 +22,8 @@ class TeacherTubeIE(InfoExtractor):
'info_dict': {
'id': '339997',
'ext': 'mp4',
'title': 'Measures of dispersion from a frequency table_x264',
'description': 'md5:a3e9853487185e9fcd7181a07164650b',
'title': 'Measures of dispersion from a frequency table',
'description': 'Measures of dispersion from a frequency table',
'thumbnail': 're:http://.*\.jpg',
},
}, {
@@ -33,7 +33,7 @@ class TeacherTubeIE(InfoExtractor):
'id': '340064',
'ext': 'mp4',
'title': 'How to Make Paper Dolls _ Paper Art Projects',
'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b',
'description': 'Learn how to make paper dolls in this simple',
'thumbnail': 're:http://.*\.jpg',
},
}, {
@@ -43,7 +43,7 @@ class TeacherTubeIE(InfoExtractor):
'id': '8805',
'ext': 'mp3',
'title': 'PER ASPERA AD ASTRA',
'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE PER ASPERA AD ASTRA',
'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
},
}]
@@ -53,9 +53,19 @@ class TeacherTubeIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta('title', webpage, 'title')
TITLE_SUFFIX = ' - TeacherTube'
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)].strip()
description = self._html_search_meta('description', webpage, 'description')
if description:
description = description.strip()
quality = qualities(['mp3', 'flv', 'mp4'])
_, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))
media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
formats = [
{
@@ -68,10 +78,10 @@ class TeacherTubeIE(InfoExtractor):
return {
'id': video_id,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'title': title,
'thumbnail': self._html_search_regex(r'var\s+thumbUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail'),
'formats': formats,
'description': self._og_search_description(webpage),
'description': description,
}
@@ -85,8 +95,9 @@ class TeacherTubeClassroomIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('user')
rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
user_id, 'Downloading classroom RSS')
rss = self._download_xml(
'http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
user_id, 'Downloading classroom RSS')
entries = []
for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'):

View File

@@ -224,6 +224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
@@ -440,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
r'signature=([a-zA-Z]+)', jscode,
r'signature=([$a-zA-Z]+)', jscode,
u'Initial JS player signature function name')
jsi = JSInterpreter(jscode)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.06.16'
__version__ = '2014.06.24'