Compare commits

..

36 Commits

Author SHA1 Message Date
Philipp Hagemeister
51745be312 release 2014.04.19 2014-04-19 11:55:33 +02:00
Sergey M․
d7f1e7c88f [rutube] Fix extraction 2014-04-19 15:59:12 +07:00
Sergey M․
525dc9809e [noco] Fix test description md5 2014-04-18 21:36:04 +07:00
Sergey M․
1bf3210816 [noco] Add support for noco.tv (Closes #2712) 2014-04-18 21:11:09 +07:00
Sergey M․
e6c6d10d99 [podomatic] Improve video URL extraction (Closes #2763) 2014-04-17 19:59:52 +07:00
Jaime Marquínez Ferrándiz
f270256e06 [tlc] Add an extractor for tlc.com
It uses the same system as discovery.com
2014-04-16 20:29:31 +02:00
Jaime Marquínez Ferrándiz
f401c6f69f [canalplus] Download the video in the test
It doesn't use rtmpdump now.
2014-04-16 15:54:00 +02:00
Sergey M․
b075d25bed [canalplus] Prefer f4m and modernize (Closes #2749) 2014-04-16 20:47:39 +07:00
Jaime Marquínez Ferrándiz
3d1bb6b4dd Add an extractor for tlc.de (fixes #2748) 2014-04-16 15:45:05 +02:00
Philipp Hagemeister
1db2666916 [youtube:playlist] Correct playlist ID output
The ID now starts with PL, so we don't need to output that twice.
2014-04-15 17:55:52 +02:00
Jaime Marquínez Ferrándiz
8f5c0218d8 [fivemin] Get the 'sid' from the embed page (fixes #2745)
It allows downloading some videos that previously failed.
2014-04-15 16:18:37 +02:00
Sergey M․
d7666dff82 [9gag] Fix and improve extraction 2014-04-15 19:49:38 +07:00
Jaime Marquínez Ferrándiz
2d4c98dbd1 [ted] Use the rtmp links if the http downloads are not available. 2014-04-14 15:23:12 +02:00
Sergey M․
fd50bf623c [generic] Modernize tests 2014-04-14 18:56:29 +07:00
Sergey M․
d360a14678 [generic] Update test 2014-04-14 18:51:46 +07:00
Philipp Hagemeister
d0f2ab6969 release 2014.04.13 2014-04-13 03:22:30 +02:00
Philipp Hagemeister
de906ef543 [aol] Add support for playlists (Fixes #2730) 2014-04-13 03:22:24 +02:00
Sergey M․
2fb3deeca1 [tube8] Fix extraction and modernize 2014-04-13 03:56:32 +07:00
Philipp Hagemeister
66398056f1 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-12 17:15:16 +02:00
Jaime Marquínez Ferrándiz
77477fa4c9 Merge branch 'atomicparsley' (closes #2436) 2014-04-12 15:52:42 +02:00
Jaime Marquínez Ferrándiz
a169e18ce1 [atomicparsley] Remove unneeded __init__ method 2014-04-12 15:51:40 +02:00
Jaime Marquínez Ferrándiz
381640e3ac [brightcove] Only use url from meta element if it has the 'playerKey' field (fixes #2738) 2014-04-12 12:53:48 +02:00
Sergey M․
37e3410137 [prosiebensat1] Add one more clip id pattern (Closes #2737) 2014-04-12 02:53:55 +07:00
Jaime Marquínez Ferrándiz
97b5196960 [weibo] Modernize 2014-04-11 16:02:34 +02:00
Sergey M․
6a4f3528c8 [firstpost] Fix extraction 2014-04-11 20:40:42 +07:00
Philipp Hagemeister
b9c76aa1a9 [youtube] Add support for cleanvideosearch.com (Fixes #2734) 2014-04-11 13:53:05 +02:00
Philipp Hagemeister
0d3070d364 release 2014.04.11.2 2014-04-11 09:44:33 +02:00
Philipp Hagemeister
7753cadbfa [comedycentral:shows] Add support for TDS special editions (Fixes #2733) 2014-04-11 09:30:07 +02:00
Philipp Hagemeister
3950450342 [pyvideo] Fix title 2014-04-11 02:20:50 +02:00
Philipp Hagemeister
c82b1fdad6 [slideshare] Fix description 2014-04-11 02:19:15 +02:00
Philipp Hagemeister
b0fb63abe8 [dailymotion:playlist] Fix title 2014-04-11 02:16:46 +02:00
Philipp Hagemeister
3ab34c603e [comedycentral] Fix test md5sum 2014-04-11 02:14:31 +02:00
pulpe
784763c565 we don't need to run ffmpeg more times 2014-03-26 15:22:52 +01:00
pulpe
39c68260c0 fix ffmpeg metadatapp 2014-03-26 15:22:52 +01:00
pulpe
149254d0d5 fix ffmpeg error, if youtube-dl runs more than once with --embed-thumbnail with same video 2014-03-26 15:22:52 +01:00
pulpe
0c14e2fbe3 add post processor 2014-03-26 15:22:51 +01:00
29 changed files with 578 additions and 162 deletions

View File

@@ -250,6 +250,7 @@ which means you can modify it, redistribute it or use it however you like.
default
--embed-subs embed subtitles in the video (only for mp4
videos)
--embed-thumbnail embed thumbnail in the audio as cover art
--add-metadata write metadata to the video file
--xattrs write metadata to the video file's xattrs
(using dublin core and xdg standards)

View File

@@ -49,6 +49,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
@@ -165,6 +166,9 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch(
'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
['ComedyCentralShows'])
def test_yahoo_https(self):
# https://github.com/rg3/youtube-dl/issues/2701
@@ -172,6 +176,5 @@ class TestAllURLsMatching(unittest.TestCase):
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])
if __name__ == '__main__':
unittest.main()

View File

@@ -43,6 +43,7 @@ from youtube_dl.extractor import (
XTubeUserIE,
InstagramUserIE,
CSpanIE,
AolIE,
)
@@ -327,6 +328,16 @@ class TestPlaylists(unittest.TestCase):
whole_duration = sum(e['duration'] for e in result['entries'])
self.assertEqual(whole_duration, 14855)
def test_aol_playlist(self):
dl = FakeYDL()
ie = AolIE(dl)
result = ie.extract(
'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '152147')
self.assertEqual(
result['title'], 'Brace Yourself - Today\'s Weirdest News')
self.assertTrue(len(result['entries']) >= 10)
if __name__ == '__main__':
unittest.main()

View File

@@ -92,6 +92,8 @@ from .extractor import gen_extractors
from .version import __version__
from .YoutubeDL import YoutubeDL
from .postprocessor import (
AtomicParsleyPP,
FFmpegAudioFixPP,
FFmpegMetadataPP,
FFmpegVideoConvertor,
FFmpegExtractAudioPP,
@@ -503,6 +505,8 @@ def parseOpts(overrideArguments=None):
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
help='embed subtitles in the video (only for mp4 videos)')
postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
help='embed thumbnail in the audio as cover art')
postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
help='write metadata to the video file')
postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
@@ -808,6 +812,10 @@ def _real_main(argv=None):
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
if opts.xattrs:
ydl.add_post_processor(XAttrMetadataPP())
if opts.embedthumbnail:
if not opts.addmetadata:
ydl.add_post_processor(FFmpegAudioFixPP())
ydl.add_post_processor(AtomicParsleyPP())
# Update version
if opts.update_self:

View File

@@ -181,6 +181,7 @@ from .nfb import NFBIE
from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .ninegag import NineGagIE
from .noco import NocoIE
from .normalboots import NormalbootsIE
from .novamov import NovaMovIE
from .nowness import NownessIE
@@ -251,6 +252,7 @@ from .tf1 import TF1IE
from .theplatform import ThePlatformIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE
from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE

View File

@@ -8,7 +8,18 @@ from .fivemin import FiveMinIE
class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com'
_VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
_VALID_URL = r'''(?x)
(?:
aol-video:|
http://on\.aol\.com/
(?:
video/.*-|
playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
)
)
(?P<id>[0-9]+)
(?:$|\?)
'''
_TEST = {
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -24,5 +35,31 @@ class AolIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
self.to_screen('Downloading 5min.com video %s' % video_id)
playlist_id = mobj.group('playlist_id')
if playlist_id and not self._downloader.params.get('noplaylist'):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
playlist_html = self._search_regex(
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
'playlist HTML')
entries = [{
'_type': 'url',
'url': 'aol-video:%s' % m.group('id'),
'ie_key': 'Aol',
} for m in re.finditer(
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
playlist_html)]
return {
'_type': 'playlist',
'id': playlist_id,
'display_id': mobj.group('playlist_display_id'),
'title': title,
'entries': entries,
}
return FiveMinIE._build_result(video_id)

View File

@@ -140,7 +140,11 @@ class BrightcoveIE(InfoExtractor):
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m:
return [unescapeHTML(url_m.group(1))]
url = unescapeHTML(url_m.group(1))
# Some sites don't add it, we can't download with this url, for example:
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
if 'playerKey' in url:
return [url]
matches = re.findall(
r'''(?sx)<object

View File

@@ -1,4 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -8,46 +10,56 @@ from ..utils import unified_strdate
class CanalplusIE(InfoExtractor):
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
IE_NAME = u'canalplus.fr'
IE_NAME = 'canalplus.fr'
_TEST = {
u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
u'file': u'922470.flv',
u'info_dict': {
u'title': u'Zapping - 26/08/13',
u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
u'upload_date': u'20130826',
},
u'params': {
u'skip_download': True,
'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
'md5': '60c29434a416a83c15dae2587d47027d',
'info_dict': {
'id': '922470',
'ext': 'flv',
'title': 'Zapping - 26/08/13',
'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
'upload_date': '20130826',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.groupdict().get('id')
video_id = mobj.group('id')
if video_id is None:
webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, 'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id
doc = self._download_xml(info_url,video_id,
u'Downloading video info')
doc = self._download_xml(info_url, video_id, 'Downloading video XML')
self.report_extraction(video_id)
video_info = [video for video in doc if video.find('ID').text == video_id][0]
infos = video_info.find('INFOS')
media = video_info.find('MEDIA')
formats = [media.find('VIDEOS/%s' % format)
for format in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']]
video_url = [format.text for format in formats if format is not None][-1]
infos = video_info.find('INFOS')
return {'id': video_id,
'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text,
infos.find('TITRAGE/SOUS_TITRE').text),
'url': video_url,
'ext': 'flv',
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
'thumbnail': media.find('IMAGES/GRAND').text,
'description': infos.find('DESCRIPTION').text,
'view_count': int(infos.find('NB_VUES').text),
}
preferences = ['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']
formats = [
{
'url': fmt.text + '?hdcore=2.11.3' if fmt.tag == 'HDS' else fmt.text,
'format_id': fmt.tag,
'ext': 'mp4' if fmt.tag == 'HLS' else 'flv',
'preference': preferences.index(fmt.tag) if fmt.tag in preferences else -1,
} for fmt in media.find('VIDEOS') if fmt.text
]
self._sort_formats(formats)
return {
'id': video_id,
'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
infos.find('TITRAGE/SOUS_TITRE').text),
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
'thumbnail': media.find('IMAGES/GRAND').text,
'description': infos.find('DESCRIPTION').text,
'view_count': int(infos.find('NB_VUES').text),
'like_count': int(infos.find('NB_LIKES').text),
'comment_count': int(infos.find('NB_COMMENTS').text),
'formats': formats,
}

View File

@@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
_TEST = {
'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': '4167875aae411f903b751a21f357f1ee',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
'ext': 'mp4',
@@ -43,7 +43,7 @@ class ComedyCentralShowsIE(InfoExtractor):
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip>
(?:(?:guests/[^/]+|videos|video-playlists)/[^/]+/(?P<videotitle>[^/?#]+))
(?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)|

View File

@@ -201,11 +201,12 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
return {'_type': 'playlist',
'id': playlist_id,
'title': get_element_by_id(u'playlist_name', webpage),
'entries': self._extract_entries(playlist_id),
}
return {
'_type': 'playlist',
'id': playlist_id,
'title': self._og_search_title(webpage),
'entries': self._extract_entries(playlist_id),
}
class DailymotionUserIE(DailymotionPlaylistIE):

View File

@@ -6,7 +6,6 @@ from .common import InfoExtractor
class FirstpostIE(InfoExtractor):
IE_NAME = 'Firstpost.com'
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
_TEST = {
@@ -16,7 +15,6 @@ class FirstpostIE(InfoExtractor):
'id': '1025403',
'ext': 'mp4',
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
}
}
@@ -24,15 +22,26 @@ class FirstpostIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'<div.*?name="div_video".*?flashvars="([^"]+)">',
webpage, 'video URL')
data = self._download_xml(
'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
'Downloading video XML')
item = data.find('./playlist/item')
thumbnail = item.find('./image').text
title = item.find('./title').text
formats = [
{
'url': details.find('./file').text,
'format_id': details.find('./label').text.strip(),
'width': int(details.find('./width').text.strip()),
'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text
]
return {
'id': video_id,
'url': video_url,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urllib_parse,
)
@@ -16,16 +17,28 @@ class FiveMinIE(InfoExtractor):
(?P<id>\d+)
'''
_TEST = {
# From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
'md5': '4f7b0b79bf1a470e5004f7112385941d',
'info_dict': {
'id': '518013791',
'ext': 'mp4',
'title': 'iPad Mini with Retina Display Review',
_TESTS = [
{
# From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
'md5': '4f7b0b79bf1a470e5004f7112385941d',
'info_dict': {
'id': '518013791',
'ext': 'mp4',
'title': 'iPad Mini with Retina Display Review',
},
},
}
{
# From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
'url': '5min:518086247',
'md5': 'e539a9dd682c288ef5a498898009f69e',
'info_dict': {
'id': '518086247',
'ext': 'mp4',
'title': 'How to Make a Next-Level Fruit Salad',
},
},
]
@classmethod
def _build_result(cls, video_id):
@@ -34,9 +47,19 @@ class FiveMinIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed page')
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
query = compat_urllib_parse.urlencode({
'func': 'GetResults',
'playlist': video_id,
'sid': sid,
'isPlayerSeed': 'true',
'url': embed_url,
})
info = self._download_json(
'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
'playlist=%s&url=https' % video_id,
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
video_id)['binding'][0]
second_id = compat_str(int(video_id[:-2]) + 1)

View File

@@ -35,9 +35,10 @@ class GenericIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
'file': '13601338388002.mp4',
'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd',
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
'info_dict': {
'id': '13601338388002',
'ext': 'mp4',
'uploader': 'www.hodiho.fr',
'title': 'R\u00e9gis plante sa Jeep',
}
@@ -46,8 +47,9 @@ class GenericIE(InfoExtractor):
{
'add_ie': ['Bandcamp'],
'url': 'http://bronyrock.com/track/the-pony-mash',
'file': '3235767654.mp3',
'info_dict': {
'id': '3235767654',
'ext': 'mp3',
'title': 'The Pony Mash',
'uploader': 'M_Pallante',
},
@@ -73,9 +75,10 @@ class GenericIE(InfoExtractor):
{
# https://github.com/rg3/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',
'file': '3101154703001.mp4',
'md5': '0ba9446db037002366bab3b3eb30c88c',
'info_dict': {
'id': '3101154703001',
'ext': 'mp4',
'title': 'Still no power',
'uploader': 'thestar.com',
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',

View File

@@ -1,8 +1,10 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import str_to_int
class NineGagIE(InfoExtractor):
@@ -44,23 +46,14 @@ class NineGagIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
youtube_id = self._html_search_regex(
r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
webpage, 'video ID')
title = self._html_search_regex(
r'(?s)id="jsid-video-post-container".*?data-title="([^"]+)"',
webpage, 'title', default=None)
if not title:
title = self._og_search_title(webpage)
description = self._html_search_regex(
r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
'description', fatal=False)
view_count_str = self._html_search_regex(
r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
fatal=False)
view_count = (
None if view_count_str is None
else int(view_count_str.replace(',', '')))
post_view = json.loads(self._html_search_regex(
r'var postView = new app\.PostView\({ post: ({.+?}),', webpage, 'post view'))
youtube_id = post_view['videoExternalId']
title = post_view['title']
description = post_view['description']
view_count = str_to_int(post_view['externalView'])
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
return {
'_type': 'url_transparent',
@@ -71,5 +64,5 @@ class NineGagIE(InfoExtractor):
'title': title,
'description': description,
'view_count': view_count,
'thumbnail': self._og_search_thumbnail(webpage),
'thumbnail': thumbnail,
}

View File

@@ -0,0 +1,105 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
compat_str,
)
class NocoIE(InfoExtractor):
    """Extractor for videos published on noco.tv.

    Accepts both ``noco.tv/emission/<id>`` page URLs and
    ``player.noco.tv/?idvideo=<id>`` player URLs; the numeric id captured
    from the URL is used for every API request.
    """

    _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'

    _TEST = {
        'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
        'md5': '0a993f0058ddbcd902630b2047ef710e',
        'info_dict': {
            'id': '11538',
            'ext': 'mp4',
            'title': 'Ami Ami Idol - Hello! France',
            'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
            'upload_date': '20140412',
            'uploader': 'Nolife',
            'uploader_id': 'NOL',
            'duration': 2851.2,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (metadata and formats) for the video at *url*.

        Raises ExtractorError (marked as expected) when the API answers
        'forbidden' for a format, relaying the title and message supplied
        by the API.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        medias = self._download_json(
            'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')

        formats = []

        # Every listed quality needs its own request to obtain the file URL.
        for fmt in medias['fr']['video_list']['default']['quality_list']:
            format_id = fmt['quality_key']

            # Not named ``file``: that would shadow the Python 2 builtin.
            file_info = self._download_json(
                'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
                video_id, 'Downloading %s video JSON' % format_id)

            file_url = file_info['file']
            if not file_url:
                # No URL for this quality; skip it and try the others.
                continue

            if file_url == 'forbidden':
                raise ExtractorError(
                    '%s returned error: %s - %s' % (
                        self.IE_NAME, file_info['popmessage']['title'], file_info['popmessage']['message']),
                    expected=True)

            formats.append({
                'url': file_url,
                'format_id': format_id,
                'width': fmt['res_width'],
                'height': fmt['res_lines'],
                'abr': fmt['audiobitrate'],
                'vbr': fmt['videobitrate'],
                'filesize': fmt['filesize'],
                'format_note': fmt['quality_name'],
                'preference': fmt['priority'],
            })

        self._sort_formats(formats)

        show = self._download_json(
            'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]

        upload_date = unified_strdate(show['indexed'])
        uploader = show['partner_name']
        uploader_id = show['partner_key']
        duration = show['duration_ms'] / 1000.0
        thumbnail = show['screenshot']

        # Prefer the *_TT value and fall back to *_OT when it is missing or empty.
        episode = show.get('show_TT') or show.get('show_OT')
        family = show.get('family_TT') or show.get('family_OT')
        episode_number = show.get('episode_number')

        # Build "<family> #<number> - <episode>" from whichever parts exist.
        # Separators are only inserted between parts, so a missing family
        # no longer yields a title starting with ' #' or ' - '.
        title = family or ''
        if episode_number:
            title = (title + ' #' + compat_str(episode_number)).strip()
        if episode:
            title = '%s - %s' % (title, episode) if title else episode

        description = show.get('show_resume') or show.get('family_resume')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'formats': formats,
        }

View File

@@ -6,22 +6,36 @@ import re
from .common import InfoExtractor
from ..utils import int_or_none
class PodomaticIE(InfoExtractor):
IE_NAME = 'podomatic'
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
_TEST = {
"url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
"file": "2009-01-02T16_03_35-08_00.mp3",
"md5": "84bb855fcf3429e6bf72460e1eed782d",
"info_dict": {
"uploader": "Science Teaching Tips",
"uploader_id": "scienceteachingtips",
"title": "64. When the Moon Hits Your Eye",
"duration": 446,
}
}
_TESTS = [
{
'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
'md5': '84bb855fcf3429e6bf72460e1eed782d',
'info_dict': {
'id': '2009-01-02T16_03_35-08_00',
'ext': 'mp3',
'uploader': 'Science Teaching Tips',
'uploader_id': 'scienceteachingtips',
'title': '64. When the Moon Hits Your Eye',
'duration': 446,
}
},
{
'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
'md5': 'd2cf443931b6148e27638650e2638297',
'info_dict': {
'id': '2013-11-15T16_31_21-08_00',
'ext': 'mp3',
'uploader': 'Ostbahnhof / Techno Mix',
'uploader_id': 'ostbahnhof',
'title': 'Einunddreizig',
'duration': 3799,
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -32,10 +46,12 @@ class PodomaticIE(InfoExtractor):
'?permalink=true&rtmp=0') %
(mobj.group('proto'), channel, video_id))
data_json = self._download_webpage(
json_url, video_id, note=u'Downloading video info')
json_url, video_id, 'Downloading video info')
data = json.loads(data_json)
video_url = data['downloadLink']
if not video_url:
video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])
uploader = data['podcast']
title = data['title']
thumbnail = data['imageLocation']

View File

@@ -160,6 +160,7 @@ class ProSiebenSat1IE(InfoExtractor):
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clipId=(\d+)',
]
_TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',

View File

@@ -46,7 +46,8 @@ class PyvideoIE(InfoExtractor):
return self.url_result(m_youtube.group(1), 'Youtube')
title = self._html_search_regex(
r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
webpage, 'title', flags=re.DOTALL)
video_url = self._search_regex(
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
webpage, 'video url', flags=re.DOTALL)

View File

@@ -43,13 +43,14 @@ class RutubeIE(InfoExtractor):
'http://rutube.ru/api/video/%s/?format=json' % video_id,
video_id, 'Downloading video JSON')
trackinfo = self._download_json(
'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
video_id, 'Downloading trackinfo JSON')
# Some videos don't have the author field
author = trackinfo.get('author') or {}
m3u8_url = trackinfo['video_balancer'].get('m3u8')
author = video.get('author') or {}
options = self._download_json(
'http://rutube.ru/api/play/options/%s/?format=json' %video_id,
video_id, 'Downloading options JSON')
m3u8_url = options['video_balancer'].get('m3u8')
if m3u8_url is None:
raise ExtractorError('Couldn\'t find m3u8 manifest url')

View File

@@ -39,7 +39,8 @@ class SlideshareIE(InfoExtractor):
ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex(
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
'description', fatal=False)
return {
'_type': 'video',

View File

@@ -49,6 +49,19 @@ class TEDIE(SubtitlesInfoExtractor):
'thumbnail': 're:^https?://.+\.jpg',
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
}
}, {
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
'info_dict': {
'id': '1972',
'ext': 'flv',
'title': 'Be passionate. Be courageous. Be your best.',
'uploader': 'Gabby Giffords and Mark Kelly',
'description': 'md5:d89e1d8ebafdac8e55df4c219ecdbfe9',
},
'params': {
# rtmp download
'skip_download': True,
},
}]
_NATIVE_FORMATS = {
@@ -102,11 +115,23 @@ class TEDIE(SubtitlesInfoExtractor):
'url': format_url,
'format_id': format_id,
'format': format_id,
} for (format_id, format_url) in talk_info['nativeDownloads'].items()]
for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
} for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
if formats:
for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
else:
# Use rtmp downloads
formats = [{
'format_id': f['name'],
'url': talk_info['streamer'],
'play_path': f['file'],
'ext': 'flv',
'width': f['width'],
'height': f['height'],
'tbr': f['bitrate'],
} for f in talk_info['resources']['rtmp']]
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])

View File

@@ -0,0 +1,60 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from .discovery import DiscoveryIE
class TlcIE(DiscoveryIE):
    """Extractor for tlc.com video pages.

    Only the URL pattern, extractor name and test case are defined here;
    all extraction logic is inherited from DiscoveryIE.
    """

    IE_NAME = 'tlc.com'
    # The dot of the optional ".htm" suffix is escaped so that it only
    # matches a literal dot rather than any character.
    _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(\.htm)?'

    _TEST = {
        'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
        'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a',
        'info_dict': {
            'id': '853232',
            'ext': 'mp4',
            'title': 'Cake Boss: Too Big to Fly',
            'description': 'Buddy has taken on a high flying task.',
            'duration': 119,
        },
    }
class TlcDeIE(InfoExtractor):
    """Extractor for tlc.de video pages.

    The page embeds its player through an iframe; the Brightcove URL found
    inside that iframe is returned as a URL result for BrightcoveIE.
    """

    IE_NAME = 'tlc.de'
    _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'

    _TEST = {
        'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
        'info_dict': {
            'id': '3235167922001',
            'ext': 'mp4',
            'title': 'Breaking Amish: Die Welt da draußen',
            'uploader': 'Discovery Networks - Germany',
            'description': 'Vier Amische und eine Mennonitin wagen in New York'
                ' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
                ' ihrem spannenden Weg.',
        },
    }

    def _real_extract(self, url):
        """Return a 'url' result pointing at the embedded Brightcove video."""
        mobj = re.match(self._VALID_URL, url)
        # The URL component captured as 'title' only serves as the
        # identifier passed to the download helpers.
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        # Raw string: the pattern contains regex escapes (\.).
        iframe_url = self._search_regex(
            r'<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
            'iframe url')
        # Otherwise we don't get the correct 'BrightcoveExperience' element,
        # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
        iframe_url = iframe_url.replace('.htm?', '.php?')
        iframe = self._download_webpage(iframe_url, title)

        return {
            '_type': 'url',
            'url': BrightcoveIE._extract_brightcove_url(iframe),
            # 'ie_key' (not 'ie') is the key used for 'url' results
            # elsewhere in the project to name the target extractor.
            'ie_key': BrightcoveIE.ie_key(),
        }

View File

@@ -1,63 +1,83 @@
import os
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
int_or_none,
str_to_int,
)
from ..aes import (
aes_decrypt_text
)
from ..aes import aes_decrypt_text
class Tube8IE(InfoExtractor):
# Extractor for tube8.com video pages. The numeric video id is taken from
# the URL via _VALID_URL.
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
_TEST = {
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
u'file': u'229795.mp4',
u'md5': u'e9e0b0c86734e5e3766e653509475db0',
u'info_dict': {
u"description": u"hot teen Kasia grinding",
u"uploader": u"unknown",
u"title": u"Kasia music video",
u"age_limit": 18,
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
'file': '229795.mp4',
'md5': 'e9e0b0c86734e5e3766e653509475db0',
'info_dict': {
'description': 'hot teen Kasia grinding',
'uploader': 'unknown',
'title': 'Kasia music video',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
video_id = mobj.group('id')
# The page is requested with the age_verified cookie set.
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title')
video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
if thumbnail:
thumbnail = thumbnail.replace('\\/', '/')
# The "flashvars" JavaScript object embedded in the page is parsed as JSON;
# video_url, encrypted, video_title and image_url are read from it below.
flashvars = json.loads(self._html_search_regex(
r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
if webpage.find('"encrypted":true')!=-1:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
video_url = flashvars['video_url']
# When the URL is flagged as encrypted it is decrypted using the video
# title as the password.
if flashvars.get('encrypted') is True:
video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join(format)
# format_id: the first two '_'-separated parts of the path element at
# index 4, joined with '-'.
format_id = '-'.join(path.split('/')[4].split('_')[:2])
thumbnail = flashvars.get('image_url')
title = self._html_search_regex(
r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex(
r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
uploader = self._html_search_regex(
r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
webpage, 'uploader', fatal=False)
# The remaining counters are searched for with fatal=False.
like_count = int_or_none(self._html_search_regex(
r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
dislike_count = int_or_none(self._html_search_regex(
r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
view_count = self._html_search_regex(
r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)
if view_count:
view_count = str_to_int(view_count)
comment_count = self._html_search_regex(
r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
if comment_count:
comment_count = str_to_int(comment_count)
return {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'thumbnail': thumbnail,
'description': video_description,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'format_id': format_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
'age_limit': 18,
}

View File

@@ -1,10 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class WeiboIE(InfoExtractor):
"""
The videos in Weibo come from different sites, this IE just finds the link
@@ -13,16 +14,16 @@ class WeiboIE(InfoExtractor):
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
_TEST = {
u'add_ie': ['Sina'],
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
u'file': u'98322879.flv',
u'info_dict': {
u'title': u'魔声耳机最新广告“All Eyes On Us”',
'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
'info_dict': {
'id': '98322879',
'ext': 'flv',
'title': '魔声耳机最新广告“All Eyes On Us”',
},
u'note': u'Sina video',
u'params': {
u'skip_download': True,
'params': {
'skip_download': True,
},
'add_ie': ['Sina'],
}
# Additional example videos from different sites
@@ -33,17 +34,16 @@ class WeiboIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
video_id = mobj.group('id')
info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
info_page = self._download_webpage(info_url, video_id)
info = json.loads(info_page)
info = self._download_json(info_url, video_id)
videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
#Prefer sina video since they have thumbnails
videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
# Prefer sina video since they have thumbnails
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
player_url = videos_urls[-1]
m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
player_url)
if m_sina is not None:
self.to_screen('Sina video detected')
sina_id = m_sina.group(1)
player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
return self.url_result(player_url)

View File

@@ -151,6 +151,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
)
))
|youtu\.be/ # just youtu.be/xxxx
|https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
)
)? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
@@ -1418,7 +1419,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result(video_id, 'Youtube', video_id=video_id)
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
if playlist_id.startswith('RD'):
# Mixes require a custom extraction process

View File

@@ -1,5 +1,7 @@
from .atomicparsley import AtomicParsleyPP
from .ffmpeg import (
FFmpegAudioFixPP,
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegVideoConvertor,
@@ -9,6 +11,8 @@ from .ffmpeg import (
from .xattrpp import XAttrMetadataPP
__all__ = [
'AtomicParsleyPP',
'FFmpegAudioFixPP',
'FFmpegMergerPP',
'FFmpegMetadataPP',
'FFmpegVideoConvertor',

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import subprocess
from .common import PostProcessor
from ..utils import (
check_executable,
compat_urlretrieve,
encodeFilename,
PostProcessingError,
prepend_extension,
shell_quote
)
class AtomicParsleyPPError(PostProcessingError):
    """Error raised by AtomicParsleyPP when the thumbnail cannot be embedded."""
class AtomicParsleyPP(PostProcessor):
    """Post-processor that adds the thumbnail as artwork using AtomicParsley."""

    def run(self, info):
        """Download ``info['thumbnail']`` and add it to ``info['filepath']``.

        AtomicParsley writes its result to a temporary file, which then
        replaces the original file. The downloaded thumbnail is always
        deleted afterwards, including when AtomicParsley fails.

        Returns the tuple ``(True, info)``.

        Raises AtomicParsleyPPError if the AtomicParsley executable is not
        available, if ``info`` has no thumbnail, or if AtomicParsley exits
        with a non-zero status (the message is its stderr output).
        """
        if not check_executable('AtomicParsley', ['-v']):
            raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')

        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        temp_thumbnail = prepend_extension(filename, 'thumb')

        if not info.get('thumbnail'):
            raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')

        compat_urlretrieve(info['thumbnail'], temp_thumbnail)

        try:
            cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]

            self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)

            if self._downloader.params.get('verbose', False):
                self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))

            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()

            if p.returncode != 0:
                msg = stderr.decode('utf-8', 'replace').strip()
                raise AtomicParsleyPPError(msg)
        finally:
            # The thumbnail is only needed while AtomicParsley runs; remove
            # it on failure too so no stray file is left behind.
            os.remove(encodeFilename(temp_thumbnail))

        # Replace the original file with the one carrying the artwork.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, info

View File

@@ -464,7 +464,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy']
if info['ext'] == u'm4a':
options = ['-vn', '-acodec', 'copy']
else:
options = ['-c', 'copy']
for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
@@ -483,3 +487,17 @@ class FFmpegMergerPP(FFmpegPostProcessor):
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
return True, info
class FFmpegAudioFixPP(FFmpegPostProcessor):
    """Post-processor that rewrites a file with ffmpeg using ``-vn -acodec copy``."""

    def run(self, info):
        """Run ffmpeg on ``info['filepath']`` and replace it with the output.

        Returns the tuple ``(True, info)``.
        """
        source_path = info['filepath']
        fixed_path = prepend_extension(source_path, 'temp')

        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % source_path)
        self.run_ffmpeg(source_path, fixed_path, ['-vn', '-acodec', 'copy'])

        # Swap the ffmpeg output into the place of the original file.
        os.remove(encodeFilename(source_path))
        os.rename(encodeFilename(fixed_path), encodeFilename(source_path))

        return True, info

View File

@@ -1,2 +1,2 @@
__version__ = '2014.04.11.1'
__version__ = '2014.04.19'