Compare commits

..

17 Commits

Author SHA1 Message Date
Philipp Hagemeister
8fe56478f8 release 2013.12.16.5 2013-12-16 21:34:47 +01:00
Philipp Hagemeister
0e2a436dce [radiofrance] Add support (Fixes #1942) 2013-12-16 21:34:41 +01:00
Philipp Hagemeister
24050dd11c release 2013.12.16.4 2013-12-16 21:10:18 +01:00
Philipp Hagemeister
8c8e3eec79 [facebook] Recognize #! URLs (Fixes #1988) 2013-12-16 21:10:06 +01:00
Philipp Hagemeister
7ebc9dee69 Merge pull request #1987 from rzhxeo/blip
[GenericIE] Add support for embedded blip.tv
2013-12-16 11:28:34 -08:00
rzhxeo
ee3e63e477 [GenericIE] Add support for embedded blip.tv 2013-12-16 20:08:23 +01:00
Philipp Hagemeister
e9c424c144 Merge pull request #1984 from alimirjamali/patch-1
Incorrect variable is used to check whether thumbnail exists
2013-12-16 09:04:36 -08:00
alimirjamali
0a9ce268ba Incorrect variable is used to check whether thumbnail exists
Dear @phihag

I believe in line 848, the correct variable to check is 'thumb_filename' rather than 'infofn'

Kindly advise

Mit freundlichen Gruessen
Ali
2013-12-16 20:14:28 +03:30
Philipp Hagemeister
4b2da48ea7 release 2013.12.16.3 2013-12-16 14:44:29 +01:00
Philipp Hagemeister
e64eaaa97d Fix execution under Python 3 2013-12-16 14:44:17 +01:00
Philipp Hagemeister
780603027f [videopremium] Skip test 2013-12-16 14:42:07 +01:00
Philipp Hagemeister
00902cd601 release 2013.12.16.2 2013-12-16 14:13:51 +01:00
Philipp Hagemeister
d67b0b1596 Reorder info_dict documentation 2013-12-16 14:13:40 +01:00
Philipp Hagemeister
d7dda16888 [blinkx] Add extractor (Fixes #1972) 2013-12-16 13:56:30 +01:00
Philipp Hagemeister
a19fd00cc4 Simplify --playlist-start / --playlist-end interface 2013-12-16 13:16:20 +01:00
Philipp Hagemeister
d66152a898 [ndtv] Remove unused imports 2013-12-16 08:16:38 +01:00
Philipp Hagemeister
8c5f0c9fbc [mdr] Clean up 2013-12-16 08:16:11 +01:00
14 changed files with 224 additions and 57 deletions

View File

@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_testcases
from youtube_dl.extractor import (
FacebookIE,
gen_extractors,
JustinTVIE,
YoutubeIE,
@@ -87,12 +88,15 @@ class TestAllURLsMatching(unittest.TestCase):
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
def test_facebook_matching(self):
    """FacebookIE must accept photo.php URLs reached through a '#!' fragment."""
    hashbang_url = u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'
    is_match = FacebookIE.suitable(hashbang_url)
    self.assertTrue(is_match)
def test_no_duplicates(self):
ies = gen_extractors()
for tc in get_testcases():
url = tc['url']
for ie in ies:
if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
else:
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))

View File

@@ -557,16 +557,16 @@ class YoutubeDL(object):
n_all_entries = len(ie_result['entries'])
playliststart = self.params.get('playliststart', 1) - 1
playlistend = self.params.get('playlistend', -1)
playlistend = self.params.get('playlistend', None)
# For backwards compatibility, interpret -1 as whole list
if playlistend == -1:
entries = ie_result['entries'][playliststart:]
else:
entries = ie_result['entries'][playliststart:playlistend]
playlistend = None
entries = ie_result['entries'][playliststart:playlistend]
n_entries = len(entries)
self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
self.to_screen(
u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
for i, entry in enumerate(entries, 1):
@@ -845,7 +845,7 @@ class YoutubeDL(object):
if info_dict.get('thumbnail') is not None:
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
self.to_screen(u'[%s] %s: Thumbnail is already present' %
(info_dict['extractor'], info_dict['id']))
else:

View File

@@ -198,10 +198,14 @@ def parseOpts(overrideArguments=None):
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
selection.add_option('--playlist-start',
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
selection.add_option('--playlist-end',
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
help='playlist video to start at (default is %default)')
selection.add_option(
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
help='playlist video to end at (default is last)')
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER',
@@ -576,18 +580,10 @@ def _real_main(argv=None):
if numeric_buffersize is None:
parser.error(u'invalid buffer size specified')
opts.buffersize = numeric_buffersize
try:
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive')
except (TypeError, ValueError):
parser.error(u'invalid playlist start number specified')
try:
opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError):
parser.error(u'invalid playlist end number specified')
if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive')
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
raise ValueError(u'Playlist end must be greater than playlist start')
if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
parser.error(u'invalid audio format specified')

View File

@@ -13,6 +13,7 @@ from .arte import (
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .breakcom import BreakIE
@@ -118,6 +119,7 @@ from .pornhd import PornHdIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
from .pyvideo import PyvideoIE
from .radiofrance import RadioFranceIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE

View File

@@ -0,0 +1,86 @@
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
remove_start,
)
class BlinkxIE(InfoExtractor):
    """Information extractor for blinkx.com videos.

    Accepts ``blinkx.com/ce/<id>`` page URLs (with or without ``www.``) and
    the ``blinkx:<id>`` shorthand.  Metadata, thumbnails and formats are read
    from the blinkx ``play_video`` API, which is queried with the id alone.
    """

    # "www." is optional: the host part is only used for matching, the API
    # request below is built purely from the captured id.
    _VALID_URL = r'^(?:https?://(?:www\.)?blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
    # Spelled IE_NAME (no leading underscore) to agree with the other
    # extractors, e.g. RadioFranceIE.
    # NOTE(review): `_IE_NAME` is presumably never read by the base class - confirm.
    IE_NAME = u'blinkx'

    _TEST = {
        u'url': u'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
        u'file': u'8aQUy7GV.mp4',
        u'md5': u'2e9a07364af40163a908edbf10bb2492',
        u'info_dict': {
            u"title": u"Police Car Rolls Away",
            u"uploader": u"stupidvideos.com",
            u"upload_date": u"20131215",
            u"description": u"A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
            u"duration": 14.886,
            u"thumbnails": [{
                "width": 100,
                "height": 76,
                "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
            }],
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the blinkx video addressed by ``url``.

        The returned ``id`` is the first 8 characters of the full video id
        (also used as display id while downloading); the complete id is kept
        under ``fullid``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = video_id[:8]

        api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
                   u'video=%s' % video_id)
        data_json = self._download_webpage(api_url, display_id)
        data = json.loads(data_json)['api']['results'][0]

        # Convert in UTC so that upload_date does not depend on the time zone
        # of the machine running the extraction.
        dt = datetime.datetime.utcfromtimestamp(data['pubdate_epoch'])
        upload_date = dt.strftime('%Y%m%d')

        duration = None
        thumbnails = []
        formats = []
        # Each media entry is either a thumbnail, the original (which only
        # contributes the duration) or a downloadable flv/mp4 rendition.
        for media in data['media']:
            media_type = media['type']
            if media_type == 'jpg':
                thumbnails.append({
                    'url': media['link'],
                    'width': int(media['w']),
                    'height': int(media['h']),
                })
            elif media_type == 'original':
                duration = media['d']
            elif media_type in ('flv', 'mp4'):
                # Codec names are reported with an "ff" prefix; strip it.
                vcodec = remove_start(media['vcodec'], 'ff')
                acodec = remove_start(media['acodec'], 'ff')
                # Bitrates are divided by 1000 before being reported
                # (presumably bit/s -> kbit/s - TODO confirm against the API).
                format_id = (u'%s-%sk-%s' %
                             (vcodec,
                              (int(media['vbr']) + int(media['abr'])) // 1000,
                              media['w']))
                formats.append({
                    'format_id': format_id,
                    'url': media['link'],
                    'vcodec': vcodec,
                    'acodec': acodec,
                    'abr': int(media['abr']) // 1000,
                    'vbr': int(media['vbr']) // 1000,
                    'width': int(media['w']),
                    'height': int(media['h']),
                })
        # Ascending sort: smallest / lowest-bitrate rendition first.
        formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr']))

        return {
            'id': display_id,
            'fullid': video_id,
            'title': data['title'],
            'formats': formats,
            'uploader': data['channel_name'],
            'upload_date': upload_date,
            'description': data.get('description'),
            'thumbnails': thumbnails,
            'duration': duration,
        }

View File

@@ -35,14 +35,38 @@ class InfoExtractor(object):
id: Video identifier.
title: Video title, unescaped.
Additionally, it must contain either a formats entry or url and ext:
formats: A list of dictionaries for each format available, it must
be ordered from worst to best quality. Potential fields:
* url Mandatory. The URL of the video file
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from the format_id, width, height,
and format_note fields if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19")
* format_note Additional info about the format
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
* player_url SWF Player URL (used for rtmpdump).
url: Final video URL.
ext: Video filename extension.
Instead of url and ext, formats can also be specified.
format: The video format, defaults to ext (used for --get-format)
player_url: SWF Player URL (used for rtmpdump).
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
The following fields are optional:
format: The video format, defaults to ext (used for --get-format)
thumbnails: A list of dictionaries (with the entries "resolution" and
"url") for the varying thumbnails
thumbnail: Full URL to a video thumbnail image.
@@ -51,7 +75,6 @@ class InfoExtractor(object):
upload_date: Video upload date (YYYYMMDD).
uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump).
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
duration: Length of the video in seconds, as an integer.
@@ -59,28 +82,7 @@ class InfoExtractor(object):
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
comment_count: Number of comments on the video
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
age_limit: Age restriction for the video, as an integer (years)
formats: A list of dictionaries for each format available, it must
be ordered from worst to best quality. Potential fields:
* url Mandatory. The URL of the video file
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from the format_id, width, height,
and format_note fields if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19")
* format_note Additional info about the format
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)

View File

@@ -17,7 +17,7 @@ from ..utils import (
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
_NETRC_MACHINE = 'facebook'
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
u'file': u'120708114770723.mp4',
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
u'info_dict': {
u"duration": 279,
u"duration": 279,
u"title": u"PEOPLE ARE AWESOME 2013"
}
}

View File

@@ -222,6 +222,18 @@ class GenericIE(InfoExtractor):
'id': video_id,
}
# Look for embedded blip.tv player
mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
if mobj:
player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
player_page = self._download_webpage(player_url, mobj.group(1))
blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
if blip_video_id:
return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:

View File

@@ -30,7 +30,6 @@ class MDRIE(InfoExtractor):
m = re.match(self._VALID_URL, url)
video_id = m.group('video_id')
domain = m.group('domain')
mediatype = m.group('type')
# determine title and media streams from webpage
html = self._download_webpage(url, video_id)
@@ -70,7 +69,7 @@ class MDRIE(InfoExtractor):
formats.append(format)
formats.sort(key=lambda f: (f.get('vbr'), f['abr']))
if not formats:
raise ValueError('Could not find any valid formats')
raise ExtractorError(u'Could not find any valid formats')
return {
'id': video_id,

View File

@@ -1,6 +1,4 @@
import json
import re
import time
from .common import InfoExtractor
from ..utils import month_by_name

View File

@@ -0,0 +1,60 @@
# coding: utf-8
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
remove_start,
)
class RadioFranceIE(InfoExtractor):
    """Extractor for "radiovisions" pages on maison.radiofrance.fr.

    The page is scraped for its title, description, credit line and the
    jPlayer ``data-source`` attribute, which lists one URL per audio format.
    """

    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = u'radiofrance'

    _TEST = {
        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
        u'file': u'one-one.mp4',
        u'md5': u'todo',
        u'info_dict': {
            u"title": u"One to one",
            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            u"uploader": u"ferdi",
        },
    }

    def _real_extract(self, url):
        """Scrape the page at ``url`` and return its info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Title and audio sources are mandatory; description and uploader
        # are looked up with fatal=False.
        title = self._html_search_regex(r'<h1>(.*?)</h1>', page, u'title')
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            page, u'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
            page, u'uploader', fatal=False)
        sources = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            page, u'audio URLs')

        # data-source holds "<name>: '<url>'" pairs; every pair is one
        # audio-only format.  They are kept in page order (no sorting, nothing
        # more is known about these formats).
        formats = []
        for format_name, format_url in re.findall(
                r"([a-z0-9]+)\s*:\s*'([^']+)'", sources):
            formats.append({
                'format_id': format_name,
                'url': format_url,
                'vcodec': 'none',
            })

        info = {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
        }
        return info

View File

@@ -15,6 +15,7 @@ class VideoPremiumIE(InfoExtractor):
u'params': {
u'skip_download': True,
},
u'skip': u'Test file has been deleted.',
}
def _real_extract(self, url):

View File

@@ -1066,14 +1066,21 @@ def fix_xml_all_ampersand(xml_str):
def setproctitle(title):
assert isinstance(title, type(u''))
try:
libc = ctypes.cdll.LoadLibrary("libc.so.6")
except OSError:
return
title = title
buf = ctypes.create_string_buffer(len(title) + 1)
buf.value = title
buf.value = title.encode('utf-8')
try:
libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
except AttributeError:
return # Strange libc, just skip this
def remove_start(s, start):
    """Return ``s`` with a leading ``start`` stripped.

    If ``s`` does not begin with ``start`` it is returned unchanged.
    """
    return s[len(start):] if s.startswith(start) else s

View File

@@ -1,2 +1,2 @@
__version__ = '2013.12.16.1'
__version__ = '2013.12.16.5'