Compare commits

..

1 Commits

Author SHA1 Message Date
Filippo Valsorda
00b350d209 [test] tell Travis to install rtmpdump and add initial support to rtmp testing 2013-11-25 17:46:33 -05:00
52 changed files with 201 additions and 740 deletions

View File

@@ -3,6 +3,9 @@ python:
- "2.6"
- "2.7"
- "3.3"
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq rtmpdump
script: nosetests test --verbose
notifications:
email:

View File

@@ -55,9 +55,8 @@ which means you can modify it, redistribute it or use it however you like.
--dateafter DATE download only videos uploaded after this date
--no-playlist download only the currently playing video
--age-limit YEARS download only videos suitable for the given age
--download-archive FILE Download only videos not listed in the archive
file. Record the IDs of all downloaded videos in
it.
--download-archive FILE Download only videos not present in the archive
file. Record all downloaded videos in it.
## Download Options:
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
@@ -131,11 +130,11 @@ which means you can modify it, redistribute it or use it however you like.
-v, --verbose print various debugging information
--dump-intermediate-pages print downloaded pages to debug problems(very
verbose)
--write-pages Write downloaded intermediary pages to files in
the current directory to debug problems
--write-pages Write downloaded pages to files in the current
directory
## Video Format Options:
-f, --format FORMAT video format code, specify the order of
-f, --format FORMAT video format code, specify the order of
preference using slashes: "-f 22/17/18". "-f mp4"
and "-f flv" are also supported
--all-formats download all available video formats
@@ -183,7 +182,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
# OUTPUT TEMPLATE

View File

@@ -1,21 +1,10 @@
__youtube_dl()
{
local cur prev opts fileopts diropts keywords
local cur prev opts
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
opts="{{flags}}"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies"
diropts="--cache-dir"
if [[ ${prev} =~ ${fileopts} ]]; then
COMPREPLY=( $(compgen -f -- ${cur}) )
return 0
elif [[ ${prev} =~ ${diropts} ]]; then
COMPREPLY=( $(compgen -d -- ${cur}) )
return 0
fi
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
if [[ ${cur} =~ : ]]; then
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )

View File

@@ -39,6 +39,5 @@
"writeinfojson": true,
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,
"socket_timeout": 20
"listssubtitles": false
}

View File

@@ -106,10 +106,6 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentralShows'])
def test_vimeo_matching(self):
    # Each Vimeo listing URL must be claimed by its dedicated extractor.
    # (A comment, not a docstring: unittest would print a docstring in
    # place of the test name in verbose output.)
    expectations = (
        ('http://vimeo.com/channels/tributes', ['vimeo:channel']),
        ('http://vimeo.com/user7108434', ['vimeo:user']),
    )
    for page_url, ie_names in expectations:
        self.assertMatch(page_url, ie_names)
if __name__ == '__main__':
unittest.main()

View File

@@ -15,16 +15,13 @@ from youtube_dl.extractor import (
DailymotionPlaylistIE,
DailymotionUserIE,
VimeoChannelIE,
VimeoUserIE,
UstreamChannelIE,
SoundcloudSetIE,
SoundcloudUserIE,
LivestreamIE,
NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE
BandcampAlbumIE
)
@@ -57,14 +54,6 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24)
def test_vimeo_user(self):
    # Extract a Vimeo user's uploads and check the result is a playlist
    # with the expected title and more than 65 entries.
    extractor = VimeoUserIE(FakeYDL())
    playlist = extractor.extract('http://vimeo.com/nkistudio/videos')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['title'], u'Nki')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count > 65)
def test_ustream_channel(self):
dl = FakeYDL()
ie = UstreamChannelIE(dl)
@@ -121,24 +110,6 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4)
def test_smotri_community(self):
    # Extract a Smotri community page and check the playlist id, title
    # and that it has at least 4 entries.
    extractor = SmotriCommunityIE(FakeYDL())
    playlist = extractor.extract('http://smotri.com/community/video/kommuna')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'kommuna')
    self.assertEqual(playlist['title'], u'КПРФ')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 4)
def test_smotri_user(self):
    # Extract a Smotri user page and check the playlist id, title and
    # that it has at least 9 entries.
    extractor = SmotriUserIE(FakeYDL())
    playlist = extractor.extract('http://smotri.com/user/inspector')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'inspector')
    self.assertEqual(playlist['title'], u'Inspector')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 9)
if __name__ == '__main__':
unittest.main()

View File

@@ -72,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
self.DL.expect_warning(u'Video doesn\'t have automatic captions')

View File

@@ -107,14 +107,5 @@ class TestYoutubeLists(unittest.TestCase):
result = ie.extract('http://www.youtube.com/show/airdisasters')
self.assertTrue(len(result) >= 3)
def test_youtube_mix(self):
    # Extract a YouTube mix URL: expect at least 20 entries, the first of
    # which has the id 'rjFaenf1T-Y'.
    extractor = YoutubePlaylistIE(FakeYDL())
    mix = extractor.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
    mix_entries = mix['entries']
    self.assertTrue(len(mix_entries) >= 20)
    first_entry = mix_entries[0]
    self.assertEqual(first_entry['id'], 'rjFaenf1T-Y')
if __name__ == '__main__':
unittest.main()

View File

@@ -132,7 +132,6 @@ class YoutubeDL(object):
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -147,7 +146,7 @@ class YoutubeDL(object):
_num_downloads = None
_screen_file = None
def __init__(self, params=None):
def __init__(self, params={}):
"""Create a FileDownloader object with the given options."""
self._ies = []
self._ies_instances = {}
@@ -156,7 +155,6 @@ class YoutubeDL(object):
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self.params = {} if params is None else params
if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@@ -166,8 +164,9 @@ class YoutubeDL(object):
u'Assuming --restrict-filenames since file system encoding '
u'cannot encode all charactes. '
u'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
params['restrictfilenames'] = True
self.params = params
self.fd = FileDownloader(self, self.params)
if '%(stitle)s' in self.params.get('outtmpl', ''):
@@ -970,10 +969,7 @@ class YoutubeDL(object):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
timeout_val = self.params.get('socket_timeout')
timeout = 600 if timeout_val is None else float(timeout_val)
def _setup_opener(self, timeout=20):
opts_cookiefile = self.params.get('cookiefile')
opts_proxy = self.params.get('proxy')

View File

@@ -36,7 +36,6 @@ __authors__ = (
'Marcin Cieślak',
'Anton Larionov',
'Takuya Tsuchida',
'Sergey M.',
)
__license__ = 'Public Domain'
@@ -81,11 +80,11 @@ from .PostProcessor import (
def parseOpts(overrideArguments=None):
def _readOptions(filename_bytes, default=[]):
def _readOptions(filename_bytes):
try:
optionf = open(filename_bytes)
except IOError:
return default # silently skip if file is not present
return [] # silently skip if file is not present
try:
res = []
for l in optionf:
@@ -199,9 +198,6 @@ def parseOpts(overrideArguments=None):
general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
help='Disable filesystem caching')
general.add_option(
'--socket-timeout', dest='socket_timeout',
type=float, default=None, help=optparse.SUPPRESS_HELP)
selection.add_option('--playlist-start',
@@ -224,7 +220,7 @@ def parseOpts(overrideArguments=None):
default=None, type=int)
selection.add_option('--download-archive', metavar='FILE',
dest='download_archive',
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
authentication.add_option('-u', '--username',
@@ -239,7 +235,7 @@ def parseOpts(overrideArguments=None):
video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT', default='best',
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats',
@@ -321,7 +317,7 @@ def parseOpts(overrideArguments=None):
help='print downloaded pages to debug problems(very verbose)')
verbosity.add_option('--write-pages',
action='store_true', dest='write_pages', default=False,
help='Write downloaded intermediary pages to files in the current directory to debug problems')
help='Write downloaded pages to files in the current directory')
verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
@@ -419,8 +415,6 @@ def parseOpts(overrideArguments=None):
if opts.verbose:
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
systemConf = _readOptions('/etc/youtube-dl.conf')
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
@@ -430,31 +424,8 @@ def parseOpts(overrideArguments=None):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
userConf = _readOptions(userConfFile, None)
if userConf is None:
appdata_dir = os.environ.get('appdata')
if appdata_dir:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
default=None)
if userConf is None:
userConf = []
systemConf = _readOptions('/etc/youtube-dl.conf')
userConf = _readOptions(userConfFile)
commandLineConf = sys.argv[1:]
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
@@ -680,8 +651,6 @@ def _real_main(argv=None):
'download_archive': opts.download_archive,
'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
}
with YoutubeDL(ydl_opts) as ydl:

View File

@@ -21,7 +21,6 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
@@ -72,7 +71,6 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
@@ -107,7 +105,6 @@ from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .podomatic import PodomaticIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
@@ -121,11 +118,6 @@ from .rutube import RutubeIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .smotri import (
SmotriIE,
SmotriCommunityIE,
SmotriUserIE,
)
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import (
@@ -164,11 +156,7 @@ from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import (
VimeoIE,
VimeoChannelIE,
VimeoUserIE,
)
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
@@ -181,11 +169,7 @@ from .xhamster import XHamsterIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
from .xtube import XTubeIE
from .yahoo import (
YahooIE,
YahooNewsIE,
YahooSearchIE,
)
from .yahoo import YahooIE, YahooSearchIE
from .youjizz import YouJizzIE
from .youku import YoukuIE
from .youporn import YouPornIE

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -27,8 +28,9 @@ class AnitubeIE(InfoExtractor):
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
webpage, u'key')
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
key)
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
video_title = config_xml.find('title').text

View File

@@ -1,6 +1,7 @@
# encoding: utf-8
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -77,7 +78,8 @@ class ArteTvIE(InfoExtractor):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
@@ -107,8 +109,9 @@ class ArteTvIE(InfoExtractor):
"""Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information')
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:

View File

@@ -1,5 +1,6 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import unified_strdate
@@ -30,10 +31,11 @@ class CanalplusIE(InfoExtractor):
webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id
doc = self._download_xml(info_url,video_id,
info_page = self._download_webpage(info_url,video_id,
u'Downloading video info')
self.report_extraction(video_id)
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
video_info = [video for video in doc if video.find('ID').text == video_id][0]
infos = video_info.find('INFOS')
media = video_info.find('MEDIA')

View File

@@ -12,27 +12,21 @@ class CinemassacreIE(InfoExtractor):
_TESTS = [{
u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
u'file': u'19911.flv',
u'md5': u'f9bb7ede54d1229c9846e197b4737e06',
u'info_dict': {
u'upload_date': u'20121110',
u'title': u'“Angry Video Game Nerd: The Movie” Trailer',
u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
},
{
u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
u'file': u'521be8ef82b16.flv',
u'md5': u'9509ee44dcaa7c1068604817c19a9e50',
u'info_dict': {
u'upload_date': u'20131002',
u'title': u'The Mummys Hand (1940)',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
}]
def _real_extract(self, url):

View File

@@ -3,7 +3,6 @@ import time
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import ExtractorError
class ClipfishIE(InfoExtractor):
@@ -11,14 +10,13 @@ class ClipfishIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
_TEST = {
u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
u'file': u'3966754.mp4',
u'md5': u'2521cd644e862936cf2e698206e47385',
u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
u'file': u'4028320.f4v',
u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
u'info_dict': {
u'title': u'FIFA 14 - E3 2013 Trailer',
u'duration': 82,
},
u'skip': 'Blocked in the US'
u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
u'duration': 399,
}
}
def _real_extract(self, url):
@@ -27,14 +25,11 @@ class ClipfishIE(InfoExtractor):
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
(video_id, int(time.time())))
doc = self._download_xml(
info_xml = self._download_webpage(
info_url, video_id, note=u'Downloading info page')
doc = xml.etree.ElementTree.fromstring(info_xml)
title = doc.find('title').text
video_url = doc.find('filename').text
if video_url is None:
xml_bytes = xml.etree.ElementTree.tostring(doc)
raise ExtractorError(u'Cannot find video URL in document %r' %
xml_bytes)
thumbnail = doc.find('imageurl').text
duration_str = doc.find('duration').text
m = re.match(

View File

@@ -1,52 +0,0 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
)
class ClipsyndicateIE(InfoExtractor):
    # Extractor for clipsyndicate.com video pages (see _VALID_URL).

    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
        u'info_dict': {
            u'id': u'4629301',
            u'ext': u'mp4',
            u'title': u'Brick Briscoe',
            u'duration': 612,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*.

        The embed player script is downloaded first because it carries the
        ``flvars`` query string (including a required token) needed to
        request the playlist XML that describes the track.

        The 'title', 'thumbnail' and 'duration' values are None when the
        corresponding <param> element is absent from the track.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
            video_id, u'Downloading player')
        # it includes a required token
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')

        playlist_page = self._download_webpage(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, u'Downloading video info')
        # Fix broken xml: escape every ampersand so the document parses
        playlist_page = re.sub('&', '&amp;', playlist_page)
        pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))

        track_doc = pdoc.find('trackList/track')

        def find_param(name):
            # Value of the <param name=...> element, or None when absent.
            node = find_xpath_attr(track_doc, './/param', 'name', name)
            if node is not None:
                return node.attrib['value']

        # A missing duration param must not abort the extraction with a
        # TypeError from int(None); report it as unknown instead.
        duration = find_param('duration')

        return {
            'id': video_id,
            'title': find_param('title'),
            'url': track_doc.find('location').text,
            'thumbnail': find_param('thumbnail'),
            'duration': None if duration is None else int(duration),
        }

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
@@ -32,7 +33,8 @@ class CNNIE(InfoExtractor):
path = mobj.group('path')
page_title = mobj.group('title')
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
info = self._download_xml(info_url, page_title)
info_xml = self._download_webpage(info_url, page_title)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
formats = []
for f in info.findall('files/file'):

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from .mtv import MTVIE, _media_xml_tag
@@ -157,12 +158,13 @@ class ComedyCentralShowsIE(InfoExtractor):
uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
idoc = self._download_xml(indexUrl, epTitle,
indexXml = self._download_webpage(indexUrl, epTitle,
u'Downloading show index',
u'unable to download episode index')
results = []
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item')
for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text
@@ -173,9 +175,10 @@ class ComedyCentralShowsIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId}))
cdoc = self._download_xml(configUrl, epTitle,
configXml = self._download_webpage(configUrl, epTitle,
u'Downloading configuration for %s' % shortMediaId)
cdoc = xml.etree.ElementTree.fromstring(configXml)
turls = []
for rendition in cdoc.findall('.//rendition'):
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)

View File

@@ -210,8 +210,7 @@ class InfoExtractor(object):
""" Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
def _download_xml(self, url_or_request, video_id,
note=u'Downloading XML', errnote=u'Unable to download XML'):
def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
@@ -364,8 +363,7 @@ class InfoExtractor(object):
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=False)

View File

@@ -1,5 +1,6 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -31,12 +32,14 @@ class DaumIE(InfoExtractor):
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
webpage, u'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml(
info_xml = self._download_webpage(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
u'Downloading video info')
urls = self._download_xml(
urls_xml = self._download_webpage(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
self.to_screen(u'%s: Getting video urls' % video_id)
formats = []
@@ -46,9 +49,10 @@ class DaumIE(InfoExtractor):
'vid': full_id,
'profile': profile,
})
url_doc = self._download_xml(
url_xml = self._download_webpage(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
video_id, note=False)
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
format_url = url_doc.find('result/url').text
formats.append({
'url': format_url,

View File

@@ -1,6 +1,7 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -29,7 +30,8 @@ class DreiSatIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
@@ -20,8 +21,9 @@ class EbaumsWorldIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
config = self._download_xml(
config_xml = self._download_webpage(
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video_url = config.find('file').text
return {

View File

@@ -1,5 +1,6 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -27,8 +28,9 @@ class FazIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
u'config xml url')
config = self._download_xml(config_xml_url, video_id,
config_xml = self._download_webpage(config_xml_url, video_id,
u'Downloading config xml')
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
encodings = config.find('ENCODINGS')
formats = []

View File

@@ -1,5 +1,6 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
import json
from .common import InfoExtractor
@@ -10,10 +11,11 @@ from ..utils import (
class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id):
info = self._download_xml(
xml_desc = self._download_webpage(
'http://www.francetvinfo.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion='
+ video_id, video_id, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')

View File

@@ -195,15 +195,6 @@ class GenericIE(InfoExtractor):
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for embedded Dailymotion player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:

View File

@@ -1,59 +0,0 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
get_element_by_attribute,
)
class ImdbIE(InfoExtractor):
    # Extractor for trailer pages on imdb.com (see _VALID_URL).

    IE_NAME = u'imdb'
    IE_DESC = u'Internet Movie Database trailers'
    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
        u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
        u'info_dict': {
            u'id': u'2524815897',
            u'ext': u'mp4',
            u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
            u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
            u'duration': 151,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the trailer addressed by *url*.

        The page lists one player path per format id; each path is fetched
        and the JSON embedded in its "imdb-player-data" script element
        supplies the media URL and height for that format. The thumbnail
        and duration are taken from the last format page processed.

        Raises ExtractorError when the page lists no formats.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        descr = get_element_by_attribute('itemprop', 'description', webpage)
        available_formats = re.findall(
            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
            flags=re.MULTILINE)
        if not available_formats:
            # Without this guard the code after the loop would fail with a
            # NameError on the never-assigned format_info / info.
            from ..utils import ExtractorError
            raise ExtractorError(u'Unable to find any video formats')

        formats = []
        for f_id, f_path in available_formats:
            # video_id goes in its own slot; the progress text is the note.
            format_page = self._download_webpage(
                compat_urlparse.urljoin(url, f_path),
                video_id, u'Downloading info for %s format' % f_id)
            json_data = self._search_regex(
                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                format_page, u'json data', flags=re.DOTALL)
            info = json.loads(json_data)
            format_info = info['videoPlayerObject']['video']
            formats.append({
                'format_id': f_id,
                'url': format_info['url'],
                # the last character of the selected encoding is dropped
                # before the int conversion
                'height': int(info['titleObject']['encoding']['selected'][:-1]),
            })

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': format_info['slate'],
            'duration': int(info['titleObject']['title']['duration_seconds']),
        }

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -42,8 +43,9 @@ class InternetVideoArchiveIE(InfoExtractor):
video_id = query_dic['publishedid'][0]
url = self._build_url(query)
flashconfiguration = self._download_xml(url, video_id,
flashconfiguration_xml = self._download_webpage(url, video_id,
u'Downloading flash configuration')
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality
@@ -51,8 +53,9 @@ class InternetVideoArchiveIE(InfoExtractor):
file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()),
file_url)
info = self._download_xml(file_url, video_id,
info_xml = self._download_webpage(file_url, video_id,
u'Downloading video info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
item = info.find('channel/item')
def _bp(p):

View File

@@ -2,6 +2,7 @@
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -31,9 +32,12 @@ class JeuxVideoIE(InfoExtractor):
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
xml_link, u'video ID')
config = self._download_xml(
xml_config = self._download_webpage(
xml_link, title, u'Downloading XML config')
info_json = config.find('format.json').text
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
info_json = self._search_regex(
r'(?sm)<format\.json>(.*?)</format\.json>',
xml_config, u'JSON information')
info = json.loads(info_json)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file']

View File

@@ -1,6 +1,7 @@
import json
import os
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -93,9 +94,10 @@ class JustinTVIE(InfoExtractor):
archive_id = m.group(1)
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
doc = self._download_xml(api, chapter_id,
chapter_info_xml = self._download_webpage(api, chapter_id,
note=u'Downloading chapter information',
errnote=u'Chapter information download failed')
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
for a in doc.findall('.//archive'):
if archive_id == a.find('./id').text:
break

View File

@@ -1,5 +1,6 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -79,7 +80,8 @@ class LivestreamOriginalIE(InfoExtractor):
user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
info = self._download_xml(api_url, video_id)
api_response = self._download_webpage(api_url, video_id)
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
item = info.find('channel').find('item')
ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']

View File

@@ -109,8 +109,9 @@ class MTVIE(InfoExtractor):
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
u'Downloading info')
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
return [self._get_video_info(item) for item in idoc.findall('.//item')]
def _real_extract(self, url):

View File

@@ -1,4 +1,5 @@
import os.path
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -32,7 +33,8 @@ class MySpassIE(InfoExtractor):
# get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata = self._download_xml(metadata_url, video_id)
metadata_text = self._download_webpage(metadata_url, video_id)
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
# extract values from metadata
url_flv_el = metadata.find('url_flv')

View File

@@ -1,5 +1,6 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -37,12 +38,14 @@ class NaverIE(InfoExtractor):
'protocol': 'p2p',
'inKey': key,
})
info = self._download_xml(
info_xml = self._download_webpage(
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
video_id, u'Downloading video info')
urls = self._download_xml(
urls_xml = self._download_webpage(
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'):

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str
@@ -20,8 +21,8 @@ class NBCNewsIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = all_info.find('video')
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
return {'id': video_id,
'title': info.find('headline').text,

View File

@@ -1,5 +1,6 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -25,8 +26,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_doc = self._download_xml(path_url, video_id,
path_response = self._download_webpage(path_url, video_id,
u'Downloading final video url')
path_doc = xml.etree.ElementTree.fromstring(path_response)
video_url = path_doc.find('path').text
join = compat_urlparse.urljoin

View File

@@ -2,6 +2,7 @@
import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -80,7 +81,7 @@ class NiconicoIE(InfoExtractor):
# the cookies in order to be able to download the info webpage
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
video_info = self._download_xml(
video_info_webpage = self._download_webpage(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note=u'Downloading video info page')
@@ -91,6 +92,7 @@ class NiconicoIE(InfoExtractor):
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
video_title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text
video_format = video_extension.upper()
@@ -105,11 +107,13 @@ class NiconicoIE(InfoExtractor):
video_uploader = video_uploader_id
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try:
user_info = self._download_xml(
user_info_webpage = self._download_webpage(
url, video_id, note=u'Downloading user information')
video_uploader = user_info.find('.//nickname').text
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
else:
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
video_uploader = user_info.find('.//nickname').text
return {
'id': video_id,

View File

@@ -1,49 +0,0 @@
import json
import re
from .common import InfoExtractor
class PodomaticIE(InfoExtractor):
    """Information extractor for single podcast entries on podomatic.com."""

    IE_NAME = 'podomatic'
    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
    _TEST = {
        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
        u"file": u"2009-01-02T16_03_35-08_00.mp3",
        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
        u"info_dict": {
            u"uploader": u"Science Teaching Tips",
            u"uploader_id": u"scienceteachingtips",
            u"title": u"64. When the Moon Hits Your Eye",
            u"duration": 446,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the entry addressed by *url*.

        The URL is split with ``_VALID_URL`` into protocol, channel and entry
        id; the entry's ``embed_params`` JSON document is then downloaded and
        its fields are mapped onto the returned dictionary.
        """
        url_match = re.match(self._VALID_URL, url)
        entry_id = url_match.group('id')
        channel_name = url_match.group('channel')

        params_url = (
            '%s://%s.podomatic.com/entry/embed_params/%s'
            '?permalink=true&rtmp=0'
        ) % (url_match.group('proto'), channel_name, entry_id)
        raw_params = self._download_webpage(
            params_url, entry_id, note=u'Downloading video info')
        params = json.loads(raw_params)

        media_url = params['downloadLink']
        podcast_name = params['podcast']
        entry_title = params['title']
        cover_url = params['imageLocation']
        # 'length' is scaled down by 1000 and truncated
        # (presumably milliseconds -> whole seconds; confirm against the API).
        length_seconds = int(params['length'] / 1000.0)

        return {
            'id': entry_id,
            'url': media_url,
            'title': entry_title,
            'uploader': podcast_name,
            'uploader_id': channel_name,
            'thumbnail': cover_url,
            'duration': length_seconds,
        }

View File

@@ -1,6 +1,7 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -34,11 +35,12 @@ class SinaIE(InfoExtractor):
def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id})
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
video_id, u'Downloading video url')
image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, u'Downloading thumbnail info')
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
return {'id': video_id,
'url': url_doc.find('./durl/url').text,

View File

@@ -1,252 +0,0 @@
# encoding: utf-8
import re
import json
import hashlib
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError
)
class SmotriIE(InfoExtractor):
    """Information extractor for single videos hosted on smotri.com.

    The direct media URL comes from a small JSON endpoint (``vt.php``); the
    remaining metadata is scraped from the regular video web page.
    """

    IE_DESC = u'Smotri.com'
    IE_NAME = u'smotri'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'

    _TESTS = [
        # real video id 2610366
        {
            u'url': u'http://smotri.com/video/view/?id=v261036632ab',
            u'file': u'v261036632ab.mp4',
            u'md5': u'2a7b08249e6f5636557579c368040eb9',
            u'info_dict': {
                u'title': u'катастрофа с камер видеонаблюдения',
                u'uploader': u'rbc2008',
                u'uploader_id': u'rbc08',
                u'upload_date': u'20131118',
                u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
                u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
            },
        },
        # real video id 57591
        {
            u'url': u'http://smotri.com/video/view/?id=v57591cb20',
            u'file': u'v57591cb20.flv',
            u'md5': u'830266dfc21f077eac5afd1883091bcd',
            u'info_dict': {
                u'title': u'test',
                u'uploader': u'Support Photofile@photofile',
                u'uploader_id': u'support-photofile',
                u'upload_date': u'20070704',
                u'description': u'test, видео test',
                u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
            },
        },
        # video-password
        {
            u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
            u'file': u'v1390466a13c.mp4',
            u'md5': u'f6331cef33cad65a0815ee482a54440b',
            u'info_dict': {
                u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
                u'uploader': u'timoxa40',
                u'uploader_id': u'timoxa40',
                u'upload_date': u'20100404',
                u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
                u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
            },
            u'params': {
                u'videopassword': u'qwerty',
            },
        },
        # age limit + video-password
        {
            u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
            u'file': u'v15408898bcf.flv',
            u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
            u'info_dict': {
                u'title': u'этот ролик не покажут по ТВ',
                u'uploader': u'zzxxx',
                u'uploader_id': u'ueggb',
                u'upload_date': u'20101001',
                u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
                u'age_limit': 18,
                u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
            },
            u'params': {
                u'videopassword': u'333'
            }
        }
    ]

    # Values of the 'status' field in the video JSON response.
    _SUCCESS = 0
    _PASSWORD_NOT_VERIFIED = 1
    _PASSWORD_DETECTED = 2
    _VIDEO_NOT_FOUND = 3

    def _search_meta(self, name, html, display_name=None):
        """Return the content of ``<meta itemprop="NAME" ...>`` found in *html*.

        The lookup is non-fatal (``fatal=False``), so callers must be prepared
        for a missing tag. *display_name* is the label used in messages and
        defaults to *name*.
        """
        if display_name is None:
            display_name = name
        # The original had a second, unreachable ``return`` after this one;
        # it has been dropped.
        return self._html_search_regex(
            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
            html, display_name, fatal=False)

    @staticmethod
    def _parse_upload_date(upload_date_str):
        """Turn a ``YYYY.MM.DDT...`` string into ``YYYYMMDD``, or None."""
        if not upload_date_str:
            return None
        date_m = re.search(
            r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
        if date_m is None:
            return None
        return date_m.group('year') + date_m.group('month') + date_m.group('day')

    @staticmethod
    def _parse_duration(duration_str):
        """Turn a ``T..H..M..S`` string into a number of seconds, or None."""
        if not duration_str:
            return None
        duration_m = re.search(
            r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
        if duration_m is None:
            return None
        return (
            (int(duration_m.group('hours')) * 60 * 60) +
            (int(duration_m.group('minutes')) * 60) +
            int(duration_m.group('seconds'))
        )

    def _real_extract(self, url):
        """Extract the info dict for a single smotri.com video.

        Raises ExtractorError when the video does not exist, when it is
        password protected and no (or a wrong) ``videopassword`` parameter was
        supplied, or when the JSON endpoint reports an unknown status.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        real_video_id = mobj.group('realvideoid')

        # Download video JSON data
        video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
        video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
        video_json = json.loads(video_json_page)

        status = video_json['status']
        if status == self._VIDEO_NOT_FOUND:
            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
        elif status == self._PASSWORD_DETECTED:
            # The video is protected by a password, retry with video-password set.
            # NOTE(review): the nesting of this branch was reconstructed from the
            # statement order - confirm against the project history.
            video_password = self._downloader.params.get('videopassword', None)
            if not video_password:
                raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
            video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
            video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
            video_json = json.loads(video_json_page)
            status = video_json['status']
            if status == self._PASSWORD_NOT_VERIFIED:
                raise ExtractorError(u'Video password is invalid', expected=True)

        if status != self._SUCCESS:
            raise ExtractorError(u'Unexpected status value %s' % status)

        # Extract the URL of the video
        video_url = video_json['file_data']

        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
        video_page_url = 'http://' + mobj.group('url')
        video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')

        # Adult content
        if re.search(u'EroConfirmText">', video_page) is not None:
            self.report_age_confirmation()
            confirm_string = self._html_search_regex(
                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
                video_page, u'confirm string')
            confirm_url = video_page_url + '&confirm=%s' % confirm_string
            video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
            adult_content = True
        else:
            adult_content = False

        # Extract the rest of meta data
        video_title = self._search_meta(u'name', video_page, u'title')
        if not video_title:
            # Fall back to the last path component of the media URL.
            video_title = video_url.rsplit('/', 1)[-1]

        video_description = self._search_meta(u'description', video_page)
        # _search_meta is non-fatal, so the description may be missing; only
        # strip the site boilerplate when there is something to strip.
        if video_description is not None:
            END_TEXT = u' на сайте Smotri.com'
            if video_description.endswith(END_TEXT):
                video_description = video_description[:-len(END_TEXT)]
            START_TEXT = u'Смотреть онлайн ролик '
            if video_description.startswith(START_TEXT):
                video_description = video_description[len(START_TEXT):]

        video_thumbnail = self._search_meta(u'thumbnail', video_page)

        # Both helpers tolerate a missing meta tag and return None for it.
        upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
        video_upload_date = self._parse_upload_date(upload_date_str)

        duration_str = self._search_meta(u'duration', video_page)
        video_duration = self._parse_duration(duration_str)

        video_uploader = self._html_search_regex(
            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
            video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)

        video_uploader_id = self._html_search_regex(
            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
            video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)

        video_view_count = self._html_search_regex(
            u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
            video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'video_duration': video_duration,
            'view_count': video_view_count,
            'age_limit': 18 if adult_content else 0,
            'video_page_url': video_page_url
        }
class SmotriCommunityIE(InfoExtractor):
    """Playlist extractor for the videos of a smotri.com community."""

    IE_DESC = u'Smotri.com community videos'
    IE_NAME = u'smotri:community'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'

    def _real_extract(self, url):
        """Return a playlist built from the community's RSS export."""
        community_id = re.match(self._VALID_URL, url).group('communityid')

        rss_url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
        feed = self._download_xml(rss_url, community_id, u'Downloading community RSS')

        # Every <item><link> of the feed points at one video page.
        entries = []
        for link_el in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link_el.text, 'Smotri'))

        # The community title is quoted inside the channel description.
        channel_description = feed.find('./channel/description').text
        community_title = self._html_search_regex(
            u'^Видео сообщества "([^"]+)"$', channel_description, u'community title')

        return self.playlist_result(entries, community_id, community_title)
class SmotriUserIE(InfoExtractor):
    """Playlist extractor for the videos uploaded by a smotri.com user."""

    IE_DESC = u'Smotri.com user videos'
    IE_NAME = u'smotri:user'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'

    def _real_extract(self, url):
        """Return a playlist built from the user's RSS export."""
        user_id = re.match(self._VALID_URL, url).group('userid')

        rss_url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
        feed = self._download_xml(rss_url, user_id, u'Downloading user RSS')

        # Every <item><link> of the feed points at one video page.
        entries = []
        for link_el in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link_el.text, 'Smotri'))

        # The nickname is the tail of the channel description.
        channel_description = feed.find('./channel/description').text
        user_nickname = self._html_search_regex(
            u'^Видео режиссера (.*)$', channel_description,
            u'user nickname')

        return self.playlist_result(entries, user_id, user_nickname)

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -32,10 +33,12 @@ class SpiegelIE(InfoExtractor):
r'<div class="module-title">(.*?)</div>', webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
idoc = self._download_xml(
xml_code = self._download_webpage(
xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
idoc = xml.etree.ElementTree.fromstring(xml_code)
formats = [
{
'format_id': n.tag.rpartition('type')[2],

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -31,7 +32,8 @@ class TeamcocoIE(InfoExtractor):
self.report_extraction(video_id)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
qualities = ['500k', '480p', '1000k', '720p', '1080p']

View File

@@ -1,5 +1,6 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -39,9 +40,11 @@ class TouTvIE(InfoExtractor):
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
streams_doc = self._download_xml(
streams_webpage = self._download_webpage(
streams_url, video_id, note=u'Downloading stream list')
streams_doc = xml.etree.ElementTree.fromstring(
streams_webpage.encode('utf-8'))
video_url = next(n.text
for n in streams_doc.findall('.//choice/url')
if u'//ad.doubleclick' not in n.text)

View File

@@ -1,5 +1,6 @@
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -35,10 +36,12 @@ class TriluliluIE(InfoExtractor):
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
u'video-formats2' % log)
format_doc = self._download_xml(
format_str = self._download_webpage(
format_url, video_id,
note=u'Downloading formats',
errnote=u'Error while downloading formats')
format_doc = xml.etree.ElementTree.fromstring(format_str)
video_url_template = (
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -26,8 +27,9 @@ class VideofyMeIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video = config.find('video')
sources = video.find('sources')
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)

View File

@@ -5,16 +5,14 @@ from .common import InfoExtractor
class VideoPremiumIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
_TEST = {
u'url': u'http://videopremium.tv/4w7oadjsf156',
u'file': u'4w7oadjsf156.f4v',
u'md5': u'e51e4a266aab7531c6ac06f4ffee3b0d',
u'info_dict': {
u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
},
u'params': {
u'skip_download': True,
},
}
}
def _real_extract(self, url):
@@ -41,4 +39,4 @@ class VideoPremiumIE(InfoExtractor):
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
'ext': 'f4v',
'title': video_title,
}
}

View File

@@ -249,46 +249,25 @@ class VimeoChannelIE(InfoExtractor):
IE_NAME = u'vimeo:channel'
_VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
def _extract_videos(self, list_id, base_url):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
video_ids = []
for pagenum in itertools.count(1):
webpage = self._download_webpage(
'%s/videos/page:%d/' % (base_url, pagenum),list_id,
u'Downloading page %s' % pagenum)
webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
channel_id, u'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids]
list_title = self._html_search_regex(self._TITLE_RE, webpage,
u'list title')
channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
webpage, u'channel title')
return {'_type': 'playlist',
'id': list_id,
'title': list_title,
'id': channel_id,
'title': channel_title,
'entries': entries,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
class VimeoUserIE(VimeoChannelIE):
    """Playlist extractor for all videos of a Vimeo user page.

    Reuses the paging logic of VimeoChannelIE (``_extract_videos``) and only
    overrides the URL pattern and the regex used to find the list title.
    """

    IE_NAME = u'vimeo:user'
    # The dot before "com" is escaped: the previous pattern ``vimeo.\com``
    # matched any character there and relied on the unknown escape ``\c``,
    # which newer Python 3 versions reject when compiling the pattern.
    _VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)'
    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'

    @classmethod
    def suitable(cls, url):
        """Claim *url* only if neither the channel nor the video IE wants it."""
        # This pattern is very broad, so the more specific extractors go first.
        if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url):
            return False
        return super(VimeoUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the playlist of videos listed on the user's page."""
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        return self._extract_videos(name, 'http://vimeo.com/%s' % name)

View File

@@ -17,21 +17,27 @@ class YahooIE(InfoExtractor):
_TESTS = [
{
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
u'file': u'214727115.mp4',
u'md5': u'4962b075c08be8690a922ee026d05e69',
u'file': u'214727115.flv',
u'info_dict': {
u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
u'description': u'Julian and Travis watch Julian Smith',
},
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
},
{
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
u'file': u'103000935.mp4',
u'md5': u'd6e6fc6e1313c608f316ddad7b82b306',
u'file': u'103000935.flv',
u'info_dict': {
u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
},
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
},
]
@@ -40,19 +46,15 @@ class YahooIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
items_json = self._search_regex(r'mediaItems: ({.*?})$',
items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
webpage, u'items', flags=re.MULTILINE)
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
# The 'meta' field is not always in the video webpage, we request it
# from another page
long_id = info['id']
return self._get_info(info['id'], video_id)
def _get_info(self, long_id, video_id):
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
' AND protocol="http"' % long_id)
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
data = compat_urllib_parse.urlencode({
'q': query,
'env': 'prod',
@@ -89,39 +91,17 @@ class YahooIE(InfoExtractor):
formats.append(format_info)
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
return {
info = {
'id': video_id,
'title': meta['title'],
'formats': formats,
'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
class YahooNewsIE(YahooIE):
    """Extractor for videos embedded in news.yahoo.com video pages."""

    IE_NAME = 'yahoo:news'
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'

    _TEST = {
        u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        u'md5': u'67010fdf3a08d290e060a4dd96baa07b',
        u'info_dict': {
            u'id': u'104538833',
            u'ext': u'mp4',
            u'title': u'China Moses Is Crazy About the Blues',
            u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
    }

    # Overwrite YahooIE properties we don't want
    _TESTS = []

    def _real_extract(self, url):
        """Find the page's content id and delegate to ``_get_info``."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)
        # The long id is embedded in the page as a quoted ``contentId`` value.
        content_id = self._search_regex(r'contentId: \'(.+?)\',', page, u'long id')
        return self._get_info(content_id, video_id)
return info
class YahooSearchIE(SearchInfoExtractor):

View File

@@ -11,6 +11,7 @@ import socket
import string
import struct
import traceback
import xml.etree.ElementTree
import zlib
from .common import InfoExtractor, SearchInfoExtractor
@@ -28,7 +29,6 @@ from ..utils import (
clean_html,
get_cachedir,
get_element_by_id,
get_element_by_attribute,
ExtractorError,
unescapeHTML,
unified_strdate,
@@ -1144,7 +1144,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'asrs': 1,
})
list_url = caption_url + '&' + list_params
caption_list = self._download_xml(list_url, video_id)
list_page = self._download_webpage(list_url, video_id)
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
original_lang_node = caption_list.find('track')
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
@@ -1527,7 +1528,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
IE_NAME = u'youtube:playlist'
@classmethod
@@ -1538,24 +1539,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
def _ids_to_results(self, ids):
return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
for vid_id in ids]
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
get_element_by_attribute('class', 'title ', webpage))
title = clean_html(title_span)
video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
ids = orderedSet(re.findall(video_re, webpage))
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)
def _real_extract(self, url):
# Extract playlist id
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1573,20 +1556,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
# Extract the video ids from the playlist pages
ids = []
for page_num in itertools.count(1):
url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
matches = re.finditer(self._VIDEO_RE, page)
# We remove the duplicates and the link with index 0
# (it's not the first video of the playlist)
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
# The ids are duplicated
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
ids.extend(new_ids)
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
@@ -1594,7 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
playlist_title = self._og_search_title(page)
url_results = self._ids_to_results(ids)
url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
for vid_id in ids]
return self.playlist_result(url_results, playlist_id, playlist_title)
@@ -1791,6 +1769,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
"""
_LOGIN_REQUIRED = True
_PAGING_STEP = 30
# use action_load_personal_feed instead of action_load_system_feed
_PERSONAL_FEED = False
@@ -1810,8 +1789,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_extract(self, url):
feed_entries = []
paging = 0
for i in itertools.count(1):
# The step argument is available only in 2.7 or higher
for i in itertools.count(0):
paging = i*self._PAGING_STEP
info = self._download_webpage(self._FEED_TEMPLATE % paging,
u'%s feed' % self._FEED_NAME,
u'Downloading page %s' % i)
@@ -1824,7 +1804,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
for video_id in ids)
if info['paging'] is None:
break
paging = info['paging']
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
@@ -1844,6 +1823,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
_FEED_NAME = 'watch_later'
_PLAYLIST_TITLE = u'Youtube Watch Later'
_PAGING_STEP = 100
_PERSONAL_FEED = True
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
@@ -1853,6 +1833,13 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
_PERSONAL_FEED = True
_PLAYLIST_TITLE = u'Youtube Watch History'
def _real_extract(self, url):
webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging')
# The step is actually a ridiculously big number (like 1374343569725646)
self._PAGING_STEP = int(data_paging)
return super(YoutubeHistoryIE, self)._real_extract(url)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'

View File

@@ -1,38 +1,24 @@
# coding: utf-8
import operator
import re
from .common import InfoExtractor
from ..utils import (
parse_xml_doc,
unified_strdate,
)
class ZDFIE(InfoExtractor):
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
_TEST = {
u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
u"file": u"2037704.webm",
u"info_dict": {
u"upload_date": u"20131127",
u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
u"uploader": u"spezial",
u"title": u"ZDFspezial - Ende des Machtpokers"
},
u"skip": u"Videos on ZDF.de are depublicised in short order",
}
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
doc = self._download_xml(
xml_url, video_id,
note=u'Downloading video info',
errnote=u'Failed to download video info')
info_xml = self._download_webpage(
xml_url, video_id, note=u'Downloading video info')
doc = parse_xml_doc(info_xml)
title = doc.find('.//information/title').text
description = doc.find('.//information/detail').text

View File

@@ -1009,6 +1009,11 @@ def unsmuggle_url(smug_url):
return url, data
def parse_xml_doc(s):
    """Parse the unicode string *s* as XML and return its root element.

    The text is encoded to UTF-8 before being handed to ElementTree.

    Raises TypeError if *s* is not a unicode (text) string. This used to be
    an ``assert``, which is removed when Python runs with ``-O`` and would
    then let byte strings through unchecked.
    """
    if not isinstance(s, type(u'')):
        raise TypeError('parse_xml_doc expects a unicode string, got %s'
                        % type(s).__name__)
    return xml.etree.ElementTree.fromstring(s.encode('utf-8'))
def format_bytes(bytes):
if bytes is None:
return u'N/A'

View File

@@ -1,2 +1,2 @@
__version__ = '2013.12.03'
__version__ = '2013.11.25.3'