Compare commits
58 commits: 2014.11.02 … 2014.11.12
Commits (SHA1):

8f6ec4bbe6, c295490830, eb4cb42a02, 7a8cbc72b2, 2774852c2f, bbcc21efd1,
60526d6bcb, 1d4df56d09, a1cf99d03a, 3c6af203cc, 1a92e086a7, 519c73f267,
a6dae6c09c, f866e474f3, 8bb9b97c97, d6fdc38682, c2b61af548, 2fdbf27ad8,
29ed169cd6, 9908e03528, 1fe8fb8c20, 5d63b0aa93, 4164f0117e, 37aab27808,
6110bbbfdd, cde9b380e6, dab647a7b6, a316a83d2b, 81b22aee8b, a80c96eab0,
20436c30c9, 3828505646, 11fba1751d, 12ea2f30cf, 9c3e870393, 44789f2457,
711ede6e1b, a32f253112, 94bd361318, acd40f64ed, 766306450d, e7642ab572,
bdf9701729, b5af6fcdad, 278143df5b, fdca55fe34, 4f195f55f0, ac35c26686,
42f7d2f588, 39f0a2a6b7, ecc0c5ee01, 451948b28c, baa708036c, 8c25f81bee,
4c83c96795, 9580711841, c30ae9594c, ccdd0ffb80
README.md (29 changes)
```diff
@@ -131,17 +131,19 @@ which means you can modify it, redistribute it or use it however you like.
                                      %(upload_date)s for the upload date
                                      (YYYYMMDD), %(extractor)s for the provider
                                      (youtube, metacafe, etc), %(id)s for the
-                                     video id, %(playlist)s for the playlist the
+                                     video id, %(playlist_title)s,
+                                     %(playlist_id)s, or %(playlist)s (=title if
+                                     present, ID otherwise) for the playlist the
                                      video is in, %(playlist_index)s for the
-                                     position in the playlist and %% for a
-                                     literal percent. %(height)s and %(width)s
-                                     for the width and height of the video
-                                     format. %(resolution)s for a textual
+                                     position in the playlist. %(height)s and
+                                     %(width)s for the width and height of the
+                                     video format. %(resolution)s for a textual
                                      description of the resolution of the video
-                                     format. Use - to output to stdout. Can also
-                                     be used to download to a different
-                                     directory, for example with -o '/my/downloa
-                                     ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+                                     format. %% for a literal percent. Use - to
+                                     output to stdout. Can also be used to
+                                     download to a different directory, for
+                                     example with -o '/my/downloads/%(uploader)s
+                                     /%(title)s-%(id)s.%(ext)s' .
     --autonumber-size NUMBER         Specifies the number of digits in
                                      %(autonumber)s when it is present in output
                                      filename template or --auto-number option
@@ -239,8 +241,13 @@ which means you can modify it, redistribute it or use it however you like.
                                      "worst", "worstvideo" and "worstaudio". By
                                      default, youtube-dl will pick the best
                                      quality. Use commas to download multiple
-                                     audio formats, such as -f
-                                     136/137/mp4/bestvideo,140/m4a/bestaudio
+                                     audio formats, such as -f
+                                     136/137/mp4/bestvideo,140/m4a/bestaudio.
+                                     You can merge the video and audio of two
+                                     formats into a single file using -f <video-
+                                     format>+<audio-format> (requires ffmpeg or
+                                     avconv), for example -f
+                                     bestvideo+bestaudio.
     --all-formats                    download all available video formats
     --prefer-free-formats            prefer free video formats unless a specific
                                      one is requested
```
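The merged-format selection documented above can also be driven from the Python API. A minimal sketch (mine, not part of the change set; it assumes a youtube-dl new enough to contain these commits and ffmpeg or avconv on PATH):

```python
import youtube_dl

ydl = youtube_dl.YoutubeDL({
    # Best video-only plus best audio-only stream, merged into a single
    # file by ffmpeg/avconv, as the README hunk above describes.
    'format': 'bestvideo+bestaudio',
    'outtmpl': '%(title)s-%(id)s.%(ext)s',
})
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```

For playlist URLs, the newly documented `%(playlist_title)s` and `%(playlist_id)s` fields can be used in the output template as well.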
```diff
@@ -57,7 +57,7 @@ class FakeYDL(YoutubeDL):
         # Different instances of the downloader can't share the same dictionary
         # some test set the "sublang" parameter, which would break the md5 checks.
         params = get_params(override=override)
-        super(FakeYDL, self).__init__(params)
+        super(FakeYDL, self).__init__(params, auto_init=False)
         self.result = []
 
     def to_screen(self, s, skip_eol=None):
```
test/test_compat.py (new file, 44 lines)
```python
#!/usr/bin/env python
# coding: utf-8

from __future__ import unicode_literals

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


from youtube_dl.utils import get_filesystem_encoding
from youtube_dl.compat import (
    compat_getenv,
    compat_expanduser,
)


class TestCompat(unittest.TestCase):
    def test_compat_getenv(self):
        test_str = 'тест'
        os.environ['YOUTUBE-DL-TEST'] = (
            test_str if sys.version_info >= (3, 0)
            else test_str.encode(get_filesystem_encoding()))
        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)

    def test_compat_expanduser(self):
        test_str = 'C:\Documents and Settings\тест\Application Data'
        os.environ['HOME'] = (
            test_str if sys.version_info >= (3, 0)
            else test_str.encode(get_filesystem_encoding()))
        self.assertEqual(compat_expanduser('~'), test_str)

    def test_all_present(self):
        import youtube_dl.compat
        all_names = youtube_dl.compat.__all__
        present_names = set(filter(
            lambda c: '_' in c and not c.startswith('_'),
            dir(youtube_dl.compat))) - set(['unicode_literals'])
        self.assertEqual(all_names, sorted(present_names))

if __name__ == '__main__':
    unittest.main()
```
```diff
@@ -23,10 +23,12 @@ import json
 import socket
 
 import youtube_dl.YoutubeDL
-from youtube_dl.utils import (
+from youtube_dl.compat import (
     compat_http_client,
     compat_urllib_error,
     compat_HTTPError,
+)
+from youtube_dl.utils import (
     DownloadError,
     ExtractorError,
     format_bytes,
@@ -94,7 +96,7 @@ def generator(test_case):
         params.setdefault('extract_flat', True)
         params.setdefault('skip_download', True)
 
-        ydl = YoutubeDL(params)
+        ydl = YoutubeDL(params, auto_init=False)
         ydl.add_default_info_extractors()
         finished_hook_called = set()
         def _hook(status):
```
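Both test hunks pass the new `auto_init=False` flag. My reading (an inference from the surrounding code, not stated in the diff): it skips the default extractor and post-processor registration that `YoutubeDL.__init__` otherwise performs, so the tests opt in explicitly:

```python
ydl = YoutubeDL(params, auto_init=False)  # nothing registered automatically
ydl.add_default_info_extractors()         # the test opts in itself
```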
```diff
@@ -37,7 +37,9 @@ def _make_testfunc(testfile):
             or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
         # Recompile
         try:
-            subprocess.check_call(['mxmlc', '-output', swf_file, as_file])
+            subprocess.check_call([
+                'mxmlc', '-output', swf_file,
+                '-static-link-runtime-shared-libraries', as_file])
         except OSError as ose:
             if ose.errno == errno.ENOENT:
                 print('mxmlc not found! Skipping test.')
```
```diff
@@ -20,7 +20,6 @@ from youtube_dl.utils import (
     encodeFilename,
     find_xpath_attr,
     fix_xml_ampersands,
-    get_meta_content,
     orderedSet,
     OnDemandPagedList,
     InAdvancePagedList,
@@ -46,8 +45,6 @@ from youtube_dl.utils import (
     escape_url,
     js_to_json,
     get_filesystem_encoding,
-    compat_getenv,
-    compat_expanduser,
 )
 
 
@@ -157,17 +154,6 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
 
-    def test_meta_parser(self):
-        testhtml = '''
-        <head>
-        <meta name="description" content="foo &amp; bar">
-        <meta content='Plato' name='author'/>
-        </head>
-        '''
-        get_meta = lambda name: get_meta_content(name, testhtml)
-        self.assertEqual(get_meta('description'), 'foo & bar')
-        self.assertEqual(get_meta('author'), 'Plato')
-
     def test_xpath_with_ns(self):
         testxml = '''<root xmlns:media="http://example.com/">
             <media:song>
@@ -359,17 +345,5 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": true}')
         self.assertEqual(json.loads(on), {'abc': True})
 
-    def test_compat_getenv(self):
-        test_str = 'тест'
-        os.environ['YOUTUBE-DL-TEST'] = (test_str if sys.version_info >= (3, 0)
-            else test_str.encode(get_filesystem_encoding()))
-        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
-
-    def test_compat_expanduser(self):
-        test_str = 'C:\Documents and Settings\тест\Application Data'
-        os.environ['HOME'] = (test_str if sys.version_info >= (3, 0)
-            else test_str.encode(get_filesystem_encoding()))
-        self.assertEqual(compat_expanduser('~'), test_str)
-
 if __name__ == '__main__':
     unittest.main()
```
```diff
@@ -22,13 +22,15 @@ import traceback
 if os.name == 'nt':
     import ctypes
 
-from .utils import (
+from .compat import (
     compat_cookiejar,
     compat_expanduser,
     compat_http_client,
     compat_str,
     compat_urllib_error,
     compat_urllib_request,
+)
+from .utils import (
     escape_url,
     ContentTooShortError,
     date_from_str,
@@ -62,6 +64,7 @@ from .utils import (
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
+from .downloader.rtmp import rtmpdump_version
 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
 from .version import __version__
 
@@ -655,6 +658,8 @@ class YoutubeDL(object):
             extra = {
                 'n_entries': n_entries,
                 'playlist': playlist,
+                'playlist_id': ie_result.get('id'),
+                'playlist_title': ie_result.get('title'),
                 'playlist_index': i + playliststart,
                 'extractor': ie_result['extractor'],
                 'webpage_url': ie_result['webpage_url'],
@@ -833,6 +838,13 @@ class YoutubeDL(object):
                 formats_info = (self.select_format(format_1, formats),
                     self.select_format(format_2, formats))
                 if all(formats_info):
+                    # The first format must contain the video and the
+                    # second the audio
+                    if formats_info[0].get('vcodec') == 'none':
+                        self.report_error('The first format must '
+                            'contain the video, try using '
+                            '"-f %s+%s"' % (format_2, format_1))
+                        return
                     selected_format = {
                         'requested_formats': formats_info,
                         'format': rf,
@@ -1321,6 +1333,7 @@ class YoutubeDL(object):
                 platform.python_version(), platform_name()))
 
         exe_versions = FFmpegPostProcessor.get_versions()
+        exe_versions['rtmpdump'] = rtmpdump_version()
         exe_str = ', '.join(
             '%s %s' % (exe, v)
             for exe, v in sorted(exe_versions.items())
```
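The new guard in the format-selection hunk can be restated standalone; an illustrative sketch (names are mine, not the project's):

```python
def check_merge_order(first_fmt, fmt_1, fmt_2):
    # Mirrors the new check above: in '-f <video>+<audio>' the first
    # selected format must carry the video stream.
    if first_fmt.get('vcodec') == 'none':
        raise ValueError(
            'The first format must contain the video, '
            'try using "-f %s+%s"' % (fmt_2, fmt_1))
```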
```diff
@@ -13,10 +13,12 @@ import sys
 from .options import (
     parseOpts,
 )
-from .utils import (
+from .compat import (
     compat_expanduser,
     compat_getpass,
     compat_print,
+)
+from .utils import (
     DateRange,
     DEFAULT_OUTTMPL,
     decodeOption,
```
```diff
@@ -8,10 +8,8 @@ import re
 import shutil
 import traceback
 
-from .utils import (
-    compat_expanduser,
-    write_json_file,
-)
+from .compat import compat_expanduser
+from .utils import write_json_file
 
 
 class Cache(object):
```
youtube_dl/compat.py (new file, 317 lines)
```python
from __future__ import unicode_literals

import getpass
import os
import subprocess
import sys


try:
    import urllib.request as compat_urllib_request
except ImportError:  # Python 2
    import urllib2 as compat_urllib_request

try:
    import urllib.error as compat_urllib_error
except ImportError:  # Python 2
    import urllib2 as compat_urllib_error

try:
    import urllib.parse as compat_urllib_parse
except ImportError:  # Python 2
    import urllib as compat_urllib_parse

try:
    from urllib.parse import urlparse as compat_urllib_parse_urlparse
except ImportError:  # Python 2
    from urlparse import urlparse as compat_urllib_parse_urlparse

try:
    import urllib.parse as compat_urlparse
except ImportError:  # Python 2
    import urlparse as compat_urlparse

try:
    import http.cookiejar as compat_cookiejar
except ImportError:  # Python 2
    import cookielib as compat_cookiejar

try:
    import html.entities as compat_html_entities
except ImportError:  # Python 2
    import htmlentitydefs as compat_html_entities

try:
    import html.parser as compat_html_parser
except ImportError:  # Python 2
    import HTMLParser as compat_html_parser

try:
    import http.client as compat_http_client
except ImportError:  # Python 2
    import httplib as compat_http_client

try:
    from urllib.error import HTTPError as compat_HTTPError
except ImportError:  # Python 2
    from urllib2 import HTTPError as compat_HTTPError

try:
    from urllib.request import urlretrieve as compat_urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve as compat_urlretrieve


try:
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')

try:
    from urllib.parse import unquote as compat_urllib_parse_unquote
except ImportError:
    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string


try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = compat_urllib_parse_unquote(
                    name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = compat_urllib_parse_unquote(
                    value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result

try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str

try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr

try:
    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error

try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        return "'" + s.replace("'", "'\"'\"'") + "'"


def compat_ord(c):
    if type(c) is int: return c
    else: return ord(c)


if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if os.name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif os.name == 'nt' or os.name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif not 'HOMEPATH' in os.environ:
                return path
            else:
                try:
                    drive = compat_getenv('HOMEDRIVE')
                except KeyError:
                    drive = ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser


if sys.version_info < (3, 0):
    def compat_print(s):
        from .utils import preferredencoding
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        assert type(s) == type(u'')
        print(s)


try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            raise subprocess.CalledProcessError(ret, p.args, output=output)
        return output

if sys.version_info < (3, 0) and sys.platform == 'win32':
    def compat_getpass(prompt, *args, **kwargs):
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass


__all__ = [
    'compat_HTTPError',
    'compat_chr',
    'compat_cookiejar',
    'compat_expanduser',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_html_parser',
    'compat_http_client',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'shlex_quote',
    'subprocess_check_output',
]
```
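A quick usage sketch for the new module (mine, not from the change set): the same names import uniformly on Python 2 and 3, and the pure-Python fallbacks behave like the Python 3 stdlib:

```python
from youtube_dl.compat import (
    compat_parse_qs,
    compat_urllib_parse_unquote,
)

# Percent-decoding with UTF-8 semantics on both Python 2 and 3:
print(compat_urllib_parse_unquote('%D1%82%D0%B5%D1%81%D1%82'))  # -> 'тест'
# Repeated keys are collected into lists, as in Python 3's parse_qs:
print(compat_parse_qs('a=1&a=2&b=3'))  # -> {'a': ['1', '2'], 'b': ['3']}
```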
```diff
@@ -12,9 +12,15 @@ from ..utils import (
     compat_str,
     encodeFilename,
     format_bytes,
+    get_exe_version,
 )
 
 
+def rtmpdump_version():
+    return get_exe_version(
+        'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
+
+
 class RtmpFD(FileDownloader):
     def real_download(self, filename, info_dict):
         def run_rtmpdump(args):
```
```diff
@@ -421,6 +421,7 @@ from .vesti import VestiIE
 from .vevo import VevoIE
 from .vgtv import VGTVIE
 from .vh1 import VH1IE
+from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
```
```diff
@@ -3,12 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_HTTPError,
     compat_str,
     compat_urllib_parse,
     compat_urllib_parse_urlparse,
+)
+from ..utils import (
     ExtractorError,
 )
```
```diff
@@ -14,6 +14,7 @@ from ..utils import (
     compat_str,
     compat_urllib_request,
     compat_parse_qs,
+    compat_urllib_parse_urlparse,
     determine_ext,
     ExtractorError,
@@ -23,7 +24,7 @@ from ..utils import (
 
 
 class BrightcoveIE(InfoExtractor):
-    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
+    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*?\?(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
 
     _TESTS = [
@@ -260,11 +261,19 @@ class BrightcoveIE(InfoExtractor):
         formats = []
         for rend in renditions:
             url = rend['defaultURL']
+            if not url:
+                continue
             if rend['remote']:
-                # This type of renditions are served through akamaihd.net,
-                # but they don't use f4m manifests
-                url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
-                ext = 'flv'
+                url_comp = compat_urllib_parse_urlparse(url)
+                if url_comp.path.endswith('.m3u8'):
+                    formats.extend(
+                        self._extract_m3u8_formats(url, info['id'], 'mp4'))
+                    continue
+                elif 'akamaihd.net' in url_comp.netloc:
+                    # This type of renditions are served through
+                    # akamaihd.net, but they don't use f4m manifests
+                    url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
+                    ext = 'flv'
             else:
                 ext = determine_ext(url)
             size = rend.get('size')
```
```diff
@@ -27,7 +27,7 @@ class Channel9IE(InfoExtractor):
             'title': 'Developer Kick-Off Session: Stuff We Love',
             'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
             'duration': 4576,
-            'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
+            'thumbnail': 'http://video.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
             'session_code': 'KOS002',
             'session_day': 'Day 1',
             'session_room': 'Arena 1A',
@@ -43,7 +43,7 @@ class Channel9IE(InfoExtractor):
             'title': 'Self-service BI with Power BI - nuclear testing',
             'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
             'duration': 1540,
-            'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
+            'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
             'authors': [ 'Mike Wilmot' ],
         },
     }
@@ -94,7 +94,7 @@ class Channel9IE(InfoExtractor):
 
     def _extract_title(self, html):
         title = self._html_search_meta('title', html, 'title')
-        if title is None:
+        if title is None:
             title = self._og_search_title(html)
         TITLE_SUFFIX = ' (Channel 9)'
         if title is not None and title.endswith(TITLE_SUFFIX):
@@ -115,7 +115,7 @@ class Channel9IE(InfoExtractor):
         return self._html_search_meta('description', html, 'description')
 
     def _extract_duration(self, html):
-        m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
+        m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
         return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
 
     def _extract_slides(self, html):
@@ -167,7 +167,7 @@ class Channel9IE(InfoExtractor):
         return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
 
     def _extract_content(self, html, content_path):
-        # Look for downloadable content
+        # Look for downloadable content
         formats = self._formats_from_html(html)
         slides = self._extract_slides(html)
         zip_ = self._extract_zip(html)
@@ -258,16 +258,17 @@ class Channel9IE(InfoExtractor):
 
         webpage = self._download_webpage(url, content_path, 'Downloading web page')
 
-        page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
-        if page_type_m is None:
-            raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True)
-
-        page_type = page_type_m.group('pagetype')
-        if page_type == 'List':  # List page, may contain list of 'item'-like objects
+        page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
+        if page_type_m is not None:
+            page_type = page_type_m.group('pagetype')
+            if page_type == 'Entry':  # Any 'item'-like page, may contain downloadable content
+                return self._extract_entry_item(webpage, content_path)
+            elif page_type == 'Session':  # Event session page, may contain downloadable content
+                return self._extract_session(webpage, content_path)
+            elif page_type == 'Event':
+                return self._extract_list(content_path)
+            else:
+                raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
+
+        else:  # Assuming list
             return self._extract_list(content_path)
-        elif page_type == 'Entry.Item':  # Any 'item'-like page, may contain downloadable content
-            return self._extract_entry_item(webpage, content_path)
-        elif page_type == 'Session':  # Event session page, may contain downloadable content
-            return self._extract_session(webpage, content_path)
-        else:
-            raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True)
```
```diff
@@ -42,11 +42,12 @@ class CinemassacreIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
         if not mobj:
             raise ExtractorError('Can\'t extract embed url and video id')
         playerdata_url = mobj.group('embed_url')
         video_id = mobj.group('video_id')
+        full_video_id = mobj.group('full_video_id')
 
         video_title = self._html_search_regex(
             r'<title>(?P<title>.+?)\|', webpage, 'title')
@@ -60,37 +61,52 @@ class CinemassacreIE(InfoExtractor):
         vidurl = self._search_regex(
             r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
 
-        videolist_url = self._search_regex(
-            r"file\s*:\s*'(http.+?/jwplayer\.smil)'", playerdata, 'jwplayer.smil')
-        videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
+        videolist_url = None
 
-        formats = []
-        baseurl = vidurl[:vidurl.rfind('/')+1]
-        for video in videolist.findall('.//video'):
-            src = video.get('src')
-            if not src:
-                continue
-            file_ = src.partition(':')[-1]
-            width = int_or_none(video.get('width'))
-            height = int_or_none(video.get('height'))
-            bitrate = int_or_none(video.get('system-bitrate'))
-            format = {
-                'url': baseurl + file_,
-                'format_id': src.rpartition('.')[0].rpartition('_')[-1],
-            }
-            if width or height:
-                format.update({
-                    'tbr': bitrate // 1000 if bitrate else None,
-                    'width': width,
-                    'height': height,
-                })
-            else:
-                format.update({
-                    'abr': bitrate // 1000 if bitrate else None,
-                    'vcodec': 'none',
-                })
-            formats.append(format)
-        self._sort_formats(formats)
+        mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata)
+        if mobj:
+            videoserver = mobj.group('videoserver')
+            mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
+            vidid = mobj.group('vidid') if mobj else full_video_id
+            videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
+        else:
+            mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
+            if mobj:
+                videolist_url = mobj.group('smil')
+
+        if videolist_url:
+            videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
+            formats = []
+            baseurl = vidurl[:vidurl.rfind('/')+1]
+            for video in videolist.findall('.//video'):
+                src = video.get('src')
+                if not src:
+                    continue
+                file_ = src.partition(':')[-1]
+                width = int_or_none(video.get('width'))
+                height = int_or_none(video.get('height'))
+                bitrate = int_or_none(video.get('system-bitrate'))
+                format = {
+                    'url': baseurl + file_,
+                    'format_id': src.rpartition('.')[0].rpartition('_')[-1],
+                }
+                if width or height:
+                    format.update({
+                        'tbr': bitrate // 1000 if bitrate else None,
+                        'width': width,
+                        'height': height,
+                    })
+                else:
+                    format.update({
+                        'abr': bitrate // 1000 if bitrate else None,
+                        'vcodec': 'none',
+                    })
+                formats.append(format)
+            self._sort_formats(formats)
+        else:
+            formats = [{
+                'url': vidurl,
+            }]
 
         return {
             'id': video_id,
```
```diff
@@ -4,14 +4,16 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_parse_qs,
     compat_urllib_parse,
-    remove_end,
-    HEADRequest,
     compat_HTTPError,
 )
+from ..utils import (
+    ExtractorError,
+    HEADRequest,
+    remove_end,
+)
 
 
 class CloudyIE(InfoExtractor):
```
```diff
@@ -16,9 +16,10 @@ class CNNIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
-        'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
         'md5': '3e6121ea48df7e2259fe73a0628605c4',
         'info_dict': {
+            'id': 'sports_2013_06_09_nadal-1-on-1.cnn',
+            'ext': 'mp4',
             'title': 'Nadal wins 8th French Open title',
             'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
             'duration': 135,
@@ -27,9 +28,10 @@ class CNNIE(InfoExtractor):
     },
     {
         "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
-        "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
         "md5": "b5cc60c60a3477d185af8f19a2a26f4e",
         "info_dict": {
+            'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
+            'ext': 'mp4',
             "title": "Student's epic speech stuns new freshmen",
             "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
             "upload_date": "20130821",
```
```diff
@@ -12,13 +12,14 @@ import sys
 import time
 import xml.etree.ElementTree
 
-from ..utils import (
+from ..compat import (
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
     compat_urlparse,
     compat_str,
+)
+from ..utils import (
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -403,7 +404,7 @@ class InfoExtractor(object):
             video_info['title'] = playlist_title
         return video_info
 
-    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
+    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
         """
         Perform a regex search on the given string, using a single or a list of
         patterns returning the first matching group.
@@ -424,8 +425,11 @@ class InfoExtractor(object):
             _name = name
 
         if mobj:
-            # return the first matching group
-            return next(g for g in mobj.groups() if g is not None)
+            if group is None:
+                # return the first matching group
+                return next(g for g in mobj.groups() if g is not None)
+            else:
+                return mobj.group(group)
         elif default is not _NO_DEFAULT:
             return default
         elif fatal:
@@ -435,11 +439,11 @@ class InfoExtractor(object):
                 'please report this issue on http://yt-dl.org/bug' % _name)
             return None
 
-    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
+    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
        """
         Like _search_regex, but strips HTML tags and unescapes entities.
         """
-        res = self._search_regex(pattern, string, name, default, fatal, flags)
+        res = self._search_regex(pattern, string, name, default, fatal, flags, group)
         if res:
             return clean_html(res).strip()
         else:
@@ -533,9 +537,9 @@ class InfoExtractor(object):
             display_name = name
         return self._html_search_regex(
             r'''(?ix)<meta
-                    (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
-                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
-            html, display_name, fatal=fatal, **kwargs)
+                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+            html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):
         return self._html_search_meta('dc.creator', html, 'uploader')
```
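What the new `group` parameter buys (an illustrative snippet, not part of the diff): with quote-matching backreferences the first capture group is the quote character, so callers name the group they actually want instead of taking the first non-None one:

```python
import re

m = re.search(r'content=(["\'])(?P<content>.*?)\1', '<meta content="foo">')
assert m.group(1) == '"'            # the quote, not the payload
assert m.group('content') == 'foo'  # what group='content' now selects
```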
```diff
@@ -17,7 +17,6 @@ from ..utils import (
     bytes_to_intlist,
     intlist_to_bytes,
     unified_strdate,
-    clean_html,
     urlencode_postdata,
 )
 from ..aes import (
```
```diff
@@ -5,7 +5,8 @@ import os.path
 import re
 
 from .common import InfoExtractor
-from ..utils import compat_urllib_parse_unquote, url_basename
+from ..compat import compat_urllib_parse_unquote
+from ..utils import url_basename
 
 
 class DropboxIE(InfoExtractor):
```
```diff
@@ -5,12 +5,14 @@ import re
 import socket
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_http_client,
     compat_str,
     compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     urlencode_postdata,
     ExtractorError,
     limit_length,
```
```diff
@@ -8,12 +8,11 @@ from ..utils import (
     compat_urllib_parse,
     compat_urlparse,
     unescapeHTML,
-    get_meta_content,
 )
 
 
 class GameSpotIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
+    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
     _TEST = {
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -26,10 +25,10 @@ class GameSpotIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        page_id = mobj.group('page_id')
+        page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
-        data_video_json = self._search_regex(r'data-video=["\'](.*?)["\']', webpage, 'data video')
+        data_video_json = self._search_regex(
+            r'data-video=["\'](.*?)["\']', webpage, 'data video')
         data_video = json.loads(unescapeHTML(data_video_json))
 
         # Transform the manifest url to a link to the mp4 files
@@ -41,7 +40,8 @@ class GameSpotIE(InfoExtractor):
         http_path = f4m_path[1:].split('/', 1)[1]
         http_template = re.sub(QUALITIES_RE, r'%s', http_path)
         http_template = http_template.replace('.csmil/manifest.f4m', '')
-        http_template = compat_urlparse.urljoin('http://video.gamespotcdn.com/', http_template)
+        http_template = compat_urlparse.urljoin(
+            'http://video.gamespotcdn.com/', http_template)
         formats = []
         for q in qualities:
             formats.append({
@@ -52,8 +52,9 @@ class GameSpotIE(InfoExtractor):
 
         return {
             'id': data_video['guid'],
+            'display_id': page_id,
             'title': compat_urllib_parse.unquote(data_video['title']),
             'formats': formats,
-            'description': get_meta_content('description', webpage),
+            'description': self._html_search_meta('description', webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
         }
```
```diff
@@ -7,11 +7,12 @@ import re
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urlparse,
     compat_xml_parse_error,
+)
+from ..utils import (
     determine_ext,
     ExtractorError,
     float_or_none,
@@ -99,6 +100,22 @@ class GenericIE(InfoExtractor):
                 'uploader': 'Championat',
             },
         },
+        {
+            # https://github.com/rg3/youtube-dl/issues/3541
+            'add_ie': ['Brightcove'],
+            'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
+            'info_dict': {
+                'id': '3866516442001',
+                'ext': 'mp4',
+                'title': 'Leer mij vrouwen kennen: Aflevering 1',
+                'description': 'Leer mij vrouwen kennen: Aflevering 1',
+                'uploader': 'SBS Broadcasting',
+            },
+            'skip': 'Restricted to Netherlands',
+            'params': {
+                'skip_download': True,  # m3u8 download
+            },
+        },
         # Direct link to a video
         {
             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
@@ -559,6 +576,7 @@ class GenericIE(InfoExtractor):
             return {
                 'id': video_id,
                 'title': os.path.splitext(url_basename(url))[0],
+                'direct': True,
                 'formats': [{
                     'format_id': m.group('format_id'),
                     'url': url,
```
```diff
@@ -5,13 +5,15 @@ import random
 import math
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    float_or_none,
+from ..compat import (
     compat_str,
     compat_chr,
     compat_ord,
 )
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+)
 
 
 class GloboIE(InfoExtractor):
```
```diff
@@ -1,15 +1,11 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     compat_urlparse,
-    str_to_int,
     ExtractorError,
 )
-import json
 
 
 class GoshgayIE(InfoExtractor):
@@ -27,36 +23,27 @@ class GoshgayIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
-        title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        family_friendly = self._html_search_meta(
+            'isFamilyFriendly', webpage, default='false')
+        config_url = self._search_regex(
+            r"'config'\s*:\s*'([^']+)'", webpage, 'config URL')
 
-        player_config = self._search_regex(
-            r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
-        player_vars = json.loads(player_config.replace("'", '"'))
-        width = str_to_int(player_vars.get('width'))
-        height = str_to_int(player_vars.get('height'))
-        config_uri = player_vars.get('config')
+        config = self._download_xml(
+            config_url, video_id, 'Downloading player config XML')
 
-        if config_uri is None:
-            raise ExtractorError('Missing config URI')
-        node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
-            errnote='Unable to download XML')
-        if node is None:
+        if config is None:
             raise ExtractorError('Missing config XML')
-        if node.tag != 'config':
+        if config.tag != 'config':
             raise ExtractorError('Missing config attribute')
-        fns = node.findall('file')
-        imgs = node.findall('image')
-        if len(fns) != 1:
+        fns = config.findall('file')
+        if len(fns) < 1:
             raise ExtractorError('Missing media URI')
         video_url = fns[0].text
-        if len(imgs) < 1:
-            thumbnail = None
-        else:
-            thumbnail = imgs[0].text
 
         url_comp = compat_urlparse.urlparse(url)
         ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
@@ -65,9 +52,7 @@ class GoshgayIE(InfoExtractor):
             'id': video_id,
             'url': video_url,
             'title': title,
-            'width': width,
-            'height': height,
             'thumbnail': thumbnail,
             'http_referer': ref,
-            'age_limit': 18,
+            'age_limit': 0 if family_friendly == 'true' else 18,
         }
```
```diff
@@ -8,12 +8,13 @@ import re
 
 
 from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
-
-from ..utils import (
+from ..compat import (
+    compat_html_parser,
     compat_urllib_parse,
+    compat_urllib_request,
     compat_urlparse,
 )
+from ..utils import ExtractorError
 
 
 class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
```
```diff
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    get_meta_content,
+    determine_ext,
     int_or_none,
     parse_iso8601,
 )
@@ -25,11 +25,11 @@ class HeiseIE(InfoExtractor):
             'title': (
                 "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
             ),
-            'format_id': 'mp4_720',
+            'format_id': 'mp4_720p',
             'timestamp': 1411812600,
             'upload_date': '20140927',
             'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
-            'thumbnail': 're:https?://.*\.jpg$',
+            'thumbnail': 're:^https?://.*\.jpe?g$',
         }
     }
 
@@ -49,11 +49,12 @@ class HeiseIE(InfoExtractor):
         info = {
             'id': video_id,
             'thumbnail': self._og_search_thumbnail(webpage),
-            'timestamp': parse_iso8601(get_meta_content('date', webpage)),
+            'timestamp': parse_iso8601(
+                self._html_search_meta('date', webpage)),
             'description': self._og_search_description(webpage),
         }
 
-        title = get_meta_content('fulltitle', webpage)
+        title = self._html_search_meta('fulltitle', webpage)
         if title:
             info['title'] = title
         else:
@@ -64,9 +65,12 @@ class HeiseIE(InfoExtractor):
             label = source_node.attrib['label']
             height = int_or_none(self._search_regex(
                 r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
+            video_url = source_node.attrib['file']
+            ext = determine_ext(video_url, '')
             formats.append({
-                'url': source_node.attrib['file'],
+                'url': video_url,
                 'format_note': label,
+                'format_id': '%s_%s' % (ext, label),
                 'height': height,
             })
         self._sort_formats(formats)
```
```diff
@@ -6,7 +6,6 @@ import json
 from .common import InfoExtractor
 from ..utils import (
-    compat_urlparse,
     get_element_by_attribute,
 )
 
 
@@ -27,10 +26,11 @@ class ImdbIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
-        descr = get_element_by_attribute('itemprop', 'description', webpage)
+        descr = self._html_search_regex(
+            r'(?s)<span itemprop="description">(.*?)</span>',
+            webpage, 'description', fatal=False)
         available_formats = re.findall(
             r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
             flags=re.MULTILINE)
@@ -73,9 +73,7 @@ class ImdbListIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        list_id = mobj.group('id')
-
+        list_id = self._match_id(url)
         webpage = self._download_webpage(url, list_id)
         entries = [
             self.url_result('http://www.imdb.com' + m, 'Imdb')
```
```diff
@@ -5,11 +5,11 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    get_element_by_id,
-    parse_iso8601,
     determine_ext,
-    int_or_none,
     float_or_none,
+    get_element_by_id,
+    int_or_none,
+    parse_iso8601,
     str_to_int,
 )
@@ -30,7 +30,7 @@ class IzleseneIE(InfoExtractor):
         'description': 'md5:253753e2655dde93f59f74b572454f6d',
         'thumbnail': 're:^http://.*\.jpg',
         'uploader_id': 'pelikzzle',
-        'timestamp': 1404298698,
+        'timestamp': 1404302298,
         'upload_date': '20140702',
         'duration': 95.395,
         'age_limit': 0,
@@ -46,7 +46,7 @@ class IzleseneIE(InfoExtractor):
         'description': 'Tarkan Dortmund 2006 Konseri',
         'thumbnail': 're:^http://.*\.jpg',
         'uploader_id': 'parlayankiz',
-        'timestamp': 1163318593,
+        'timestamp': 1163322193,
         'upload_date': '20061112',
         'duration': 253.666,
         'age_limit': 0,
@@ -55,10 +55,9 @@ class IzleseneIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        url = 'http://www.izlesene.com/video/%s' % video_id
+        video_id = self._match_id(url)
 
+        url = 'http://www.izlesene.com/video/%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
         title = self._og_search_title(webpage)
```
```diff
@@ -4,6 +4,7 @@ import random
 import re
 
 from .common import InfoExtractor
+from ..utils import ExtractorError
 
 
 class Laola1TvIE(InfoExtractor):
```
```diff
@@ -33,7 +33,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
         if not m:
             return rtmp_video_url
-        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
+        base = 'http://viacommtvstrmfs.fplive.net/'
         return base + m.group('finalid')
 
     def _get_feed_url(self, uri):
```
```diff
@@ -7,11 +7,12 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_ord,
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
```
```diff
@@ -67,7 +67,7 @@ class NDRIE(InfoExtractor):
 
         thumbnail = None
 
-        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page)
         if video_url:
             thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
             if thumbnails:
```
```diff
@@ -12,6 +12,7 @@ from ..utils import (
     unified_strdate,
     parse_duration,
     int_or_none,
+    ExtractorError,
 )
 
 
@@ -108,6 +109,9 @@ class NiconicoIE(InfoExtractor):
             flv_info_request, video_id,
             note='Downloading flv info', errnote='Unable to download flv info')
 
+        if 'deleted=' in flv_info_webpage:
+            raise ExtractorError('The video has been deleted.',
+                expected=True)
         video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
 
         # Start extracting information
@@ -171,7 +175,8 @@ class NiconicoPlaylistIE(InfoExtractor):
         entries = [{
             '_type': 'url',
             'ie_key': NiconicoIE.ie_key(),
-            'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+            'url': ('http://www.nicovideo.jp/watch/%s' %
+                    entry['item_data']['video_id']),
         } for entry in entries]
 
         return {
```
```diff
@@ -6,6 +6,7 @@ import os.path
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     compat_urllib_parse,
     compat_urllib_request,
 )
@@ -29,6 +30,12 @@ class PlayedIE(InfoExtractor):
         video_id = self._match_id(url)
 
         orig_webpage = self._download_webpage(url, video_id)
+
+        m_error = re.search(
+            r'(?s)Reason for deletion:.*?<b class="err"[^>]*>(?P<msg>[^<]+)</b>', orig_webpage)
+        if m_error:
+            raise ExtractorError(m_error.group('msg'), expected=True)
+
         fields = re.findall(
             r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage)
         data = dict(fields)
```
```diff
@@ -1,7 +1,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import compat_urllib_parse_unquote
+from ..compat import compat_urllib_parse_unquote
 
 
 class Ro220IE(InfoExtractor):
```
```diff
@@ -13,7 +13,7 @@ from ..utils import (
 
 class StreamcloudIE(InfoExtractor):
     IE_NAME = 'streamcloud.eu'
-    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
+    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
 
     _TEST = {
         'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
@@ -27,8 +27,8 @@ class StreamcloudIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+        url = 'http://streamcloud.eu/%s' % video_id
 
         orig_webpage = self._download_webpage(url, video_id)
 
```
```diff
@@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor):
 
         if media_type == 'Video':
             fmt.update({
-                'format_note': ['144p', '288p', '544p'][quality-1],
+                'format_note': ['144p', '288p', '544p', '720p'][quality-1],
                 'vcodec': codec,
             })
         elif media_type == 'Audio':
@@ -101,4 +101,4 @@ class SWRMediathekIE(InfoExtractor):
             'uploader': attr['channel_title'],
             'uploader_id': attr['channel_idkey'],
             'formats': formats,
-        }
+        }
```
```diff
@@ -50,6 +50,7 @@ class TapelyIE(InfoExtractor):
         request = compat_urllib_request.Request(playlist_url)
         request.add_header('X-Requested-With', 'XMLHttpRequest')
         request.add_header('Accept', 'application/json')
+        request.add_header('Referer', url)
 
         playlist = self._download_json(request, display_id)
 
```
```diff
@@ -38,6 +38,7 @@ class TEDIE(SubtitlesInfoExtractor):
                 'actively fooling us.'),
             'uploader': 'Dan Dennett',
             'width': 854,
+            'duration': 1308,
         }
     }, {
         'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -57,6 +58,7 @@ class TEDIE(SubtitlesInfoExtractor):
             'title': 'Be passionate. Be courageous. Be your best.',
             'uploader': 'Gabby Giffords and Mark Kelly',
             'description': 'md5:5174aed4d0f16021b704120360f72b92',
+            'duration': 1128,
         },
     }, {
         'url': 'http://www.ted.com/playlists/who_are_the_hackers',
@@ -178,6 +180,7 @@ class TEDIE(SubtitlesInfoExtractor):
             'description': self._og_search_description(webpage),
             'subtitles': video_subtitles,
             'formats': formats,
+            'duration': talk_info.get('duration'),
         }
 
     def _get_available_subtitles(self, video_id, talk_info):
```
```diff
@@ -29,7 +29,7 @@ class TruTubeIE(InfoExtractor):
 
         # filehd is always 404
         video_url = xpath_text(config, './file', 'video URL', fatal=True)
-        title = xpath_text(config, './title', 'title')
+        title = xpath_text(config, './title', 'title').strip()
         thumbnail = xpath_text(config, './image', ' thumbnail')
 
         return {
```
```diff
@@ -5,7 +5,6 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
     compat_urlparse,
-    get_meta_content,
 )
 
 
@@ -79,7 +78,7 @@ class UstreamChannelIE(InfoExtractor):
         m = re.match(self._VALID_URL, url)
         display_id = m.group('slug')
         webpage = self._download_webpage(url, display_id)
-        channel_id = get_meta_content('ustream:channel_id', webpage)
+        channel_id = self._html_search_meta('ustream:channel_id', webpage)
 
         BASE = 'http://www.ustream.tv'
         next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
```
youtube_dl/extractor/vice.py (new file, 38 lines)
```python
from __future__ import unicode_literals
import re

from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import ExtractorError


class ViceIE(InfoExtractor):
    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'

    _TEST = {
        'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
        'info_dict': {
            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
            'ext': 'mp4',
            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
        },
        'params': {
            # Requires ffmpeg (m3u8 manifest)
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        try:
            embed_code = self._search_regex(
                r'embedCode=([^&\'"]+)', webpage,
                'ooyala embed code')
            ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
            print(ooyala_url)
        except ExtractorError:
            raise ExtractorError('The page doesn\'t contain a video', expected=True)
        return self.url_result(ooyala_url, ie='Ooyala')
```
youtube_dl/extractor/vimeo.py
@@ -7,11 +7,13 @@ import itertools
 
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
+from ..compat import (
     compat_HTTPError,
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
+)
+from ..utils import (
     ExtractorError,
     InAdvancePagedList,
     int_or_none,
youtube_dl/extractor/wimp.py
@@ -37,7 +37,7 @@ class WimpIE(InfoExtractor):
         video_id = mobj.group(1)
         webpage = self._download_webpage(url, video_id)
         video_url = self._search_regex(
-            r"'file'\s*:\s*'([^']+)'", webpage, 'video URL')
+            r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL')
         if YoutubeIE.suitable(video_url):
             self.to_screen('Found YouTube video')
             return {
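The widened pattern accepts single or double quotes around both the key and the value, and a comma as well as a colon after the key. A quick self-contained check (the sample snippets are invented):

```python
import re

OLD = r"'file'\s*:\s*'([^']+)'"
NEW = r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']"

samples = [
    "'file': 'http://example.com/a.mp4'",   # old jwplayer-style config
    '"file", "http://example.com/b.mp4"',   # comma-separated, double-quoted
]
for s in samples:
    print(bool(re.search(OLD, s)), bool(re.search(NEW, s)))
# -> True True
# -> False True
```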
youtube_dl/extractor/wrzuta.py
@@ -27,15 +27,15 @@ class WrzutaIE(InfoExtractor):
             'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
         },
     }, {
-        'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad',
-        'md5': '1e546a18e1c22ac6e9adce17b8961ff5',
+        'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty',
+        'md5': 'bc78077859bea7bcfe4295d7d7fc9025',
         'info_dict': {
-            'id': '9oXJqdcndqv',
+            'id': '063jOPX5ue2',
             'ext': 'ogg',
-            'title': 'David Guetta & Showtek ft. Vassy - Bad',
-            'duration': 270,
-            'uploader_id': 'w729',
-            'description': 'md5:4628f01c666bbaaecefa83476cfa794a',
+            'title': 'Liber & Natalia Szroeder - Teraz Ty',
+            'duration': 203,
+            'uploader_id': 'jolka85',
+            'description': 'md5:2d2b6340f9188c8c4cd891580e481096',
         },
     }]
 
@@ -49,16 +49,17 @@ class WrzutaIE(InfoExtractor):
 
         quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
 
-        audio_table = {'flv': 'mp3', 'webm': 'ogg'}
+        audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'}
 
         embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
 
         formats = []
         for media in embedpage['url']:
+            fmt = media['type'].split('@')[0]
             if typ == 'audio':
-                ext = audio_table[media['type'].split('@')[0]]
+                ext = audio_table.get(fmt, fmt)
             else:
-                ext = media['type'].split('@')[0]
+                ext = fmt
 
             formats.append({
                 'format_id': '%s_%s' % (ext, media['quality'].lower()),
youtube_dl/extractor/youtube.py
@@ -684,7 +684,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         # Get video info
-        self.report_video_info_webpage_download(video_id)
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
             self.report_age_confirmation()
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
             # this can be viewed without login into Youtube
@@ -692,12 +691,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'video_id': video_id,
                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                 'sts': self._search_regex(
-                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
+                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''),
             })
             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
-            video_info_webpage = self._download_webpage(video_info_url, video_id,
-                    note=False,
-                    errnote='unable to download video info webpage')
+            video_info_webpage = self._download_webpage(
+                video_info_url, video_id,
+                note='Refetching age-gated info webpage',
+                errnote='unable to download video info webpage')
             video_info = compat_parse_qs(video_info_webpage)
         else:
             age_gate = False
@@ -1043,6 +1043,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
         'info_dict': {
             'title': 'ytdl test PL',
+            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
         },
         'playlist_count': 3,
     }, {
youtube_dl/options.py
@@ -5,9 +5,11 @@ import optparse
 import shlex
 import sys
 
-from .utils import (
+from .compat import (
     compat_expanduser,
     compat_getenv,
+)
+from .utils import (
     get_term_width,
     write_string,
 )
@@ -259,7 +261,16 @@ def parseOpts(overrideArguments=None):
     video_format.add_option(
         '-f', '--format',
         action='store', dest='format', metavar='FORMAT', default=None,
-        help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio')
+        help='video format code, specify the order of preference using'
+            ' slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also'
+            ' supported. You can also use the special names "best",'
+            ' "bestvideo", "bestaudio", "worst", "worstvideo" and'
+            ' "worstaudio". By default, youtube-dl will pick the best quality.'
+            ' Use commas to download multiple audio formats, such as'
+            ' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
+            ' You can merge the video and audio of two formats into a single'
+            ' file using -f <video-format>+<audio-format> (requires ffmpeg or'
+            ' avconv), for example -f bestvideo+bestaudio.')
     video_format.add_option(
         '--all-formats',
         action='store_const', dest='format', const='all',
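The merge selector documented here can also be exercised through the embedding API; a sketch assuming ffmpeg or avconv is on PATH (the video id is the one youtube-dl's own test suite uses):

```python
from youtube_dl import YoutubeDL

# Download the best video-only and best audio-only formats, then let the
# FFmpegMergerPP postprocessor mux them into a single file.
ydl = YoutubeDL({'format': 'bestvideo+bestaudio'})
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```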
@@ -479,10 +490,12 @@ def parseOpts(overrideArguments=None):
             '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
             '%(upload_date)s for the upload date (YYYYMMDD), '
             '%(extractor)s for the provider (youtube, metacafe, etc), '
-            '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
-            '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
+            '%(id)s for the video id, '
+            '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, '
+            '%(playlist_index)s for the position in the playlist. '
             '%(height)s and %(width)s for the width and height of the video format. '
             '%(resolution)s for a textual description of the resolution of the video format. '
+            '%% for a literal percent. '
             'Use - to output to stdout. Can also be used to download to a different directory, '
             'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
     filesystem.add_option(
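The output template is plain `%`-style formatting over the info dict, so the reworded field list can be sanity-checked directly (the field values below are invented):

```python
outtmpl = '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'
info = {
    'uploader': 'ExampleChannel',
    'title': 'youtube-dl test video',
    'id': 'BaW_jenozKc',
    'ext': 'mp4',
}
print(outtmpl % info)
# -> /my/downloads/ExampleChannel/youtube-dl test video-BaW_jenozKc.mp4
```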
youtube_dl/postprocessor/atomicparsley.py
@@ -6,10 +6,11 @@ import os
 import subprocess
 
 from .common import PostProcessor
-
+from ..compat import (
+    compat_urlretrieve,
+)
 from ..utils import (
     check_executable,
-    compat_urlretrieve,
     encodeFilename,
     PostProcessingError,
     prepend_extension,
youtube_dl/postprocessor/execafterdownload.py
@@ -3,10 +3,8 @@ from __future__ import unicode_literals
 import subprocess
 
 from .common import PostProcessor
-from ..utils import (
-    shlex_quote,
-    PostProcessingError,
-)
+from ..compat import shlex_quote
+from ..utils import PostProcessingError
 
 
 class ExecAfterDownloadPP(PostProcessor):
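`shlex_quote` now comes from the new `compat` module; its Python < 3.3 fallback (visible in the large utils.py hunk further down) wraps the argument in single quotes and escapes embedded ones:

```python
def shlex_quote(s):
    # Close the single-quoted string, emit an escaped quote, reopen it --
    # the same fallback youtube-dl ships for Python < 3.3.
    return "'" + s.replace("'", "'\"'\"'") + "'"


print(shlex_quote("it's a video.mp4"))  # -> 'it'"'"'s a video.mp4'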
youtube_dl/postprocessor/ffmpeg.py
@@ -1,5 +1,4 @@
 import os
-import re
 import subprocess
 import sys
 import time
@@ -7,10 +6,13 @@ import time
 
 from .common import AudioConversionError, PostProcessor
 
-from ..utils import (
+from ..compat import (
     compat_subprocess_get_DEVNULL,
+)
+from ..utils import (
     encodeArgument,
     encodeFilename,
+    get_exe_version,
     is_outdated_version,
     PostProcessingError,
     prepend_extension,
@@ -19,23 +21,6 @@ from ..utils import (
 )
 
 
-def get_version(executable):
-    """ Returns the version of the specified executable,
-    or False if the executable is not present """
-    try:
-        out, err = subprocess.Popen(
-            [executable, '-version'],
-            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
-    except OSError:
-        return False
-    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
-    m = re.search(r'version\s+([0-9._-a-zA-Z]+)', firstline)
-    if not m:
-        return u'present'
-    else:
-        return m.group(1)
-
-
 class FFmpegPostProcessorError(PostProcessingError):
     pass
 
@@ -61,7 +46,7 @@ class FFmpegPostProcessor(PostProcessor):
     @staticmethod
     def get_versions():
         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
-        return dict((program, get_version(program)) for program in programs)
+        return dict((p, get_exe_version(p, args=['-version'])) for p in programs)
 
     @property
     def _executable(self):
youtube_dl/postprocessor/xattrpp.py
@@ -3,10 +3,12 @@ import subprocess
 import sys
 
 from .common import PostProcessor
+from ..compat import (
+    subprocess_check_output
+)
 from ..utils import (
     check_executable,
     hyphenate_date,
-    subprocess_check_output
 )
youtube_dl/utils.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+from __future__ import unicode_literals
+
 import calendar
 import codecs
 import contextlib
@@ -8,7 +10,6 @@ import ctypes
 import datetime
 import email.utils
 import errno
-import getpass
 import gzip
 import itertools
 import io
@@ -29,254 +30,18 @@ import traceback
 import xml.etree.ElementTree
 import zlib
 
-try:
-    import urllib.request as compat_urllib_request
-except ImportError:  # Python 2
-    import urllib2 as compat_urllib_request
-
-try:
-    import urllib.error as compat_urllib_error
-except ImportError:  # Python 2
-    import urllib2 as compat_urllib_error
-
-try:
-    import urllib.parse as compat_urllib_parse
-except ImportError:  # Python 2
-    import urllib as compat_urllib_parse
-
-try:
-    from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError:  # Python 2
-    from urlparse import urlparse as compat_urllib_parse_urlparse
-
-try:
-    import urllib.parse as compat_urlparse
-except ImportError:  # Python 2
-    import urlparse as compat_urlparse
-
-try:
-    import http.cookiejar as compat_cookiejar
-except ImportError:  # Python 2
-    import cookielib as compat_cookiejar
-
-try:
-    import html.entities as compat_html_entities
-except ImportError:  # Python 2
-    import htmlentitydefs as compat_html_entities
-
-try:
-    import html.parser as compat_html_parser
-except ImportError:  # Python 2
-    import HTMLParser as compat_html_parser
-
-try:
-    import http.client as compat_http_client
-except ImportError:  # Python 2
-    import httplib as compat_http_client
-
-try:
-    from urllib.error import HTTPError as compat_HTTPError
-except ImportError:  # Python 2
-    from urllib2 import HTTPError as compat_HTTPError
-
-try:
-    from urllib.request import urlretrieve as compat_urlretrieve
-except ImportError:  # Python 2
-    from urllib import urlretrieve as compat_urlretrieve
-
-
-try:
-    from subprocess import DEVNULL
-    compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
-    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
-
-try:
-    from urllib.parse import unquote as compat_urllib_parse_unquote
-except ImportError:
-    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
-        if string == '':
-            return string
-        res = string.split('%')
-        if len(res) == 1:
-            return string
-        if encoding is None:
-            encoding = 'utf-8'
-        if errors is None:
-            errors = 'replace'
-        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
-        pct_sequence = b''
-        string = res[0]
-        for item in res[1:]:
-            try:
-                if not item:
-                    raise ValueError
-                pct_sequence += item[:2].decode('hex')
-                rest = item[2:]
-                if not rest:
-                    # This segment was just a single percent-encoded character.
-                    # May be part of a sequence of code units, so delay decoding.
-                    # (Stored in pct_sequence).
-                    continue
-            except ValueError:
-                rest = '%' + item
-            # Encountered non-percent-encoded characters. Flush the current
-            # pct_sequence.
-            string += pct_sequence.decode(encoding, errors) + rest
-            pct_sequence = b''
-        if pct_sequence:
-            # Flush the final pct_sequence
-            string += pct_sequence.decode(encoding, errors)
-        return string
-
-
-try:
-    from urllib.parse import parse_qs as compat_parse_qs
-except ImportError:  # Python 2
-    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
-    # Python 2's version is apparently totally broken
-
-    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-                   encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, unicode
-        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
-        r = []
-        for name_value in pairs:
-            if not name_value and not strict_parsing:
-                continue
-            nv = name_value.split('=', 1)
-            if len(nv) != 2:
-                if strict_parsing:
-                    raise ValueError("bad query field: %r" % (name_value,))
-                # Handle case of a control-name with no equal sign
-                if keep_blank_values:
-                    nv.append('')
-                else:
-                    continue
-            if len(nv[1]) or keep_blank_values:
-                name = nv[0].replace('+', ' ')
-                name = compat_urllib_parse_unquote(
-                    name, encoding=encoding, errors=errors)
-                name = _coerce_result(name)
-                value = nv[1].replace('+', ' ')
-                value = compat_urllib_parse_unquote(
-                    value, encoding=encoding, errors=errors)
-                value = _coerce_result(value)
-                r.append((name, value))
-        return r
-
-    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-                        encoding='utf-8', errors='replace'):
-        parsed_result = {}
-        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
-                           encoding=encoding, errors=errors)
-        for name, value in pairs:
-            if name in parsed_result:
-                parsed_result[name].append(value)
-            else:
-                parsed_result[name] = [value]
-        return parsed_result
-
-try:
-    compat_str = unicode  # Python 2
-except NameError:
-    compat_str = str
-
-try:
-    compat_chr = unichr  # Python 2
-except NameError:
-    compat_chr = chr
-
-try:
-    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError:  # Python 2.6
-    from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
-try:
-    from shlex import quote as shlex_quote
-except ImportError:  # Python < 3.3
-    def shlex_quote(s):
-        return "'" + s.replace("'", "'\"'\"'") + "'"
-
-
-def compat_ord(c):
-    if type(c) is int: return c
-    else: return ord(c)
-
-
-if sys.version_info >= (3, 0):
-    compat_getenv = os.getenv
-    compat_expanduser = os.path.expanduser
-else:
-    # Environment variables should be decoded with filesystem encoding.
-    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
-
-    def compat_getenv(key, default=None):
-        env = os.getenv(key, default)
-        if env:
-            env = env.decode(get_filesystem_encoding())
-        return env
-
-    # HACK: The default implementations of os.path.expanduser from cpython do not decode
-    # environment variables with filesystem encoding. We will work around this by
-    # providing adjusted implementations.
-    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
-    # for different platforms with correct environment variables decoding.
-
-    if os.name == 'posix':
-        def compat_expanduser(path):
-            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
-            do nothing."""
-            if not path.startswith('~'):
-                return path
-            i = path.find('/', 1)
-            if i < 0:
-                i = len(path)
-            if i == 1:
-                if 'HOME' not in os.environ:
-                    import pwd
-                    userhome = pwd.getpwuid(os.getuid()).pw_dir
-                else:
-                    userhome = compat_getenv('HOME')
-            else:
-                import pwd
-                try:
-                    pwent = pwd.getpwnam(path[1:i])
-                except KeyError:
-                    return path
-                userhome = pwent.pw_dir
-            userhome = userhome.rstrip('/')
-            return (userhome + path[i:]) or '/'
-    elif os.name == 'nt' or os.name == 'ce':
-        def compat_expanduser(path):
-            """Expand ~ and ~user constructs.
-
-            If user or $HOME is unknown, do nothing."""
-            if path[:1] != '~':
-                return path
-            i, n = 1, len(path)
-            while i < n and path[i] not in '/\\':
-                i = i + 1
-
-            if 'HOME' in os.environ:
-                userhome = compat_getenv('HOME')
-            elif 'USERPROFILE' in os.environ:
-                userhome = compat_getenv('USERPROFILE')
-            elif not 'HOMEPATH' in os.environ:
-                return path
-            else:
-                try:
-                    drive = compat_getenv('HOMEDRIVE')
-                except KeyError:
-                    drive = ''
-                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
-
-            if i != 1:  # ~user
-                userhome = os.path.join(os.path.dirname(userhome), path[1:i])
-
-            return userhome + path[i:]
-    else:
-        compat_expanduser = os.path.expanduser
+from .compat import (
+    compat_chr,
+    compat_getenv,
+    compat_html_entities,
+    compat_parse_qs,
+    compat_str,
+    compat_urllib_error,
+    compat_urllib_parse,
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urlparse,
+)
 
 
 # This is not clearly defined otherwise
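Everything deleted above now lives in the new `youtube_dl/compat.py`; the shims themselves are unchanged. For instance, the `unquote` backport still decodes percent-encoded UTF-8 byte sequences the way Python 3's `urllib.parse.unquote` does (a sketch, assuming the new module path):

```python
from youtube_dl.compat import compat_urllib_parse_unquote

# Percent-encoded UTF-8 decodes to the original text on Python 2 and 3 alike.
print(compat_urllib_parse_unquote('%E4%BD%A0%E5%A5%BD'))  # -> 你好
```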
@@ -304,14 +69,6 @@ def preferredencoding():
 
     return pref
 
 
-if sys.version_info < (3,0):
-    def compat_print(s):
-        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
-else:
-    def compat_print(s):
-        assert type(s) == type(u'')
-        print(s)
-
-
 def write_json_file(obj, fn):
     """ Encode obj as JSON and write it to fn, atomically """
@@ -394,127 +151,32 @@ def xpath_text(node, xpath, name=None, fatal=False):
     return n.text
 
 
-compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE)  # backport bugfix
-
-
-class BaseHTMLParser(compat_html_parser.HTMLParser):
-    def __init(self):
-        compat_html_parser.HTMLParser.__init__(self)
-        self.html = None
-
-    def loads(self, html):
-        self.html = html
-        self.feed(html)
-        self.close()
-
-
-class AttrParser(BaseHTMLParser):
-    """Modified HTMLParser that isolates a tag with the specified attribute"""
-    def __init__(self, attribute, value):
-        self.attribute = attribute
-        self.value = value
-        self.result = None
-        self.started = False
-        self.depth = {}
-        self.watch_startpos = False
-        self.error_count = 0
-        BaseHTMLParser.__init__(self)
-
-    def error(self, message):
-        if self.error_count > 10 or self.started:
-            raise compat_html_parser.HTMLParseError(message, self.getpos())
-        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:])  # skip one line
-        self.error_count += 1
-        self.goahead(1)
-
-    def handle_starttag(self, tag, attrs):
-        attrs = dict(attrs)
-        if self.started:
-            self.find_startpos(None)
-        if self.attribute in attrs and attrs[self.attribute] == self.value:
-            self.result = [tag]
-            self.started = True
-            self.watch_startpos = True
-        if self.started:
-            if not tag in self.depth: self.depth[tag] = 0
-            self.depth[tag] += 1
-
-    def handle_endtag(self, tag):
-        if self.started:
-            if tag in self.depth: self.depth[tag] -= 1
-            if self.depth[self.result[0]] == 0:
-                self.started = False
-                self.result.append(self.getpos())
-
-    def find_startpos(self, x):
-        """Needed to put the start position of the result (self.result[1])
-        after the opening tag with the requested id"""
-        if self.watch_startpos:
-            self.watch_startpos = False
-            self.result.append(self.getpos())
-    handle_entityref = handle_charref = handle_data = handle_comment = \
-        handle_decl = handle_pi = unknown_decl = find_startpos
-
-    def get_result(self):
-        if self.result is None:
-            return None
-        if len(self.result) != 3:
-            return None
-        lines = self.html.split('\n')
-        lines = lines[self.result[1][0]-1:self.result[2][0]]
-        lines[0] = lines[0][self.result[1][1]:]
-        if len(lines) == 1:
-            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
-        lines[-1] = lines[-1][:self.result[2][1]]
-        return '\n'.join(lines).strip()
-
-# Hack for https://github.com/rg3/youtube-dl/issues/662
-if sys.version_info < (2, 7, 3):
-    AttrParser.parse_endtag = (lambda self, i:
-        i + len("</scr'+'ipt>")
-        if self.rawdata[i:].startswith("</scr'+'ipt>")
-        else compat_html_parser.HTMLParser.parse_endtag(self, i))
-
-
 def get_element_by_id(id, html):
     """Return the content of the tag with the specified ID in the passed HTML document"""
     return get_element_by_attribute("id", id, html)
 
 
 def get_element_by_attribute(attribute, value, html):
     """Return the content of the tag with the specified attribute in the passed HTML document"""
-    parser = AttrParser(attribute, value)
-    try:
-        parser.loads(html)
-    except compat_html_parser.HTMLParseError:
-        pass
-    return parser.get_result()
-
-
-class MetaParser(BaseHTMLParser):
-    """
-    Modified HTMLParser that isolates a meta tag with the specified name
-    attribute.
-    """
-    def __init__(self, name):
-        BaseHTMLParser.__init__(self)
-        self.name = name
-        self.content = None
-        self.result = None
+
+    m = re.search(r'''(?xs)
+        <([a-zA-Z0-9:._-]+)
+         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+         \s+%s=['"]?%s['"]?
+         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+        \s*>
+        (?P<content>.*?)
+        </\1>
+    ''' % (re.escape(attribute), re.escape(value)), html)
 
-    def handle_starttag(self, tag, attrs):
-        if tag != 'meta':
-            return
-        attrs = dict(attrs)
-        if attrs.get('name') == self.name:
-            self.result = attrs.get('content')
+    if not m:
+        return None
+    res = m.group('content')
 
-    def get_result(self):
-        return self.result
+    if res.startswith('"') or res.startswith("'"):
+        res = res[1:-1]
 
-
-def get_meta_content(name, html):
-    """
-    Return the content attribute from the meta tag with the given name attribute.
-    """
-    parser = MetaParser(name)
-    try:
-        parser.loads(html)
-    except compat_html_parser.HTMLParseError:
-        pass
-    return parser.get_result()
+    return unescapeHTML(res)
 
 
 def clean_html(html):
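With the `HTMLParser` subclasses gone, `get_element_by_attribute` is a single `re.search` over the document. A quick check of the new behavior (the sample HTML is invented):

```python
from youtube_dl.utils import get_element_by_attribute

html = '<div id="player"><span>hello</span></div>'
print(get_element_by_attribute('id', 'player', html))
# -> <span>hello</span>
```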
@@ -1472,6 +1134,25 @@ def check_executable(exe, args=[]):
     return exe
 
 
+def get_exe_version(exe, args=['--version'],
+                    version_re=r'version\s+([0-9._-a-zA-Z]+)',
+                    unrecognized=u'present'):
+    """ Returns the version of the specified executable,
+    or False if the executable is not present """
+    try:
+        out, err = subprocess.Popen(
+            [exe] + args,
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+    except OSError:
+        return False
+    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
+    m = re.search(version_re, firstline)
+    if m:
+        return m.group(1)
+    else:
+        return unrecognized
+
+
 class PagedList(object):
     def __len__(self):
         # This is only useful for tests
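`get_exe_version` generalizes the `get_version` helper removed from ffmpeg.py: the probed executable, the version arguments, and the version regex are all parameters now. The ffmpeg call site above passes `args=['-version']`, since ffmpeg does not understand `--version`:

```python
from youtube_dl.utils import get_exe_version

# Probe ffmpeg the same way FFmpegPostProcessor.get_versions now does:
# returns e.g. '2.4.2', u'present' when the output is unparsable,
# or False when the executable is missing.
print(get_exe_version('ffmpeg', args=['-version']))
```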
@@ -1562,7 +1243,7 @@ def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
     if sys.version_info < (3, 0) and isinstance(s, unicode):
         s = s.encode('utf-8')
-    return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
+    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
 
 
 def escape_url(url):
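Passing the safe characters as a bytes literal keeps `escape_rfc3986` working on both Python 2 (where `quote` wants `str`) and Python 3 (where `quote` accepts a bytes `safe` argument). Behavior is unchanged (the example URL is invented):

```python
from youtube_dl.utils import escape_rfc3986

# Non-ASCII characters are percent-encoded; RFC 3986 reserved ones survive.
print(escape_rfc3986('http://example.com/héllo?x=1'))
# -> http://example.com/h%C3%A9llo?x=1
```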
@@ -1636,15 +1317,6 @@ def parse_xml(s):
     return tree
 
 
-if sys.version_info < (3, 0) and sys.platform == 'win32':
-    def compat_getpass(prompt, *args, **kwargs):
-        if isinstance(prompt, compat_str):
-            prompt = prompt.encode(preferredencoding())
-        return getpass.getpass(prompt, *args, **kwargs)
-else:
-    compat_getpass = getpass.getpass
-
-
 US_RATINGS = {
     'G': 0,
     'PG': 10,
@@ -1702,18 +1374,6 @@ def qualities(quality_ids):
 
 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
 
-try:
-    subprocess_check_output = subprocess.check_output
-except AttributeError:
-    def subprocess_check_output(*args, **kwargs):
-        assert 'input' not in kwargs
-        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
-        output, _ = p.communicate()
-        ret = p.poll()
-        if ret:
-            raise subprocess.CalledProcessError(ret, p.args, output=output)
-        return output
-
-
 def limit_length(s, length):
     """ Add ellipses to overly long strings """
youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2014.11.02'
+__version__ = '2014.11.12.1'