Compare commits
74 Commits: 2015.11.19...2015.11.23

Commits (SHA1):
20e98bf6c0, 5c2266df4b, 67dda51722, e4c4bcf36f, 82d8a8b6e2, 13a10d5aa3,
9022726446, 94bfcd23b7, 526b3b0716, 61f92af1cf, a72778d364, 5ae17037a3,
02f0da20b0, b41631c4e6, 0e49d9a6b0, 4a7d108ab3, 3cfd000849, 1b38185361,
9cb9a5df77, 5035536e3f, 3e12bc583a, e568c2233e, 061a75edd6, 82c4d7b0ce,
136dadde95, 0c14841585, 0eebf34d9d, cf186b77a7, a3372437bf, 4c57b4853d,
38eb2968ab, bea56c9569, 7e508ff2cf, 563772eda4, 0533915aad, c3a227d1c4,
f6c903e708, 7dc011c063, 4e3b303016, 7e1f5447e7, 7e3472758b, 328a22e175,
417b453699, 6ea7190a3e, b54b08c91b, c30943b1c0, 2abf7cab80, 4137196899,
019839faaa, f52183a878, 750b9ff032, 28602e747c, 6cc37c69e2, a5cd0eb8a4,
c23e266427, 651acffbe5, 71bd93b89c, 6da620de58, bdceea7afd, d80a39cec8,
5b5fae5f20, 01b06aedcf, c711383811, 17cc153435, 67446fd49b, 325bb615a7,
ee5cd8418e, 342609a1b4, f270cf1a26, 371c3b796c, 6b7ceee1b9, fdb20a27a3,
2c94198eb6, 741dd8ea65
@@ -329,8 +329,8 @@ which means you can modify it, redistribute it or use it however you like.

## Subtitle Options:
--write-sub Write subtitle file
--write-auto-sub Write automatic subtitle file (YouTube
only)
--write-auto-sub Write automatically generated subtitle file
(YouTube only)
--all-subs Download all the available subtitles of the
video
--list-subs List all available subtitles for the video

@@ -494,6 +494,7 @@
- **soompi:show**
- **soundcloud**
- **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search
- **soundcloud:set**
- **soundcloud:user**
- **soundgasm**

@@ -707,6 +708,7 @@
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:user:playlists**: YouTube.com user playlists
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**

@@ -21,6 +21,7 @@ from youtube_dl.utils import (
clean_html,
DateRange,
detect_exe_version,
determine_ext,
encodeFilename,
escape_rfc3986,
escape_url,

@@ -238,6 +239,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)

def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')

def test_find_xpath_attr(self):
testxml = '''<root>
<node/>

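The new determine_ext test cases above pin down how the helper is expected to handle a query string, a trailing slash after the extension, and a None default. The snippet below is only a minimal sketch that is consistent with those assertions; the KNOWN_EXTENSIONS tuple and the exact parsing steps are assumptions for illustration, not the project's actual implementation.

```python
import re

# Assumed extension whitelist; the real list may differ.
KNOWN_EXTENSIONS = ('mp4', 'm4a', 'webm', 'flv', 'f4m', 'm3u8', 'mpd')

def determine_ext(url, default_ext='unknown_video'):
    # Drop the query string, then take whatever follows the last dot.
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Accept a known extension even with trailing slashes, e.g. 'bar.mp4/?download'.
    if guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    return default_ext

# With this sketch the assertions above hold, e.g.:
# determine_ext('http://example.com/foo/bar.m3u8//?download') == 'm3u8'
# determine_ext('http://example.com/foo/bar/?download', None) is None
```
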
@@ -28,6 +28,7 @@ if os.name == 'nt':
import ctypes

from .compat import (
compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,

@@ -63,6 +64,7 @@ from .utils import (
SameFileError,
sanitize_filename,
sanitize_path,
sanitized_Request,
std_headers,
subtitles_filename,
UnavailableVideoError,

@@ -156,7 +158,7 @@ class YoutubeDL(object):
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video

@@ -833,6 +835,7 @@ class YoutubeDL(object):
extra_info=extra)
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
return ie_result
elif result_type == 'compat_list':
self.report_warning(

@@ -937,7 +940,7 @@ class YoutubeDL(object):
filter_parts.append(string)

def _remove_unused_ops(tokens):
# Remove operators that we don't use and join them with the sourrounding strings
# Remove operators that we don't use and join them with the surrounding strings
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None

@@ -1186,7 +1189,7 @@ class YoutubeDL(object):
return res

def _calc_cookies(self, info_dict):
pr = compat_urllib_request.Request(info_dict['url'])
pr = sanitized_Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')

@@ -1870,6 +1873,8 @@ class YoutubeDL(object):

def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout)

def print_debug_header(self):

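A pattern that recurs throughout this compare is replacing direct compat_urllib_request.Request(...) construction with a sanitized_Request(...) helper imported from youtube_dl.utils, so every outgoing URL is cleaned in one place before the request object is built. The sketch below only illustrates such a wrapper under that assumption; the sanitize_url rules shown are placeholders, not the project's actual clean-up logic.

```python
try:
    import urllib.request as compat_urllib_request  # Python 3
except ImportError:
    import urllib2 as compat_urllib_request  # Python 2

def sanitize_url(url):
    # Assumed minimal clean-up: trim whitespace and escape stray spaces.
    return url.strip().replace(' ', '%20')

def sanitized_Request(url, *args, **kwargs):
    # Drop-in replacement for compat_urllib_request.Request, which is why the
    # hunks above and below can swap the constructors one-for-one at call sites.
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
```
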
@@ -42,7 +42,7 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimenatal)
(experimental)
external_downloader_args: A list of additional command-line arguments for the
external downloader.

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re

from .common import FileDownloader
from ..compat import compat_urllib_request
from ..utils import sanitized_Request


class DashSegmentsFD(FileDownloader):

@@ -22,7 +22,7 @@ class DashSegmentsFD(FileDownloader):

def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
req = compat_urllib_request.Request(target_url)
req = sanitized_Request(target_url)
if remaining_bytes is not None:
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))

@@ -7,14 +7,12 @@ import time
import re

from .common import FileDownloader
from ..compat import (
compat_urllib_request,
compat_urllib_error,
)
from ..compat import compat_urllib_error
from ..utils import (
ContentTooShortError,
encodeFilename,
sanitize_open,
sanitized_Request,
)

@@ -29,8 +27,8 @@ class HttpFD(FileDownloader):
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
basic_request = sanitized_Request(url, None, headers)
request = sanitized_Request(url, None, headers)

is_test = self.params.get('test', False)

@@ -117,7 +117,7 @@ class RtmpFD(FileDownloader):
return False

# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = [
'rtmpdump', '--verbose', '-r', url,

@@ -576,7 +576,8 @@ from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
SoundcloudUserIE,
SoundcloudPlaylistIE
SoundcloudPlaylistIE,
SoundcloudSearchIE
)
from .soundgasm import (
SoundgasmIE,

@@ -833,6 +834,7 @@ from .youtube import (
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeUserPlaylistsIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE

@@ -7,11 +7,11 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
float_or_none,
sanitized_Request,
xpath_text,
ExtractorError,
)

@@ -63,7 +63,7 @@ class AtresPlayerIE(InfoExtractor):
'j_password': password,
}

request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(

@@ -94,7 +94,7 @@ class AtresPlayerIE(InfoExtractor):

formats = []
for fmt in ['windows', 'android_tablet']:
request = compat_urllib_request.Request(
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
request.add_header('User-Agent', self._USER_AGENT)

@@ -6,13 +6,13 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
sanitized_Request,
)

@@ -57,7 +57,7 @@ class BambuserIE(InfoExtractor):
'pass': password,
}

request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(

@@ -126,7 +126,7 @@ class BambuserChannelIE(InfoExtractor):
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url)
req = sanitized_Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
data = self._download_json(

@@ -4,14 +4,12 @@ import re

from .common import InfoExtractor

from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
xpath_text,
xpath_with_ns,

@@ -219,7 +217,7 @@ class BlipTVIE(InfoExtractor):
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file

@@ -6,9 +6,9 @@ from .common import InfoExtractor


class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://www\.bloomberg\.com/news/[^/]+/[^/]+/(?P<id>[^/?#]+)'

_TEST = {
_TESTS = [{
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
# The md5 checksum changes
'info_dict': {

@@ -17,7 +17,10 @@ class BloombergIE(InfoExtractor):
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
'description': 'md5:a8ba0302912d03d246979735c17d2761',
},
}
}, {
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
'only_matching': True,
}]

def _real_extract(self, url):
name = self._match_id(url)

@@ -11,7 +11,6 @@ from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)

@@ -24,6 +23,7 @@ from ..utils import (
js_to_json,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
unsmuggle_url,
)

@@ -250,7 +250,7 @@ class BrightcoveLegacyIE(InfoExtractor):

def _get_video_info(self, video_id, query_str, query, referer=None):
request_url = self._FEDERATED_URL_TEMPLATE % query_str
req = compat_urllib_request.Request(request_url)
req = sanitized_Request(request_url)
linkBase = query.get('linkBaseURL')
if linkBase is not None:
referer = linkBase[0]

@@ -443,7 +443,7 @@ class BrightcoveNewIE(InfoExtractor):
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')

req = compat_urllib_request.Request(
req = sanitized_Request(
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
% (account_id, video_id),
headers={'Accept': 'application/json;pk=%s' % policy_key})

@@ -1,8 +1,10 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import smuggle_url
from ..utils import (
sanitized_Request,
smuggle_url,
)


class CBSIE(InfoExtractor):

@@ -48,7 +50,7 @@ class CBSIE(InfoExtractor):

def _real_extract(self, url):
display_id = self._match_id(url)
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
# Android UA is served with higher quality (720p) streams (see
# https://github.com/rg3/youtube-dl/issues/7490)
request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')

@@ -5,7 +5,6 @@ import re

from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,

@@ -13,6 +12,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
)

@@ -100,7 +100,7 @@ class CeskaTelevizeIE(InfoExtractor):
'requestSource': 'iVysilani',
}

req = compat_urllib_request.Request(
req = sanitized_Request(
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=compat_urllib_parse.urlencode(data))

@@ -115,7 +115,7 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)

playlist_title = self._og_search_title(webpage)

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import json

from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
float_or_none,
int_or_none,
sanitized_Request,
)

@@ -52,7 +52,7 @@ class CollegeRamaIE(InfoExtractor):
}
}

request = compat_urllib_request.Request(
request = sanitized_Request(
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
json.dumps(player_options_request))
request.add_header('Content-Type', 'application/json')

@@ -19,7 +19,6 @@ from ..compat import (
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
compat_str,
compat_etree_fromstring,

@@ -37,6 +36,7 @@ from ..utils import (
int_or_none,
RegexNotFoundError,
sanitize_filename,
sanitized_Request,
unescapeHTML,
unified_strdate,
url_basename,

@@ -891,6 +891,11 @@ class InfoExtractor(object):
if not media_nodes:
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')

@@ -898,7 +903,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested

@@ -1280,7 +1285,7 @@ class InfoExtractor(object):

def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))

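The f4m hunks above change how a relative media reference from the manifest is resolved: an explicit baseURL element now takes precedence, and the directory of the manifest URL is only the fallback. A small standalone illustration of that resolution rule follows; the helper name and URLs are hypothetical, not part of the actual extractor.

```python
def resolve_media_url(manifest_url, media_url, base_url=None):
    # Absolute media URLs are kept as-is.
    if media_url.startswith(('http://', 'https://')):
        return media_url
    # Prefer the manifest's <baseURL>; otherwise use the manifest's directory.
    prefix = base_url or '/'.join(manifest_url.split('/')[:-1])
    return prefix + '/' + media_url

# Example with hypothetical URLs:
# resolve_media_url('http://cdn.example.com/vod/manifest.f4m', 'media_1.f4m',
#                   base_url='http://cdn2.example.com/vod')
# -> 'http://cdn2.example.com/vod/media_1.f4m'
```
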
@@ -23,6 +23,7 @@ from ..utils import (
int_or_none,
lowercase_escape,
remove_end,
sanitized_Request,
unified_strdate,
urlencode_postdata,
xpath_text,

@@ -46,7 +47,7 @@ class CrunchyrollBaseIE(InfoExtractor):
'name': username,
'password': password,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request = sanitized_Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info')

@@ -55,7 +56,7 @@ class CrunchyrollBaseIE(InfoExtractor):

def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else compat_urllib_request.Request(url_or_request))
else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues
# similar to https://github.com/rg3/youtube-dl/issues/6797.
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction

@@ -307,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_uploader', fatal=False)

playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req = sanitized_Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')

@@ -319,7 +320,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request(
streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (stream_id, stream_format, stream_quality),
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))

@@ -7,15 +7,13 @@ import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -25,7 +23,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
@staticmethod
|
||||
def _build_request(url):
|
||||
"""Build a request with the family filter disabled"""
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||
return request
|
||||
|
||||
|
@@ -2,13 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,7 +34,7 @@ class DCNIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
|
||||
|
@@ -7,7 +7,6 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -16,6 +15,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ class DramaFeverBaseIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
@@ -5,8 +5,10 @@ import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import qualities
|
||||
from ..utils import (
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class DumpertIE(InfoExtractor):
|
||||
@@ -32,7 +34,7 @@ class DumpertIE(InfoExtractor):
|
||||
protocol = mobj.group('protocol')
|
||||
|
||||
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -2,11 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ class EitbIE(InfoExtractor):
|
||||
|
||||
hls_url = media.get('HLS_SURL')
|
||||
if hls_url:
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
|
||||
headers={'Referer': url})
|
||||
token_data = self._download_json(
|
||||
|
@@ -3,13 +3,12 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -75,7 +74,7 @@ class EscapistIE(InfoExtractor):
|
||||
video_id = ims_video['videoID']
|
||||
key = ims_video['hash']
|
||||
|
||||
config_req = compat_urllib_request.Request(
|
||||
config_req = sanitized_Request(
|
||||
'http://www.escapistmagazine.com/videos/'
|
||||
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
|
||||
config_req.add_header('Referer', url)
|
||||
|
@@ -3,11 +3,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,7 +40,7 @@ class EveryonesMixtapeIE(InfoExtractor):
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
|
||||
pllist_req = compat_urllib_request.Request(pllist_url)
|
||||
pllist_req = sanitized_Request(pllist_url)
|
||||
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
|
||||
playlist_list = self._download_json(
|
||||
@@ -55,7 +53,7 @@ class EveryonesMixtapeIE(InfoExtractor):
|
||||
raise ExtractorError('Playlist id not found')
|
||||
|
||||
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
|
||||
pl_req = compat_urllib_request.Request(pl_url)
|
||||
pl_req = sanitized_Request(pl_url)
|
||||
pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
playlist = self._download_json(
|
||||
pl_req, playlist_id, note='Downloading playlist info')
|
||||
|
@@ -3,9 +3,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -10,11 +10,11 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
@@ -73,7 +73,7 @@ class FacebookIE(InfoExtractor):
|
||||
if useremail is None:
|
||||
return
|
||||
|
||||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
login_page_req = sanitized_Request(self._LOGIN_URL)
|
||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||
login_page = self._download_webpage(login_page_req, None,
|
||||
note='Downloading login page',
|
||||
@@ -94,7 +94,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timezone': '-60',
|
||||
'trynum': '1',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
try:
|
||||
login_results = self._download_webpage(request, None,
|
||||
@@ -109,7 +109,7 @@ class FacebookIE(InfoExtractor):
|
||||
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
|
||||
'name_action_selected': 'dont_save',
|
||||
}
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
check_response = self._download_webpage(check_req, None,
|
||||
note='Confirming login')
|
||||
|
@@ -12,6 +12,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
encode_dict,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -57,7 +58,7 @@ class FC2IE(InfoExtractor):
|
||||
}
|
||||
|
||||
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
||||
|
||||
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
|
||||
@@ -66,7 +67,7 @@ class FC2IE(InfoExtractor):
|
||||
return False
|
||||
|
||||
# this is also needed
|
||||
login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||
self._download_webpage(
|
||||
login_redir, None, note='Login redirect', errnote='Login redirect failed')
|
||||
|
||||
|
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ class FlickrIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
video_uploader_id = mobj.group('uploader_id')
|
||||
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
|
||||
req = compat_urllib_request.Request(webpage_url)
|
||||
req = sanitized_Request(webpage_url)
|
||||
req.add_header(
|
||||
'User-Agent',
|
||||
# it needs a more recent version
|
||||
|
@@ -3,12 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -93,7 +91,7 @@ class FourTubeIE(InfoExtractor):
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
b'Origin': b'http://www.4tube.com',
|
||||
}
|
||||
token_req = compat_urllib_request.Request(token_url, b'{}', headers)
|
||||
token_req = sanitized_Request(token_url, b'{}', headers)
|
||||
tokens = self._download_json(token_req, video_id)
|
||||
formats = [{
|
||||
'url': tokens[format]['token'],
|
||||
|
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -125,7 +123,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
|
||||
request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(request, display_id, 'Logging in')
|
||||
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
|
||||
|
@@ -11,7 +11,6 @@ from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
@@ -22,6 +21,7 @@ from ..utils import (
|
||||
HEADRequest,
|
||||
is_html,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -823,6 +823,19 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
||||
},
|
||||
},
|
||||
# Kaltura embed protected with referrer
|
||||
{
|
||||
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
|
||||
'info_dict': {
|
||||
'id': '1_g4fbemnq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Violetta - Achter De Schermen - Ruggero',
|
||||
'description': 'Achter de schermen met Ruggero',
|
||||
'timestamp': 1435133761,
|
||||
'upload_date': '20150624',
|
||||
'uploader_id': 'echojecka',
|
||||
},
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@@ -1045,6 +1058,20 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Tabletop: Dread, Last Thoughts',
|
||||
'duration': 51690,
|
||||
},
|
||||
},
|
||||
# JWPlayer with M3U8
|
||||
{
|
||||
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
|
||||
'info_dict': {
|
||||
'id': 'playlist',
|
||||
'ext': 'mp4',
|
||||
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
|
||||
'uploader': 'ren.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1188,7 +1215,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
full_response = None
|
||||
if head_response is False:
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Accept-Encoding', '*')
|
||||
full_response = self._request_webpage(request, video_id)
|
||||
head_response = full_response
|
||||
@@ -1217,7 +1244,7 @@ class GenericIE(InfoExtractor):
|
||||
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
||||
|
||||
if not full_response:
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding
|
||||
@@ -1694,7 +1721,9 @@ class GenericIE(InfoExtractor):
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||
return self.url_result(smuggle_url(
|
||||
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
|
||||
{'source_url': url}), 'Kaltura')
|
||||
|
||||
# Look for Eagle.Platform embeds
|
||||
mobj = re.search(
|
||||
@@ -1859,6 +1888,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for video_url in found:
|
||||
video_url = video_url.replace('\\/', '/')
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
|
||||
|
||||
@@ -1870,25 +1900,24 @@ class GenericIE(InfoExtractor):
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
entry_info_dict = {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': self._extract_smil_formats(video_url, video_id),
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||
elif ext == 'xspf':
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
elif ext == 'm3u8':
|
||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||
else:
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
entry_info_dict['url'] = video_url
|
||||
|
||||
entries.append(entry_info_dict)
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
|
@@ -4,12 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
urlencode_postdata,
|
||||
urlhandle_detect_ext,
|
||||
@@ -47,7 +45,7 @@ class HearThisAtIE(InfoExtractor):
|
||||
r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
|
||||
|
||||
payload = urlencode_postdata({'tracks[]': track_id})
|
||||
req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
|
||||
req = sanitized_Request(self._PLAYLIST_URL, payload)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
track = self._download_json(req, track_id, 'Downloading playlist')[0]
|
||||
|
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -41,7 +39,7 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
('mediaType', 's'),
|
||||
('mediaId', video_id),
|
||||
])
|
||||
r = compat_urllib_request.Request(
|
||||
r = sanitized_Request(
|
||||
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
|
||||
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
mkd = self._download_json(
|
||||
|
@@ -4,12 +4,10 @@ import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -32,7 +30,7 @@ class HypemIE(InfoExtractor):
|
||||
data = {'ax': 1, 'ts': time.time()}
|
||||
data_encoded = compat_urllib_parse.urlencode(data)
|
||||
complete_url = url + "?" + data_encoded
|
||||
request = compat_urllib_request.Request(complete_url)
|
||||
request = sanitized_Request(complete_url)
|
||||
response, urlh = self._download_webpage_handle(
|
||||
request, track_id, 'Downloading webpage with the url')
|
||||
cookie = urlh.headers.get('Set-Cookie', '')
|
||||
@@ -52,7 +50,7 @@ class HypemIE(InfoExtractor):
|
||||
title = track['song']
|
||||
|
||||
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
serve_url, '', {'Content-Type': 'application/json'})
|
||||
request.add_header('cookie', cookie)
|
||||
song_data = self._download_json(request, track_id, 'Downloading metadata')
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'https://instagram\.com/p/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
|
@@ -6,12 +6,10 @@ from random import random
|
||||
from math import floor
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -61,7 +59,7 @@ class IPrimaIE(InfoExtractor):
|
||||
(floor(random() * 1073741824), floor(random() * 1073741824))
|
||||
)
|
||||
|
||||
req = compat_urllib_request.Request(player_url)
|
||||
req = sanitized_Request(player_url)
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -5,11 +5,9 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -78,7 +76,7 @@ class IviIE(InfoExtractor):
|
||||
]
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
||||
request = sanitized_Request(api_url, json.dumps(data))
|
||||
|
||||
video_json_page = self._download_webpage(
|
||||
request, video_id, 'Downloading video JSON')
|
||||
|
@@ -2,12 +2,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -121,31 +127,47 @@ class KalturaIE(InfoExtractor):
|
||||
video_id, actions, note='Downloading video info JSON')
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
|
||||
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
|
||||
|
||||
info, source_data = self._get_video_info(entry_id, partner_id)
|
||||
|
||||
formats = [{
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'ext': f['fileExt'],
|
||||
'tbr': f['bitrate'],
|
||||
'fps': f.get('frameRate'),
|
||||
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
|
||||
'container': f.get('containerFormat'),
|
||||
'vcodec': f.get('videoCodecId'),
|
||||
'height': f.get('height'),
|
||||
'width': f.get('width'),
|
||||
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
|
||||
} for f in source_data['flavorAssets']]
|
||||
source_url = smuggled_data.get('source_url')
|
||||
if source_url:
|
||||
referrer = base64.b64encode(
|
||||
'://'.join(compat_urlparse.urlparse(source_url)[:2])
|
||||
.encode('utf-8')).decode('utf-8')
|
||||
else:
|
||||
referrer = None
|
||||
|
||||
formats = []
|
||||
for f in source_data['flavorAssets']:
|
||||
video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
|
||||
if referrer:
|
||||
video_url += '?referrer=%s' % referrer
|
||||
formats.append({
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'ext': f.get('fileExt'),
|
||||
'tbr': int_or_none(f['bitrate']),
|
||||
'fps': int_or_none(f.get('frameRate')),
|
||||
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
|
||||
'container': f.get('containerFormat'),
|
||||
'vcodec': f.get('videoCodecId'),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'url': video_url,
|
||||
})
|
||||
self._check_formats(formats, entry_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': entry_id,
|
||||
'title': info['name'],
|
||||
'formats': formats,
|
||||
'description': info.get('description'),
|
||||
'description': clean_html(info.get('description')),
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'duration': info.get('duration'),
|
||||
'timestamp': info.get('createdAt'),
|
||||
|
@@ -4,10 +4,8 @@ import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class KeezMoviesIE(InfoExtractor):
|
||||
@@ -26,7 +24,7 @@ class KeezMoviesIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -8,13 +8,13 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
int_or_none,
|
||||
encode_data_uri,
|
||||
)
|
||||
@@ -114,7 +114,7 @@ class LetvIE(InfoExtractor):
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.letv.com'
|
||||
}
|
||||
play_json_req = compat_urllib_request.Request(
|
||||
play_json_req = sanitized_Request(
|
||||
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
|
||||
)
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
|
@@ -7,12 +7,12 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
'remember': 'false',
|
||||
'stayPut': 'false'
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
@@ -64,7 +64,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
'remember': 'false',
|
||||
'stayPut': 'false',
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None,
|
||||
|
@@ -7,12 +7,12 @@ from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor):
|
||||
'filters': '0',
|
||||
'submit': "Continue - I'm over 18",
|
||||
}
|
||||
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||
request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self.report_age_confirmation()
|
||||
self._download_webpage(request, None, False, 'Unable to confirm age')
|
||||
@@ -142,7 +142,7 @@ class MetacafeIE(InfoExtractor):
|
||||
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
|
||||
# AnyClip videos require the flashversion cookie so that we get the link
|
||||
# to the mp4 file
|
||||
|
@@ -2,14 +2,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -39,7 +37,7 @@ class MinhatecaIE(InfoExtractor):
|
||||
('fileId', video_id),
|
||||
('__RequestVerificationToken', token),
|
||||
]
|
||||
req = compat_urllib_request.Request(
|
||||
req = sanitized_Request(
|
||||
'http://minhateca.com.br/action/License/Download',
|
||||
data=compat_urllib_parse.urlencode(token_data))
|
||||
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ class MioMioIE(InfoExtractor):
|
||||
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
|
||||
video_id)
|
||||
|
||||
vid_config_request = compat_urllib_request.Request(
|
||||
vid_config_request = sanitized_Request(
|
||||
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
|
||||
headers=http_headers)
|
||||
|
||||
|
@@ -5,13 +5,11 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -80,7 +78,7 @@ class MoeVideoIE(InfoExtractor):
|
||||
]
|
||||
r_json = json.dumps(r)
|
||||
post = compat_urllib_parse.urlencode({'r': r_json})
|
||||
req = compat_urllib_request.Request(self._API_URL, post)
|
||||
req = sanitized_Request(self._API_URL, post)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
response = self._download_json(req, video_id)
|
||||
|
@@ -7,8 +7,8 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class MofosexIE(InfoExtractor):
|
||||
@@ -29,7 +29,7 @@ class MofosexIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -5,13 +5,11 @@ import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -81,7 +79,7 @@ class MonikerIE(InfoExtractor):
|
||||
orig_webpage, 'builtin URL', default=None, group='url')
|
||||
|
||||
if builtin_url:
|
||||
req = compat_urllib_request.Request(builtin_url)
|
||||
req = sanitized_Request(builtin_url)
|
||||
req.add_header('Referer', url)
|
||||
webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
|
||||
title = self._og_search_title(orig_webpage).strip()
|
||||
@@ -94,7 +92,7 @@ class MonikerIE(InfoExtractor):
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
req = sanitized_Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
|
@@ -3,12 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -59,7 +57,7 @@ class MooshareIE(InfoExtractor):
|
||||
'hash': hash_key,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
|
@@ -2,9 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class MovieClipsIE(InfoExtractor):
|
||||
@@ -25,7 +23,7 @@ class MovieClipsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
# it doesn't work if it thinks the browser it's too old
|
||||
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
|
@@ -5,7 +5,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -13,6 +12,7 @@ from ..utils import (
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
RegexNotFoundError,
|
||||
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
def _extract_mobile_video_formats(self, mtvn_id):
|
||||
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
|
||||
req = compat_urllib_request.Request(webpage_url)
|
||||
req = sanitized_Request(webpage_url)
|
||||
# Otherwise we get a webpage that would execute some javascript
|
||||
req.add_header('User-Agent', 'curl/7')
|
||||
webpage = self._download_webpage(req, mtvn_id,
|
||||
|
@@ -11,10 +11,10 @@ from ..compat import (
|
||||
compat_ord,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ class MyVideoIE(InfoExtractor):
|
||||
|
||||
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
||||
if mobj is not None:
|
||||
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
response = self._download_webpage(request, video_id,
|
||||
'Downloading video info')
|
||||
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
||||
|
@@ -8,11 +8,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_str,
|
||||
compat_itertools_count,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class NetEaseMusicBaseIE(InfoExtractor):
|
||||
@@ -56,7 +56,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
return int(round(ms / 1000.0))
|
||||
|
||||
def query_api(self, endpoint, video_id, note):
|
||||
req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
|
||||
req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
|
||||
req.add_header('Referer', self._API_BASE)
|
||||
return self._download_json(req, video_id, note)
|
||||
|
||||
|
@@ -1,10 +1,8 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class NFBIE(InfoExtractor):
|
||||
@@ -40,8 +38,9 @@ class NFBIE(InfoExtractor):
|
||||
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
||||
page, 'director name', fatal=False)
|
||||
|
||||
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||
request = sanitized_Request(
|
||||
'https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
||||
|
||||
|
@@ -8,7 +8,6 @@ import datetime
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
xpath_text,
|
||||
determine_ext,
|
||||
)
|
||||
@@ -102,7 +102,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'https://secure.nicovideo.jp/secure/login', login_data)
|
||||
login_results = self._download_webpage(
|
||||
request, None, note='Logging in', errnote='Unable to log in')
|
||||
@@ -145,7 +145,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'k': thumb_play_key,
|
||||
'v': video_id
|
||||
})
|
||||
flv_info_request = compat_urllib_request.Request(
|
||||
flv_info_request = sanitized_Request(
|
||||
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
|
||||
{'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
flv_info_webpage = self._download_webpage(
|
||||
|
@@ -9,7 +9,6 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ class NocoIE(InfoExtractor):
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
|
||||
|
||||
login = self._download_json(request, None, 'Logging in as %s' % username)
|
||||
|
@@ -4,11 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -41,7 +39,7 @@ class NosVideoIE(InfoExtractor):
|
||||
'op': 'download1',
|
||||
'method_free': 'Continue to Video',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||
req = sanitized_Request(url, urlencode_postdata(fields))
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
webpage = self._download_webpage(req, video_id,
|
||||
'Downloading download page')
|
||||
|
@@ -3,14 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
encode_dict,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -65,7 +63,7 @@ class NovaMovIE(InfoExtractor):
|
||||
'post url', default=url, group='url')
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(url, post_url)
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
post_url, urlencode_postdata(encode_dict(fields)))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('Referer', post_url)
|
||||
|
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ class NownessBaseIE(InfoExtractor):
|
||||
|
||||
def _api_request(self, url, request_path):
|
||||
display_id = self._match_id(url)
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://api.nowness.com/api/' + request_path % display_id,
|
||||
headers={
|
||||
'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',
|
||||
|
@@ -3,11 +3,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -33,7 +31,7 @@ class NuvidIE(InfoExtractor):
|
||||
formats = []
|
||||
|
||||
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://m.nuvid.com/play/%s' % video_id)
|
||||
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
|
||||
webpage = self._download_webpage(
|
||||
|
@@ -2,9 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
)
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class PatreonIE(InfoExtractor):
|
||||
@@ -65,7 +63,7 @@ class PatreonIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'https://www.patreon.com/processLogin',
|
||||
compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
||||
)
|
||||
|
@@ -5,12 +5,10 @@ import re
|
||||
import os.path
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -46,7 +44,7 @@ class PlayedIE(InfoExtractor):
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
req = sanitized_Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
|
@@ -1,29 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import random
|
||||
import collections
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class PluralsightIE(InfoExtractor):
|
||||
class PluralsightBaseIE(InfoExtractor):
|
||||
_API_BASE = 'http://app.pluralsight.com'
|
||||
|
||||
|
||||
class PluralsightIE(PluralsightBaseIE):
|
||||
IE_NAME = 'pluralsight'
|
||||
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
|
||||
_LOGIN_URL = 'https://www.pluralsight.com/id/'
|
||||
_VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
|
||||
_LOGIN_URL = 'https://app.pluralsight.com/id/'
|
||||
|
||||
_NETRC_MACHINE = 'pluralsight'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
|
||||
'md5': '4d458cf5cf4c593788672419a8dd4cf8',
|
||||
'info_dict': {
|
||||
@@ -33,7 +39,14 @@ class PluralsightIE(InfoExtractor):
|
||||
'duration': 338,
|
||||
},
|
||||
'skip': 'Requires pluralsight account credentials',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# available without pluralsight account
|
||||
'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -41,7 +54,7 @@ class PluralsightIE(InfoExtractor):
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
self.raise_login_required('Pluralsight account is required')
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
@@ -60,7 +73,7 @@ class PluralsightIE(InfoExtractor):
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
@@ -73,30 +86,47 @@ class PluralsightIE(InfoExtractor):
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
author = mobj.group('author')
|
||||
name = mobj.group('name')
|
||||
clip_id = mobj.group('clip')
|
||||
course = mobj.group('course')
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
|
||||
author = qs.get('author', [None])[0]
|
||||
name = qs.get('name', [None])[0]
|
||||
clip_id = qs.get('clip', [None])[0]
|
||||
course = qs.get('course', [None])[0]
|
||||
|
||||
if any(not f for f in (author, name, clip_id, course,)):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
display_id = '%s-%s' % (name, clip_id)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
collection = self._parse_json(
|
||||
self._search_regex(
|
||||
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
|
||||
webpage, 'modules'),
|
||||
display_id)
|
||||
modules = self._search_regex(
|
||||
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
|
||||
webpage, 'modules', default=None)
|
||||
|
||||
if modules:
|
||||
collection = self._parse_json(modules, display_id)
|
||||
else:
|
||||
# Webpage may be served in different layout (see
|
||||
# https://github.com/rg3/youtube-dl/issues/7607)
|
||||
collection = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'),
|
||||
display_id)['course']['modules']
|
||||
|
||||
module, clip = None, None
|
||||
|
||||
for module_ in collection:
|
||||
if module_.get('moduleName') == name:
|
||||
if name in (module_.get('moduleName'), module_.get('name')):
|
||||
module = module_
|
||||
for clip_ in module_.get('clips', []):
|
||||
clip_index = clip_.get('clipIndex')
|
||||
if clip_index is None:
|
||||
clip_index = clip_.get('index')
|
||||
if clip_index is None:
|
||||
continue
|
||||
if compat_str(clip_index) == clip_id:
|
||||
@@ -112,13 +142,33 @@ class PluralsightIE(InfoExtractor):
|
||||
'high': {'width': 1024, 'height': 768},
|
||||
}
|
||||
|
||||
AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
|
||||
|
||||
ALLOWED_QUALITIES = (
|
||||
('webm', ('high',)),
|
||||
('mp4', ('low', 'medium', 'high',)),
|
||||
AllowedQuality('webm', ('high',)),
|
||||
AllowedQuality('mp4', ('low', 'medium', 'high',)),
|
||||
)
|
||||
|
||||
# In order to minimize the number of calls to ViewClip API and reduce
|
||||
# the probability of being throttled or banned by Pluralsight we will request
|
||||
# only single format until formats listing was explicitly requested.
|
||||
if self._downloader.params.get('listformats', False):
|
||||
allowed_qualities = ALLOWED_QUALITIES
|
||||
else:
|
||||
def guess_allowed_qualities():
|
||||
req_format = self._downloader.params.get('format') or 'best'
|
||||
req_format_split = req_format.split('-')
|
||||
if len(req_format_split) > 1:
|
||||
req_ext, req_quality = req_format_split
|
||||
for allowed_quality in ALLOWED_QUALITIES:
|
||||
if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
|
||||
return (AllowedQuality(req_ext, (req_quality, )), )
|
||||
req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
|
||||
return (AllowedQuality(req_ext, ('high', )), )
|
||||
allowed_qualities = guess_allowed_qualities()
|
||||
|
||||
formats = []
|
||||
for ext, qualities in ALLOWED_QUALITIES:
|
||||
for ext, qualities in allowed_qualities:
|
||||
for quality in qualities:
|
||||
f = QUALITIES[quality].copy()
|
||||
clip_post = {
|
||||
@@ -131,13 +181,24 @@ class PluralsightIE(InfoExtractor):
|
||||
'mt': ext,
|
||||
'q': '%dx%d' % (f['width'], f['height']),
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
'http://www.pluralsight.com/training/Player/ViewClip',
|
||||
request = sanitized_Request(
|
||||
'%s/training/Player/ViewClip' % self._API_BASE,
|
||||
json.dumps(clip_post).encode('utf-8'))
|
||||
request.add_header('Content-Type', 'application/json;charset=utf-8')
|
||||
format_id = '%s-%s' % (ext, quality)
|
||||
clip_url = self._download_webpage(
|
||||
request, display_id, 'Downloading %s URL' % format_id, fatal=False)
|
||||
|
||||
# Pluralsight tracks multiple sequential calls to ViewClip API and start
|
||||
# to return 429 HTTP errors after some time (see
|
||||
# https://github.com/rg3/youtube-dl/pull/6989). Moreover it may even lead
|
||||
# to account ban (see https://github.com/rg3/youtube-dl/issues/6842).
|
||||
# To somewhat reduce the probability of these consequences
|
||||
# we will sleep random amount of time before each call to ViewClip.
|
||||
self._sleep(
|
||||
random.randint(2, 5), display_id,
|
||||
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
|
||||
|
||||
if not clip_url:
|
||||
continue
|
||||
f.update({
|
||||
@@ -163,10 +224,10 @@ class PluralsightIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class PluralsightCourseIE(InfoExtractor):
|
||||
class PluralsightCourseIE(PluralsightBaseIE):
|
||||
IE_NAME = 'pluralsight:course'
|
||||
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
# Free course from Pluralsight Starter Subscription for Microsoft TechNet
|
||||
# https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
|
||||
'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
|
||||
@@ -176,7 +237,14 @@ class PluralsightCourseIE(InfoExtractor):
|
||||
'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
|
||||
},
|
||||
'playlist_count': 31,
|
||||
}
|
||||
}, {
|
||||
# available without pluralsight account
|
||||
'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_id = self._match_id(url)
|
||||
@@ -184,14 +252,14 @@ class PluralsightCourseIE(InfoExtractor):
|
||||
# TODO: PSM cookie
|
||||
|
||||
course = self._download_json(
|
||||
'http://www.pluralsight.com/data/course/%s' % course_id,
|
||||
'%s/data/course/%s' % (self._API_BASE, course_id),
|
||||
course_id, 'Downloading course JSON')
|
||||
|
||||
title = course['title']
|
||||
description = course.get('description') or course.get('shortDescription')
|
||||
|
||||
course_data = self._download_json(
|
||||
'http://www.pluralsight.com/data/course/content/%s' % course_id,
|
||||
'%s/data/course/content/%s' % (self._API_BASE, course_id),
|
||||
course_id, 'Downloading course data JSON')
|
||||
|
||||
entries = []
|
||||
@@ -201,7 +269,7 @@ class PluralsightCourseIE(InfoExtractor):
|
||||
if not player_parameters:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'http://www.pluralsight.com/training/player?%s' % player_parameters,
|
||||
'%s/training/player?%s' % (self._API_BASE, player_parameters),
|
||||
'Pluralsight'))
|
||||
|
||||
return self.playlist_result(entries, course_id, title, description)
|
||||
|
@@ -36,7 +36,8 @@ class PornHdIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+) porn HD.+?</title>', webpage, 'title')
|
||||
[r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
|
||||
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
|
@@ -8,10 +8,10 @@ from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..aes import (
|
||||
@@ -53,7 +53,7 @@ class PornHubIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(
|
||||
req = sanitized_Request(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
@@ -3,11 +3,9 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -46,7 +44,7 @@ class PornotubeIE(InfoExtractor):
|
||||
'authenticationSpaceKey': originAuthenticationSpaceKey,
|
||||
'credentials': 'Clip Application',
|
||||
}
|
||||
token_req = compat_urllib_request.Request(
|
||||
token_req = sanitized_Request(
|
||||
'https://api.aebn.net/auth/v1/token/primal',
|
||||
data=json.dumps(token_req_data).encode('utf-8'))
|
||||
token_req.add_header('Content-Type', 'application/json')
|
||||
@@ -56,7 +54,7 @@ class PornotubeIE(InfoExtractor):
|
||||
token = token_answer['tokenKey']
|
||||
|
||||
# Get video URL
|
||||
delivery_req = compat_urllib_request.Request(
|
||||
delivery_req = sanitized_Request(
|
||||
'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
|
||||
delivery_req.add_header('Authorization', token)
|
||||
delivery_info = self._download_json(
|
||||
@@ -64,7 +62,7 @@ class PornotubeIE(InfoExtractor):
|
||||
video_url = delivery_info['mediaUrl']
|
||||
|
||||
# Get additional info (title etc.)
|
||||
info_req = compat_urllib_request.Request(
|
||||
info_req = sanitized_Request(
|
||||
'https://api.aebn.net/content/v1/clips/%s?expand='
|
||||
'title,description,primaryImageNumber,startSecond,endSecond,'
|
||||
'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'
|
||||
|
@@ -1,11 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class PrimeShareTVIE(InfoExtractor):
|
||||
@@ -41,7 +41,7 @@ class PrimeShareTVIE(InfoExtractor):
|
||||
webpage, 'wait time', default=7)) + 1
|
||||
self._sleep(wait_time, video_id)
|
||||
|
||||
req = compat_urllib_request.Request(
|
||||
req = sanitized_Request(
|
||||
url, compat_urllib_parse.urlencode(fields), headers)
|
||||
video_page = self._download_webpage(
|
||||
req, video_id, 'Downloading video page')
|
||||
|
@@ -4,13 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -37,7 +35,7 @@ class PromptFileIE(InfoExtractor):
|
||||
|
||||
fields = self._hidden_inputs(webpage)
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
req = compat_urllib_request.Request(url, post)
|
||||
req = sanitized_Request(url, post)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, 'Downloading video page')
|
||||
|
@@ -7,11 +7,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
clean_html,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class QQMusicIE(InfoExtractor):
|
||||
@@ -201,7 +201,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
singer_desc = None
|
||||
|
||||
if singer_id:
|
||||
req = compat_urllib_request.Request(
|
||||
req = sanitized_Request(
|
||||
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
|
||||
req.add_header(
|
||||
'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
|
||||
|
@@ -6,11 +6,11 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
struct_unpack,
|
||||
)
|
||||
@@ -102,7 +102,7 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
if info['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
||||
png_request = compat_urllib_request.Request(png_url)
|
||||
png_request = sanitized_Request(png_url)
|
||||
png_request.add_header('Referer', url)
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
|
@@ -9,7 +9,7 @@ from ..compat import (
    compat_str,
)
from ..utils import (
    ExtractorError,
    determine_ext,
    unified_strdate,
)

@@ -51,10 +51,25 @@ class RutubeIE(InfoExtractor):
            'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
            video_id, 'Downloading options JSON')

        m3u8_url = options['video_balancer'].get('m3u8')
        if m3u8_url is None:
            raise ExtractorError('Couldn\'t find m3u8 manifest url')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
        formats = []
        for format_id, format_url in options['video_balancer'].items():
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            elif ext == 'f4m':
                f4m_formats = self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id, fatal=False)
                if f4m_formats:
                    formats.extend(f4m_formats)
            else:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
        self._sort_formats(formats)

        return {
            'id': video['id'],

@@ -74,9 +89,9 @@ class RutubeIE(InfoExtractor):
|
||||
class RutubeEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'rutube:embed'
|
||||
IE_DESC = 'Rutube embedded videos'
|
||||
_VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
|
||||
_VALID_URL = 'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||
'info_dict': {
|
||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||
@@ -90,7 +105,10 @@ class RutubeEmbedIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'Requires ffmpeg',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://rutube.ru/play/embed/8083783',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
embed_id = self._match_id(url)
|
||||
|
@@ -6,12 +6,10 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
)
|
||||
@@ -58,7 +56,7 @@ class SafariBaseIE(InfoExtractor):
|
||||
'next': '',
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
@@ -6,14 +6,12 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -37,7 +35,7 @@ class SandiaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -46,7 +44,7 @@ class SharedIE(InfoExtractor):
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
download_form = self._hidden_inputs(webpage)
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
url, compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
|
@@ -4,12 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -50,7 +48,7 @@ class ShareSixIE(InfoExtractor):
|
||||
'method_free': 'Free'
|
||||
}
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
req = compat_urllib_request.Request(url, post)
|
||||
req = sanitized_Request(url, post)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
webpage = self._download_webpage(req, video_id,
|
||||
|
@@ -4,10 +4,8 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class SinaIE(InfoExtractor):
|
||||
@@ -61,7 +59,7 @@ class SinaIE(InfoExtractor):
|
||||
if mobj.group('token') is not None:
|
||||
# The video id is in the redirected url
|
||||
self.to_screen('Getting video id')
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
request.get_method = lambda: 'HEAD'
|
||||
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
|
||||
return self._real_extract(urlh.geturl())
|
||||
|
@@ -7,13 +7,11 @@ import hashlib
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -176,7 +174,7 @@ class SmotriIE(InfoExtractor):
|
||||
if video_password:
|
||||
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
@@ -339,7 +337,7 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
broadcast_page = self._download_webpage(
|
||||
|
@@ -6,11 +6,11 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ class SohuIE(InfoExtractor):
|
||||
else:
|
||||
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
||||
|
||||
req = compat_urllib_request.Request(base_data_url + vid_id)
|
||||
req = sanitized_Request(base_data_url + vid_id)
|
||||
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
|
@@ -4,13 +4,17 @@ from __future__ import unicode_literals
import re
import itertools

from .common import InfoExtractor
from .common import (
    InfoExtractor,
    SearchInfoExtractor
)
from ..compat import (
    compat_str,
    compat_urlparse,
    compat_urllib_parse,
)
from ..utils import (
    encode_dict,
    ExtractorError,
    int_or_none,
    unified_strdate,
@@ -469,3 +473,60 @@ class SoundcloudPlaylistIE(SoundcloudIE):
            'description': data.get('description'),
            'entries': entries,
        }


class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    _SEARCH_KEY = 'scsearch'
    _MAX_RESULTS_PER_PAGE = 200
    _DEFAULT_RESULTS_PER_PAGE = 50
    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _get_collection(self, endpoint, collection_id, **query):
        limit = min(
            query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
            self._MAX_RESULTS_PER_PAGE)
        query['limit'] = limit
        query['client_id'] = self._CLIENT_ID
        query['linked_partitioning'] = '1'
        query['offset'] = 0
        data = compat_urllib_parse.urlencode(encode_dict(query))
        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)

        collected_results = 0

        for i in itertools.count(1):
            response = self._download_json(
                next_url, collection_id, 'Downloading page {0}'.format(i),
                'Unable to download API page')

            collection = response.get('collection', [])
            if not collection:
                break

            collection = list(filter(bool, collection))
            collected_results += len(collection)

            for item in collection:
                yield self.url_result(item['uri'], SoundcloudIE.ie_key())

            if not collection or collected_results >= limit:
                break

            next_url = response.get('next_href')
            if not next_url:
                break

    def _get_n_results(self, query, n):
        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
        return self.playlist_result(tracks, playlist_title=query)

@@ -6,9 +6,9 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -51,7 +51,7 @@ class SpankwireIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
|
||||
req = sanitized_Request('http://www.' + mobj.group('url'))
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -4,11 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -54,7 +52,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
|
||||
api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
|
||||
sport_id, video_id)
|
||||
req = compat_urllib_request.Request(api_url, headers={
|
||||
req = sanitized_Request(api_url, headers={
|
||||
'Accept': 'application/vnd.vidibus.v2.html+json',
|
||||
'Referer': url,
|
||||
})
|
||||
|
@@ -4,10 +4,8 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class StreamcloudIE(InfoExtractor):
|
||||
@@ -43,7 +41,7 @@ class StreamcloudIE(InfoExtractor):
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
req = sanitized_Request(url, post, headers)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
@@ -5,11 +5,9 @@ import hashlib
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -54,7 +52,7 @@ class StreamCZIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
api_path = '/episode/%s' % video_id
|
||||
|
||||
req = compat_urllib_request.Request(self._API_URL + api_path)
|
||||
req = sanitized_Request(self._API_URL + api_path)
|
||||
req.add_header('Api-Password', _get_api_key(api_path))
|
||||
data = self._download_json(req, video_id)
|
||||
|
||||
|
@@ -4,14 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -53,7 +51,7 @@ class TapelyIE(InfoExtractor):
|
||||
display_id = mobj.group('id')
|
||||
|
||||
playlist_url = self._API_URL.format(display_id)
|
||||
request = compat_urllib_request.Request(playlist_url)
|
||||
request = sanitized_Request(playlist_url)
|
||||
request.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
request.add_header('Accept', 'application/json')
|
||||
request.add_header('Referer', url)
|
||||
|
@@ -187,8 +187,12 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
# Seems there's no pattern for the interested script filename, so
|
||||
# I try one by one
|
||||
for script in reversed(scripts):
|
||||
feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
|
||||
feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
|
||||
feed_script = self._download_webpage(
|
||||
self._proto_relative_url(script, 'http:'),
|
||||
video_id, 'Downloading feed script')
|
||||
feed_id = self._search_regex(
|
||||
r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
|
||||
'default feed id', default=None)
|
||||
if feed_id is not None:
|
||||
break
|
||||
if feed_id is None:
|
||||
|
@@ -4,12 +4,10 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..aes import aes_decrypt_text
|
||||
@@ -42,7 +40,7 @@ class Tube8IE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
|
||||
|
@@ -5,13 +5,11 @@ import codecs
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -44,7 +42,7 @@ class TubiTvIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
payload = compat_urllib_parse.urlencode(form_data).encode('utf-8')
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, payload)
|
||||
request = sanitized_Request(self._LOGIN_URL, payload)
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_page = self._download_webpage(
|
||||
request, None, False, 'Wrong login info')
|
||||
|
@@ -11,7 +11,6 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -20,6 +19,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ class TwitchBaseIE(InfoExtractor):
|
||||
for cookie in self._downloader.cookiejar:
|
||||
if cookie.name == 'api_token':
|
||||
headers['Twitch-Api-Token'] = cookie.value
|
||||
request = compat_urllib_request.Request(url, headers=headers)
|
||||
request = sanitized_Request(url, headers=headers)
|
||||
response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
|
||||
self._handle_error(response)
|
||||
return response
|
||||
@@ -80,7 +80,7 @@ class TwitchBaseIE(InfoExtractor):
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(redirect_url, post_url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8'))
|
||||
request.add_header('Referer', redirect_url)
|
||||
response = self._download_webpage(
|
||||
|
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
remove_end,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ class TwitterCardIE(InfoExtractor):
|
||||
config = None
|
||||
formats = []
|
||||
for user_agent in USER_AGENTS:
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('User-Agent', user_agent)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
|
@@ -9,6 +9,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -58,7 +59,7 @@ class UdemyIE(InfoExtractor):
|
||||
for header, value in headers.items():
|
||||
url_or_request.add_header(header, value)
|
||||
else:
|
||||
url_or_request = compat_urllib_request.Request(url_or_request, headers=headers)
|
||||
url_or_request = sanitized_Request(url_or_request, headers=headers)
|
||||
|
||||
response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
|
||||
self._handle_error(response)
|
||||
@@ -89,7 +90,7 @@ class UdemyIE(InfoExtractor):
|
||||
'password': password.encode('utf-8'),
|
||||
})
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
request.add_header('Referer', self._ORIGIN_URL)
|
||||
request.add_header('Origin', self._ORIGIN_URL)
|
||||
|
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ class Vbox7IE(InfoExtractor):
|
||||
|
||||
info_url = "http://vbox7.com/play/magare.do"
|
||||
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
|
||||
info_request = compat_urllib_request.Request(info_url, data)
|
||||
info_request = sanitized_Request(info_url, data)
|
||||
info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
|
||||
if info_response is None:
|
||||
|
@@ -4,12 +4,10 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -110,7 +108,7 @@ class VeohIE(InfoExtractor):
|
||||
if 'class="adultwarning-container"' in webpage:
|
||||
self.report_age_confirmation()
|
||||
age_limit = 18
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Cookie', 'confirmedAdult=true')
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
|
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ class VesselIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def make_json_request(url, data):
|
||||
payload = json.dumps(data).encode('utf-8')
|
||||
req = compat_urllib_request.Request(url, payload)
|
||||
req = sanitized_Request(url, payload)
|
||||
req.add_header('Content-Type', 'application/json; charset=utf-8')
|
||||
return req
|
||||
|
||||
|
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -73,7 +71,7 @@ class VevoIE(InfoExtractor):
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||
|
||||
def _real_initialize(self):
|
||||
req = compat_urllib_request.Request(
|
||||
req = sanitized_Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
webpage = self._download_webpage(
|
||||
req, None,
|
||||
|
@@ -4,9 +4,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urllib_request
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -65,7 +63,7 @@ class ViddlerIE(InfoExtractor):
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
||||
video_id)
|
||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||
request = compat_urllib_request.Request(json_url, None, headers)
|
||||
request = sanitized_Request(json_url, None, headers)
|
||||
data = self._download_json(request, video_id)['video']
|
||||
|
||||
formats = []
|
||||
|
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class VideoMegaIE(InfoExtractor):
|
||||
@@ -30,7 +30,7 @@ class VideoMegaIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
|
||||
req = compat_urllib_request.Request(iframe_url)
|
||||
req = sanitized_Request(iframe_url)
|
||||
req.add_header('Referer', url)
|
||||
req.add_header('Cookie', 'noadvtday=0')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
@@ -13,6 +12,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
HEADRequest,
|
||||
)
|
||||
|
||||
@@ -76,7 +76,7 @@ class ViewsterIE(InfoExtractor):
|
||||
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
request.add_header('Auth-token', self._AUTH_TOKEN)
|
||||
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)
|
||||
|
@@ -7,14 +7,14 @@ import hmac
|
||||
import hashlib
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class VikiBaseIE(InfoExtractor):
|
||||
@@ -43,7 +43,7 @@ class VikiBaseIE(InfoExtractor):
|
||||
hashlib.sha1
|
||||
).hexdigest()
|
||||
url = self._API_URL_TEMPLATE % (query, sig)
|
||||
return compat_urllib_request.Request(
|
||||
return sanitized_Request(
|
||||
url, json.dumps(post_data).encode('utf-8')) if post_data else url
|
||||
|
||||
def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
|
||||
|
@@ -8,7 +8,6 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
unified_strdate,
|
||||
@@ -47,7 +47,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
}))
|
||||
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
|
||||
login_request = sanitized_Request(self._LOGIN_URL, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Referer', self._LOGIN_URL)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
@@ -189,6 +189,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'note': 'Video not completely processed, "failed" seed status',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
@@ -218,7 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if url.startswith('http://'):
|
||||
# vimeo only supports https now, but the user can give an http url
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = compat_urllib_request.Request(url + '/password', data)
|
||||
password_request = sanitized_Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Referer', url)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
@@ -232,7 +236,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option')
|
||||
data = urlencode_postdata(encode_dict({'password': password}))
|
||||
pass_url = url + '/check-password'
|
||||
password_request = compat_urllib_request.Request(pass_url, data)
|
||||
password_request = sanitized_Request(pass_url, data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
return self._download_json(
|
||||
password_request, video_id,
|
||||
@@ -261,7 +265,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
request = sanitized_Request(url, None, headers)
|
||||
try:
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
except ExtractorError as ee:
|
||||
@@ -477,7 +481,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
password_path = self._search_regex(
|
||||
r'action="([^"]+)"', login_form, 'password URL')
|
||||
password_url = compat_urlparse.urljoin(page_url, password_path)
|
||||
password_request = compat_urllib_request.Request(password_url, post)
|
||||
password_request = sanitized_Request(password_url, post)
|
||||
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
self._set_vimeo_cookie('xsrft', token)
|
||||
@@ -486,8 +490,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
password_request, list_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
|
||||
def _extract_videos(self, list_id, base_url):
|
||||
video_ids = []
|
||||
def _title_and_entries(self, list_id, base_url):
|
||||
for pagenum in itertools.count(1):
|
||||
page_url = self._page_url(base_url, pagenum)
|
||||
webpage = self._download_webpage(
|
||||
@@ -496,18 +499,18 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
|
||||
if pagenum == 1:
|
||||
webpage = self._login_list_password(page_url, list_id, webpage)
|
||||
yield self._extract_list_title(webpage)
|
||||
|
||||
for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
|
||||
yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
|
||||
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
break
|
||||
|
||||
entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
for video_id in video_ids]
|
||||
return {'_type': 'playlist',
|
||||
'id': list_id,
|
||||
'title': self._extract_list_title(webpage),
|
||||
'entries': entries,
|
||||
}
|
||||
def _extract_videos(self, list_id, base_url):
|
||||
title_and_entries = self._title_and_entries(list_id, base_url)
|
||||
list_title = next(title_and_entries)
|
||||
return self.playlist_result(title_and_entries, list_id, list_title)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -568,7 +571,7 @@ class VimeoAlbumIE(VimeoChannelIE):
|
||||
|
||||
class VimeoGroupsIE(VimeoAlbumIE):
|
||||
IE_NAME = 'vimeo:group'
|
||||
_VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
|
||||
_VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/groups/rolexawards',
|
||||
'info_dict': {
|
||||
@@ -637,7 +640,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
|
||||
|
||||
def _page_url(self, base_url, pagenum):
|
||||
url = '%s/page:%d/' % (base_url, pagenum)
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
# Set the header to get a partial html page with the ids,
|
||||
# the normal page doesn't contain them.
|
||||
request.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
|
Some files were not shown because too many files have changed in this diff.