Compare commits

...

74 Commits

Author SHA1 Message Date
Philipp Hagemeister
20e98bf6c0 release 2015.11.23 2015-11-23 18:07:58 +01:00
Sergey M․
5c2266df4b Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8
2015-11-23 21:56:23 +06:00
Sergey M․
67dda51722 Rename compat_urllib_request_Request to sanitized_Request and move to utils 2015-11-23 21:55:15 +06:00
Sergey M․
e4c4bcf36f [vimeo] Use compat_urllib_request_Request 2015-11-23 21:55:14 +06:00
Sergey M․
82d8a8b6e2 [YoutubeDL] Wrap plain-text URL requests in compat_urllib_request_Request 2015-11-23 21:55:13 +06:00
Sergey M․
13a10d5aa3 [compat] Add compat_urllib_request_Request
This is actually not a compatibility routine but rather a workaround for URLs without a protocol specified.
A protocol-less URL is treated as an HTTP one since that is the most probable scenario, and it will most likely
redirect to HTTPS if HTTPS was actually expected. This routine could also be useful for any Request
preprocessing that may be added in the future.
2015-11-23 21:55:12 +06:00
Sergey M․
9022726446 [youtube] Fix test 2015-11-23 21:37:21 +06:00
Sergey M․
94bfcd23b7 [youtube] Fix test 2015-11-23 21:35:23 +06:00
Sergey M․
526b3b0716 [youtube] Clarify ytplayer.config extraction rationale 2015-11-23 21:14:03 +06:00
Sergey M․
61f92af1cf [youtube] Add test with '};' in tags 2015-11-23 21:02:37 +06:00
Sergey M․
a72778d364 [youtube] Improve ytplayer.config extraction 2015-11-23 21:00:06 +06:00
Sergey M
5ae17037a3 Merge pull request #7599 from lalinsky/fix-youtube
[youtube] More explicit player config JSON extraction (fixes #7468)
2015-11-23 20:52:23 +06:00
Sergey M․
02f0da20b0 [pluralsight] Add support for alternative webpage layout (Closes #7607) 2015-11-23 03:08:38 +06:00
Lukáš Lalinský
b41631c4e6 [youtube] Send the list of patterns directly to _search_regex 2015-11-22 13:53:26 +01:00
Lukáš Lalinský
0e49d9a6b0 [youtube] Fall back to the original regex for ytplayer.config 2015-11-22 13:49:33 +01:00
Sergey M․
4a7d108ab3 [rutube] Remove unnecessary print 2015-11-22 18:24:17 +06:00
Lukáš Lalinský
3cfd000849 [youtube] More explicit player config JSON extraction (fixes #7468) 2015-11-22 13:14:35 +01:00
Sergey M․
1b38185361 [pornhd] Fix title extraction (Closes #7596) 2015-11-22 18:08:30 +06:00
Sergey M․
9cb9a5df77 [utils] Check ext with trailing slash against the list of known extensions 2015-11-22 17:27:13 +06:00
Sergey M․
5035536e3f [test_utils] Add tests for determine_ext 2015-11-22 06:33:52 +06:00
Sergey M․
3e12bc583a [utils] Improve determine_ext (Closes #7593) 2015-11-22 06:29:39 +06:00
Sergey M․
e568c2233e [youtube] Add test for multi page list of playlists 2015-11-22 05:03:23 +06:00
Sergey M․
061a75edd6 [youtube] Extract base for entry list extractors and support multi page lists of playlists 2015-11-22 05:01:01 +06:00
Philipp Hagemeister
82c4d7b0ce release 2015.11.21 2015-11-21 23:36:27 +01:00
Sergey M․
136dadde95 [youtube:show] Rework in terms of playlists base extractor 2015-11-22 04:18:20 +06:00
Sergey M․
0c14841585 [youtube:user:playlists] Add extractor (Closes #3817) 2015-11-22 04:17:07 +06:00
Sergey M․
0eebf34d9d [pluralsight] Rephrase 2015-11-22 00:58:25 +06:00
Sergey M․
cf186b77a7 [pluralsight] Clarify allowed qualities guessing rationale 2015-11-22 00:56:40 +06:00
Sergey M․
a3372437bf [soundcloud] Remove unused variable 2015-11-22 00:49:58 +06:00
Sergey M․
4c57b4853d [pluralsight] Until listing formats request only single format 2015-11-22 00:42:58 +06:00
Sergey M․
38eb2968ab [pluralsight] Clarify and randomize ViewClip sleep interval 2015-11-22 00:07:09 +06:00
Andrzej Lichnerowicz
bea56c9569 [pluralsight] prevent error 429 when sensing video formats 2015-11-21 23:49:58 +06:00
Sergey M․
7e508ff2cf [pluralsight] Improve login detection 2015-11-21 21:49:37 +06:00
Sergey M․
563772eda4 [pluralsight] Extract base class 2015-11-21 21:37:29 +06:00
Sergey M․
0533915aad [pluralsight] Update some more URLs 2015-11-21 21:35:08 +06:00
Sergey M․
c3a227d1c4 [pluralsight] Update _LOGIN_URL 2015-11-21 21:25:48 +06:00
Sergey M․
f6c903e708 [soundcloud:search] Simplify (Closes #7213) 2015-11-21 21:21:21 +06:00
Sergey M․
7dc011c063 [soundcloud:search] Remove no track results message 2015-11-21 21:00:42 +06:00
Sergey M․
4e3b303016 [soundcloud:search] Fix non-ASCII searches 2015-11-21 20:55:48 +06:00
Sergey M․
7e1f5447e7 [utils] Improve encode_dict 2015-11-21 20:46:33 +06:00
Sergey M․
7e3472758b [soundcloud:search] PEP 8 2015-11-21 20:04:35 +06:00
reiv
328a22e175 [soundcloud] Remove limit on search results 2015-11-21 19:41:36 +06:00
reiv
417b453699 [soundcloud] Use correct error message conventions 2015-11-21 19:41:31 +06:00
reiv
6ea7190a3e Rewrite as list comprehension. 2015-11-21 19:41:26 +06:00
reiv
b54b08c91b Simplify with itertools.islice(). 2015-11-21 19:41:19 +06:00
reiv
c30943b1c0 Fix some compatibility issues, cleanup. 2015-11-21 19:41:15 +06:00
reiv
2abf7cab80 [soundcloud] Add Soundcloud search extractor 2015-11-21 19:41:08 +06:00
Sergey M․
4137196899 [rutube] Extract all formats 2015-11-21 18:02:52 +06:00
Sergey M․
019839faaa [extractor/common] Use baseURL from f4m manifest for recursive manifest extraction 2015-11-21 18:01:39 +06:00
Sergey M․
f52183a878 [rutube:embed] Extend _VALID_URL (Closes #7588) 2015-11-21 17:39:24 +06:00
Yen Chi Hsuan
750b9ff032 [generic] Extract M3U8 formats (closes #7582) 2015-11-21 16:43:01 +08:00
Yen Chi Hsuan
28602e747c [generic] Refactor 2015-11-21 16:08:54 +08:00
Yen Chi Hsuan
6cc37c69e2 [generic] Unescape URLs from JWPlayer (#7582) 2015-11-21 14:12:34 +08:00
Sergey M․
a5cd0eb8a4 [pluralsight:course] Improve _VALID_URL 2015-11-21 08:32:48 +06:00
Sergey M․
c23e266427 [pluralsight] Do not require pluralsight account
Looks like some courses are available without pluralsight account
2015-11-21 08:25:52 +06:00
Sergey M․
651acffbe5 [pluralsight] Update ViewClip URL 2015-11-21 08:21:33 +06:00
Sergey M․
71bd93b89c [pluralsight] Do not rely on argument order in query (Closes #7583) 2015-11-21 08:08:34 +06:00
Sergey M․
6da620de58 [kaltura] Add test for referrer protected video (#7409) 2015-11-21 01:40:28 +06:00
Sergey M․
bdceea7afd [kaltura] Clean description 2015-11-21 01:39:29 +06:00
Sergey M․
d80a39cec8 [kaltura] Improve 2015-11-21 01:38:08 +06:00
Sergey M․
5b5fae5f20 [generic] Use referrer from source kaltura embed URLs (#7409) 2015-11-21 01:35:58 +06:00
Sergey M․
01b06aedcf [kaltura] Add support for referrer protected videos (#7409) 2015-11-21 01:34:02 +06:00
Sergey M
c711383811 Merge pull request #7579 from ashutosh-mishra/typo_fix
Typo fix, found while going through the code.
2015-11-20 23:24:54 +06:00
ashutosh-mishra
17cc153435 Typo fix, found while going through the code. 2015-11-20 22:51:46 +05:30
Sergey M․
67446fd49b [instagram] Improve _VALID_URL (Closes #7568) 2015-11-20 04:07:39 +06:00
Sergey M․
325bb615a7 [theplatform] Style 2015-11-19 22:58:43 +06:00
Sergey M․
ee5cd8418e [theplatform] Handle protocolless feed URLs (Closes #7532) 2015-11-19 22:58:29 +06:00
Sergey M․
342609a1b4 [bloomberg] Relax _VALID_URL (Closes #7546) 2015-11-19 22:55:06 +06:00
Sergey M
f270cf1a26 Merge pull request #7519 from barlik/master
Clarify that automatic subtitles are generated.
2015-11-19 22:44:08 +06:00
hedii
371c3b796c [YoutubeDL] Add playlist finished downloading message (Closes #7517)
Conflicts:
	youtube_dl/YoutubeDL.py
2015-11-19 22:39:02 +06:00
Sergey M․
6b7ceee1b9 [vimeo] Add test for #7552 2015-11-19 22:31:16 +06:00
Sergey M․
fdb20a27a3 [vimeo:group] Improve _VALID_URL (Closes #7552) 2015-11-19 22:30:58 +06:00
Sergey M․
2c94198eb6 [vimeo] Improve playlists extraction 2015-11-19 21:29:32 +06:00
Rastislav Barlik
741dd8ea65 Clarify that automatic subtitles are generated.
It wasn't clear what the word "automatic" meant.
2015-11-16 14:15:25 +00:00
114 changed files with 726 additions and 493 deletions

View File

@@ -329,8 +329,8 @@ which means you can modify it, redistribute it or use it however you like.
## Subtitle Options: ## Subtitle Options:
--write-sub Write subtitle file --write-sub Write subtitle file
--write-auto-sub Write automatic subtitle file (YouTube --write-auto-sub Write automatically generated subtitle file
only) (YouTube only)
--all-subs Download all the available subtitles of the --all-subs Download all the available subtitles of the
video video
--list-subs List all available subtitles for the video --list-subs List all available subtitles for the video

View File

@@ -494,6 +494,7 @@
- **soompi:show** - **soompi:show**
- **soundcloud** - **soundcloud**
- **soundcloud:playlist** - **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search
- **soundcloud:set** - **soundcloud:set**
- **soundcloud:user** - **soundcloud:user**
- **soundgasm** - **soundgasm**
@@ -707,6 +708,7 @@
- **youtube:show**: YouTube.com (multi-season) shows - **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:user:playlists**: YouTube.com user playlists
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks** - **Zapiks**
- **ZDF** - **ZDF**

View File

@@ -21,6 +21,7 @@ from youtube_dl.utils import (
clean_html, clean_html,
DateRange, DateRange,
detect_exe_version, detect_exe_version,
determine_ext,
encodeFilename, encodeFilename,
escape_rfc3986, escape_rfc3986,
escape_url, escape_url,
@@ -238,6 +239,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('25-09-2014'), '20140925') self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
def test_find_xpath_attr(self): def test_find_xpath_attr(self):
testxml = '''<root> testxml = '''<root>
<node/> <node/>

View File

@@ -28,6 +28,7 @@ if os.name == 'nt':
import ctypes import ctypes
from .compat import ( from .compat import (
compat_basestring,
compat_cookiejar, compat_cookiejar,
compat_expanduser, compat_expanduser,
compat_get_terminal_size, compat_get_terminal_size,
@@ -63,6 +64,7 @@ from .utils import (
SameFileError, SameFileError,
sanitize_filename, sanitize_filename,
sanitize_path, sanitize_path,
sanitized_Request,
std_headers, std_headers,
subtitles_filename, subtitles_filename,
UnavailableVideoError, UnavailableVideoError,
@@ -156,7 +158,7 @@ class YoutubeDL(object):
writethumbnail: Write the thumbnail image to a file writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files write_all_thumbnails: Write all thumbnail formats to files
writesubtitles: Write the video subtitles to a file writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub) (requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video listsubtitles: Lists all available subtitles for the video
@@ -833,6 +835,7 @@ class YoutubeDL(object):
extra_info=extra) extra_info=extra)
playlist_results.append(entry_result) playlist_results.append(entry_result)
ie_result['entries'] = playlist_results ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
return ie_result return ie_result
elif result_type == 'compat_list': elif result_type == 'compat_list':
self.report_warning( self.report_warning(
@@ -937,7 +940,7 @@ class YoutubeDL(object):
filter_parts.append(string) filter_parts.append(string)
def _remove_unused_ops(tokens): def _remove_unused_ops(tokens):
# Remove operators that we don't use and join them with the sourrounding strings # Remove operators that we don't use and join them with the surrounding strings
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')') ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None last_string, last_start, last_end, last_line = None, None, None, None
@@ -1186,7 +1189,7 @@ class YoutubeDL(object):
return res return res
def _calc_cookies(self, info_dict): def _calc_cookies(self, info_dict):
pr = compat_urllib_request.Request(info_dict['url']) pr = sanitized_Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr) self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie') return pr.get_header('Cookie')
@@ -1870,6 +1873,8 @@ class YoutubeDL(object):
def urlopen(self, req): def urlopen(self, req):
""" Start an HTTP download """ """ Start an HTTP download """
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout) return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self): def print_debug_header(self):

View File

@@ -42,7 +42,7 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimenatal) (experimental)
external_downloader_args: A list of additional command-line arguments for the external_downloader_args: A list of additional command-line arguments for the
external downloader. external downloader.

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re import re
from .common import FileDownloader from .common import FileDownloader
from ..compat import compat_urllib_request from ..utils import sanitized_Request
class DashSegmentsFD(FileDownloader): class DashSegmentsFD(FileDownloader):
@@ -22,7 +22,7 @@ class DashSegmentsFD(FileDownloader):
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None): def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
req = compat_urllib_request.Request(target_url) req = sanitized_Request(target_url)
if remaining_bytes is not None: if remaining_bytes is not None:
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))

View File

@@ -7,14 +7,12 @@ import time
import re import re
from .common import FileDownloader from .common import FileDownloader
from ..compat import ( from ..compat import compat_urllib_error
compat_urllib_request,
compat_urllib_error,
)
from ..utils import ( from ..utils import (
ContentTooShortError, ContentTooShortError,
encodeFilename, encodeFilename,
sanitize_open, sanitize_open,
sanitized_Request,
) )
@@ -29,8 +27,8 @@ class HttpFD(FileDownloader):
add_headers = info_dict.get('http_headers') add_headers = info_dict.get('http_headers')
if add_headers: if add_headers:
headers.update(add_headers) headers.update(add_headers)
basic_request = compat_urllib_request.Request(url, None, headers) basic_request = sanitized_Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers) request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False) is_test = self.params.get('test', False)

View File

@@ -117,7 +117,7 @@ class RtmpFD(FileDownloader):
return False return False
# Download using rtmpdump. rtmpdump returns exit code 2 when # Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be # the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK. # possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = [ basic_args = [
'rtmpdump', '--verbose', '-r', url, 'rtmpdump', '--verbose', '-r', url,

View File

@@ -576,7 +576,8 @@ from .soundcloud import (
SoundcloudIE, SoundcloudIE,
SoundcloudSetIE, SoundcloudSetIE,
SoundcloudUserIE, SoundcloudUserIE,
SoundcloudPlaylistIE SoundcloudPlaylistIE,
SoundcloudSearchIE
) )
from .soundgasm import ( from .soundgasm import (
SoundgasmIE, SoundgasmIE,
@@ -833,6 +834,7 @@ from .youtube import (
YoutubeTruncatedIDIE, YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,
YoutubeUserIE, YoutubeUserIE,
YoutubeUserPlaylistsIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
) )
from .zapiks import ZapiksIE from .zapiks import ZapiksIE

View File

@@ -7,11 +7,11 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
float_or_none, float_or_none,
sanitized_Request,
xpath_text, xpath_text,
ExtractorError, ExtractorError,
) )
@@ -63,7 +63,7 @@ class AtresPlayerIE(InfoExtractor):
'j_password': password, 'j_password': password,
} }
request = compat_urllib_request.Request( request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage( response = self._download_webpage(
@@ -94,7 +94,7 @@ class AtresPlayerIE(InfoExtractor):
formats = [] formats = []
for fmt in ['windows', 'android_tablet']: for fmt in ['windows', 'android_tablet']:
request = compat_urllib_request.Request( request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token)) self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
request.add_header('User-Agent', self._USER_AGENT) request.add_header('User-Agent', self._USER_AGENT)

View File

@@ -6,13 +6,13 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
compat_str, compat_str,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
float_or_none, float_or_none,
sanitized_Request,
) )
@@ -57,7 +57,7 @@ class BambuserIE(InfoExtractor):
'pass': password, 'pass': password,
} }
request = compat_urllib_request.Request( request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL) request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage( response = self._download_webpage(
@@ -126,7 +126,7 @@ class BambuserChannelIE(InfoExtractor):
'&sort=created&access_mode=0%2C1%2C2&limit={count}' '&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}' '&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id) ).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url) req = sanitized_Request(req_url)
# Without setting this header, we wouldn't get any result # Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
data = self._download_json( data = self._download_json(

View File

@@ -4,14 +4,12 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urlparse
compat_urllib_request,
compat_urlparse,
)
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
unescapeHTML, unescapeHTML,
xpath_text, xpath_text,
xpath_with_ns, xpath_with_ns,
@@ -219,7 +217,7 @@ class BlipTVIE(InfoExtractor):
for lang, url in subtitles_urls.items(): for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles # For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA # when we request with a common UA
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
req.add_header('User-Agent', 'youtube-dl') req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{ subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file # The extension is 'srt' but it's actually an 'ass' file

View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor): class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)' _VALID_URL = r'https?://www\.bloomberg\.com/news/[^/]+/[^/]+/(?P<id>[^/?#]+)'
_TEST = { _TESTS = [{
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
# The md5 checksum changes # The md5 checksum changes
'info_dict': { 'info_dict': {
@@ -17,7 +17,10 @@ class BloombergIE(InfoExtractor):
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
'description': 'md5:a8ba0302912d03d246979735c17d2761', 'description': 'md5:a8ba0302912d03d246979735c17d2761',
}, },
} }, {
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
name = self._match_id(url) name = self._match_id(url)

View File

@@ -11,7 +11,6 @@ from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse, compat_urlparse,
compat_xml_parse_error, compat_xml_parse_error,
) )
@@ -24,6 +23,7 @@ from ..utils import (
js_to_json, js_to_json,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
) )
@@ -250,7 +250,7 @@ class BrightcoveLegacyIE(InfoExtractor):
def _get_video_info(self, video_id, query_str, query, referer=None): def _get_video_info(self, video_id, query_str, query, referer=None):
request_url = self._FEDERATED_URL_TEMPLATE % query_str request_url = self._FEDERATED_URL_TEMPLATE % query_str
req = compat_urllib_request.Request(request_url) req = sanitized_Request(request_url)
linkBase = query.get('linkBaseURL') linkBase = query.get('linkBaseURL')
if linkBase is not None: if linkBase is not None:
referer = linkBase[0] referer = linkBase[0]
@@ -443,7 +443,7 @@ class BrightcoveNewIE(InfoExtractor):
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk') webpage, 'policy key', group='pk')
req = compat_urllib_request.Request( req = sanitized_Request(
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
% (account_id, video_id), % (account_id, video_id),
headers={'Accept': 'application/json;pk=%s' % policy_key}) headers={'Accept': 'application/json;pk=%s' % policy_key})

View File

@@ -1,8 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request from ..utils import (
from ..utils import smuggle_url sanitized_Request,
smuggle_url,
)
class CBSIE(InfoExtractor): class CBSIE(InfoExtractor):
@@ -48,7 +50,7 @@ class CBSIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
# Android UA is served with higher quality (720p) streams (see # Android UA is served with higher quality (720p) streams (see
# https://github.com/rg3/youtube-dl/issues/7490) # https://github.com/rg3/youtube-dl/issues/7490)
request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)') request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_request,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
@@ -13,6 +12,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
sanitized_Request,
) )
@@ -100,7 +100,7 @@ class CeskaTelevizeIE(InfoExtractor):
'requestSource': 'iVysilani', 'requestSource': 'iVysilani',
} }
req = compat_urllib_request.Request( req = sanitized_Request(
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=compat_urllib_parse.urlencode(data)) data=compat_urllib_parse.urlencode(data))
@@ -115,7 +115,7 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region': if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url)) req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url) req.add_header('Referer', url)
playlist_title = self._og_search_title(webpage) playlist_title = self._og_search_title(webpage)

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -52,7 +52,7 @@ class CollegeRamaIE(InfoExtractor):
} }
} }
request = compat_urllib_request.Request( request = sanitized_Request(
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions', 'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
json.dumps(player_options_request)) json.dumps(player_options_request))
request.add_header('Content-Type', 'application/json') request.add_header('Content-Type', 'application/json')

View File

@@ -19,7 +19,6 @@ from ..compat import (
compat_urllib_error, compat_urllib_error,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse, compat_urlparse,
compat_str, compat_str,
compat_etree_fromstring, compat_etree_fromstring,
@@ -37,6 +36,7 @@ from ..utils import (
int_or_none, int_or_none,
RegexNotFoundError, RegexNotFoundError,
sanitize_filename, sanitize_filename,
sanitized_Request,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
url_basename, url_basename,
@@ -891,6 +891,11 @@ class InfoExtractor(object):
if not media_nodes: if not media_nodes:
manifest_version = '2.0' manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
for i, media_el in enumerate(media_nodes): for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0': if manifest_version == '2.0':
media_url = media_el.attrib.get('href') or media_el.attrib.get('url') media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
@@ -898,7 +903,7 @@ class InfoExtractor(object):
continue continue
manifest_url = ( manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://') media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url)) else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction # If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest # since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested # may differ leading to inability to resolve the format by requested
@@ -1280,7 +1285,7 @@ class InfoExtractor(object):
def _get_cookies(self, url): def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """ """ Return a compat_cookies.SimpleCookie with the cookies for the url """
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req) self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie')) return compat_cookies.SimpleCookie(req.get_header('Cookie'))

View File

@@ -23,6 +23,7 @@ from ..utils import (
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
remove_end, remove_end,
sanitized_Request,
unified_strdate, unified_strdate,
urlencode_postdata, urlencode_postdata,
xpath_text, xpath_text,
@@ -46,7 +47,7 @@ class CrunchyrollBaseIE(InfoExtractor):
'name': username, 'name': username,
'password': password, 'password': password,
}) })
login_request = compat_urllib_request.Request(login_url, data) login_request = sanitized_Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info') self._download_webpage(login_request, None, False, 'Wrong login info')
@@ -55,7 +56,7 @@ class CrunchyrollBaseIE(InfoExtractor):
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else compat_urllib_request.Request(url_or_request)) else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues # Accept-Language must be set explicitly to accept any language to avoid issues
# similar to https://github.com/rg3/youtube-dl/issues/6797. # similar to https://github.com/rg3/youtube-dl/issues/6797.
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
@@ -307,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_uploader', fatal=False) 'video_uploader', fatal=False)
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url) playerdata_req = sanitized_Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
@@ -319,7 +320,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage): for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt] stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p' video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request( streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (stream_id, stream_format, stream_quality), % (stream_id, stream_format, stream_quality),
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8')) compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))

View File

@@ -7,15 +7,13 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_str
compat_str,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
) )
@@ -25,7 +23,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod @staticmethod
def _build_request(url): def _build_request(url):
"""Build a request with the family filter disabled""" """Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
request.add_header('Cookie', 'family_filter=off; ff=off') request.add_header('Cookie', 'family_filter=off; ff=off')
return request return request

View File

@@ -2,13 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
@@ -36,7 +34,7 @@ class DCNIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
request = compat_urllib_request.Request( request = sanitized_Request(
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
headers={'Origin': 'http://www.dcndigital.ae'}) headers={'Origin': 'http://www.dcndigital.ae'})

View File

@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@@ -16,6 +15,7 @@ from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
@@ -51,7 +51,7 @@ class DramaFeverBaseIE(InfoExtractor):
'password': password, 'password': password,
} }
request = compat_urllib_request.Request( request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
response = self._download_webpage( response = self._download_webpage(
request, None, 'Logging in as %s' % username) request, None, 'Logging in as %s' % username)

View File

@@ -5,8 +5,10 @@ import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request from ..utils import (
from ..utils import qualities qualities,
sanitized_Request,
)
class DumpertIE(InfoExtractor): class DumpertIE(InfoExtractor):
@@ -32,7 +34,7 @@ class DumpertIE(InfoExtractor):
protocol = mobj.group('protocol') protocol = mobj.group('protocol')
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id) url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
req.add_header('Cookie', 'nsfw=1; cpc=10') req.add_header('Cookie', 'nsfw=1; cpc=10')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)

View File

@@ -2,11 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
@@ -57,7 +57,7 @@ class EitbIE(InfoExtractor):
hls_url = media.get('HLS_SURL') hls_url = media.get('HLS_SURL')
if hls_url: if hls_url:
request = compat_urllib_request.Request( request = sanitized_Request(
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/', 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
headers={'Referer': url}) headers={'Referer': url})
token_data = self._download_json( token_data = self._download_json(

View File

@@ -3,13 +3,12 @@ from __future__ import unicode_literals
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
clean_html, clean_html,
int_or_none, int_or_none,
float_or_none, float_or_none,
sanitized_Request,
) )
@@ -75,7 +74,7 @@ class EscapistIE(InfoExtractor):
video_id = ims_video['videoID'] video_id = ims_video['videoID']
key = ims_video['hash'] key = ims_video['hash']
config_req = compat_urllib_request.Request( config_req = sanitized_Request(
'http://www.escapistmagazine.com/videos/' 'http://www.escapistmagazine.com/videos/'
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key)) 'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
config_req.add_header('Referer', url) config_req.add_header('Referer', url)

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -42,7 +40,7 @@ class EveryonesMixtapeIE(InfoExtractor):
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
pllist_req = compat_urllib_request.Request(pllist_url) pllist_req = sanitized_Request(pllist_url)
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest') pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
playlist_list = self._download_json( playlist_list = self._download_json(
@@ -55,7 +53,7 @@ class EveryonesMixtapeIE(InfoExtractor):
raise ExtractorError('Playlist id not found') raise ExtractorError('Playlist id not found')
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
pl_req = compat_urllib_request.Request(pl_url) pl_req = sanitized_Request(pl_url)
pl_req.add_header('X-Requested-With', 'XMLHttpRequest') pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
playlist = self._download_json( playlist = self._download_json(
pl_req, playlist_id, note='Downloading playlist info') pl_req, playlist_id, note='Downloading playlist info')

View File

@@ -3,9 +3,9 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
sanitized_Request,
str_to_int, str_to_int,
) )
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)

View File

@@ -10,11 +10,11 @@ from ..compat import (
compat_str, compat_str,
compat_urllib_error, compat_urllib_error,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
limit_length, limit_length,
sanitized_Request,
urlencode_postdata, urlencode_postdata,
get_element_by_id, get_element_by_id,
clean_html, clean_html,
@@ -73,7 +73,7 @@ class FacebookIE(InfoExtractor):
if useremail is None: if useremail is None:
return return
login_page_req = compat_urllib_request.Request(self._LOGIN_URL) login_page_req = sanitized_Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US') login_page_req.add_header('Cookie', 'locale=en_US')
login_page = self._download_webpage(login_page_req, None, login_page = self._download_webpage(login_page_req, None,
note='Downloading login page', note='Downloading login page',
@@ -94,7 +94,7 @@ class FacebookIE(InfoExtractor):
'timezone': '-60', 'timezone': '-60',
'trynum': '1', 'trynum': '1',
} }
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form)) request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try: try:
login_results = self._download_webpage(request, None, login_results = self._download_webpage(request, None,
@@ -109,7 +109,7 @@ class FacebookIE(InfoExtractor):
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'), r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
'name_action_selected': 'dont_save', 'name_action_selected': 'dont_save',
} }
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = self._download_webpage(check_req, None, check_response = self._download_webpage(check_req, None,
note='Confirming login') note='Confirming login')

View File

@@ -12,6 +12,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
encode_dict, encode_dict,
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -57,7 +58,7 @@ class FC2IE(InfoExtractor):
} }
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8') login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request( request = sanitized_Request(
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@@ -66,7 +67,7 @@ class FC2IE(InfoExtractor):
return False return False
# this is also needed # this is also needed
login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done') login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
self._download_webpage( self._download_webpage(
login_redir, None, note='Login redirect', errnote='Login redirect failed') login_redir, None, note='Login redirect', errnote='Login redirect failed')

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
find_xpath_attr, find_xpath_attr,
sanitized_Request,
) )
@@ -30,7 +30,7 @@ class FlickrIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id') video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
req = compat_urllib_request.Request(webpage_url) req = sanitized_Request(webpage_url)
req.add_header( req.add_header(
'User-Agent', 'User-Agent',
# it needs a more recent version # it needs a more recent version

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
sanitized_Request,
str_to_int, str_to_int,
) )
@@ -93,7 +91,7 @@ class FourTubeIE(InfoExtractor):
b'Content-Type': b'application/x-www-form-urlencoded', b'Content-Type': b'application/x-www-form-urlencoded',
b'Origin': b'http://www.4tube.com', b'Origin': b'http://www.4tube.com',
} }
token_req = compat_urllib_request.Request(token_url, b'{}', headers) token_req = sanitized_Request(token_url, b'{}', headers)
tokens = self._download_json(token_req, video_id) tokens = self._download_json(token_req, video_id)
formats = [{ formats = [{
'url': tokens[format]['token'], 'url': tokens[format]['token'],

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
remove_end, remove_end,
HEADRequest, HEADRequest,
sanitized_Request,
) )
@@ -125,7 +123,7 @@ class GDCVaultIE(InfoExtractor):
'password': password, 'password': password,
} }
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form)) request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(request, display_id, 'Logging in') self._download_webpage(request, display_id, 'Logging in')
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')

View File

@@ -11,7 +11,6 @@ from .youtube import YoutubeIE
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_request,
compat_urlparse, compat_urlparse,
compat_xml_parse_error, compat_xml_parse_error,
) )
@@ -22,6 +21,7 @@ from ..utils import (
HEADRequest, HEADRequest,
is_html, is_html,
orderedSet, orderedSet,
sanitized_Request,
smuggle_url, smuggle_url,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@@ -823,6 +823,19 @@ class GenericIE(InfoExtractor):
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
}, },
}, },
# Kaltura embed protected with referrer
{
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
'info_dict': {
'id': '1_g4fbemnq',
'ext': 'mp4',
'title': 'Violetta - Achter De Schermen - Ruggero',
'description': 'Achter de schermen met Ruggero',
'timestamp': 1435133761,
'upload_date': '20150624',
'uploader_id': 'echojecka',
},
},
# Eagle.Platform embed (generic URL) # Eagle.Platform embed (generic URL)
{ {
'url': 'http://lenta.ru/news/2015/03/06/navalny/', 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -1045,6 +1058,20 @@ class GenericIE(InfoExtractor):
'description': 'Tabletop: Dread, Last Thoughts', 'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690, 'duration': 51690,
}, },
},
# JWPlayer with M3U8
{
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
'info_dict': {
'id': 'playlist',
'ext': 'mp4',
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
'uploader': 'ren.tv',
},
'params': {
# m3u8 downloads
'skip_download': True,
}
} }
] ]
@@ -1188,7 +1215,7 @@ class GenericIE(InfoExtractor):
full_response = None full_response = None
if head_response is False: if head_response is False:
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
request.add_header('Accept-Encoding', '*') request.add_header('Accept-Encoding', '*')
full_response = self._request_webpage(request, video_id) full_response = self._request_webpage(request, video_id)
head_response = full_response head_response = full_response
@@ -1217,7 +1244,7 @@ class GenericIE(InfoExtractor):
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back')) '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
if not full_response: if not full_response:
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac) # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
# making it impossible to download only chunk of the file (yet we need only 512kB to # making it impossible to download only chunk of the file (yet we need only 512kB to
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
@@ -1694,7 +1721,9 @@ class GenericIE(InfoExtractor):
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
if mobj is not None: if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura') return self.url_result(smuggle_url(
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
{'source_url': url}), 'Kaltura')
# Look for Eagle.Platform embeds # Look for Eagle.Platform embeds
mobj = re.search( mobj = re.search(
@@ -1859,6 +1888,7 @@ class GenericIE(InfoExtractor):
entries = [] entries = []
for video_url in found: for video_url in found:
video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url) video_url = compat_urlparse.urljoin(url, video_url)
video_id = compat_urllib_parse_unquote(os.path.basename(video_url)) video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
@@ -1870,25 +1900,24 @@ class GenericIE(InfoExtractor):
# here's a fun little line of code for you: # here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0] video_id = os.path.splitext(video_id)[0]
entry_info_dict = {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
}
ext = determine_ext(video_url) ext = determine_ext(video_url)
if ext == 'smil': if ext == 'smil':
entries.append({ entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
'id': video_id,
'formats': self._extract_smil_formats(video_url, video_id),
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
elif ext == 'xspf': elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
elif ext == 'm3u8':
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
else: else:
entries.append({ entry_info_dict['url'] = video_url
'id': video_id,
'url': video_url, entries.append(entry_info_dict)
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
if len(entries) == 1: if len(entries) == 1:
return entries[0] return entries[0]

View File

@@ -4,12 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urlparse
compat_urllib_request,
compat_urlparse,
)
from ..utils import ( from ..utils import (
HEADRequest, HEADRequest,
sanitized_Request,
str_to_int, str_to_int,
urlencode_postdata, urlencode_postdata,
urlhandle_detect_ext, urlhandle_detect_ext,
@@ -47,7 +45,7 @@ class HearThisAtIE(InfoExtractor):
r'intTrackId\s*=\s*(\d+)', webpage, 'track ID') r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
payload = urlencode_postdata({'tracks[]': track_id}) payload = urlencode_postdata({'tracks[]': track_id})
req = compat_urllib_request.Request(self._PLAYLIST_URL, payload) req = sanitized_Request(self._PLAYLIST_URL, payload)
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('Content-type', 'application/x-www-form-urlencoded')
track = self._download_json(req, track_id, 'Downloading playlist')[0] track = self._download_json(req, track_id, 'Downloading playlist')[0]

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import base64 import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest, HEADRequest,
sanitized_Request,
) )
@@ -41,7 +39,7 @@ class HotNewHipHopIE(InfoExtractor):
('mediaType', 's'), ('mediaType', 's'),
('mediaId', video_id), ('mediaId', video_id),
]) ])
r = compat_urllib_request.Request( r = sanitized_Request(
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata) 'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
r.add_header('Content-Type', 'application/x-www-form-urlencoded') r.add_header('Content-Type', 'application/x-www-form-urlencoded')
mkd = self._download_json( mkd = self._download_json(

View File

@@ -4,12 +4,10 @@ import json
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -32,7 +30,7 @@ class HypemIE(InfoExtractor):
data = {'ax': 1, 'ts': time.time()} data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data) data_encoded = compat_urllib_parse.urlencode(data)
complete_url = url + "?" + data_encoded complete_url = url + "?" + data_encoded
request = compat_urllib_request.Request(complete_url) request = sanitized_Request(complete_url)
response, urlh = self._download_webpage_handle( response, urlh = self._download_webpage_handle(
request, track_id, 'Downloading webpage with the url') request, track_id, 'Downloading webpage with the url')
cookie = urlh.headers.get('Set-Cookie', '') cookie = urlh.headers.get('Set-Cookie', '')
@@ -52,7 +50,7 @@ class HypemIE(InfoExtractor):
title = track['song'] title = track['song']
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key) serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
request = compat_urllib_request.Request( request = sanitized_Request(
serve_url, '', {'Content-Type': 'application/json'}) serve_url, '', {'Content-Type': 'application/json'})
request.add_header('cookie', cookie) request.add_header('cookie', cookie)
song_data = self._download_json(request, track_id, 'Downloading metadata') song_data = self._download_json(request, track_id, 'Downloading metadata')

View File

@@ -10,7 +10,7 @@ from ..utils import (
class InstagramIE(InfoExtractor): class InstagramIE(InfoExtractor):
_VALID_URL = r'https://instagram\.com/p/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516', 'md5': '0d2da106a9d2631273e192b372806516',

View File

@@ -6,12 +6,10 @@ from random import random
from math import floor from math import floor
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
remove_end, remove_end,
sanitized_Request,
) )
@@ -61,7 +59,7 @@ class IPrimaIE(InfoExtractor):
(floor(random() * 1073741824), floor(random() * 1073741824)) (floor(random() * 1073741824), floor(random() * 1073741824))
) )
req = compat_urllib_request.Request(player_url) req = sanitized_Request(player_url)
req.add_header('Referer', url) req.add_header('Referer', url)
playerpage = self._download_webpage(req, video_id) playerpage = self._download_webpage(req, video_id)

View File

@@ -5,11 +5,9 @@ import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -78,7 +76,7 @@ class IviIE(InfoExtractor):
] ]
} }
request = compat_urllib_request.Request(api_url, json.dumps(data)) request = sanitized_Request(api_url, json.dumps(data))
video_json_page = self._download_webpage( video_json_page = self._download_webpage(
request, video_id, 'Downloading video JSON') request, video_id, 'Downloading video JSON')

View File

@@ -2,12 +2,18 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import ( from ..utils import (
clean_html,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unsmuggle_url,
) )
@@ -121,31 +127,47 @@ class KalturaIE(InfoExtractor):
video_id, actions, note='Downloading video info JSON') video_id, actions, note='Downloading video info JSON')
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5') partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5') entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
info, source_data = self._get_video_info(entry_id, partner_id) info, source_data = self._get_video_info(entry_id, partner_id)
formats = [{ source_url = smuggled_data.get('source_url')
'format_id': '%(fileExt)s-%(bitrate)s' % f, if source_url:
'ext': f['fileExt'], referrer = base64.b64encode(
'tbr': f['bitrate'], '://'.join(compat_urlparse.urlparse(source_url)[:2])
'fps': f.get('frameRate'), .encode('utf-8')).decode('utf-8')
'filesize_approx': int_or_none(f.get('size'), invscale=1024), else:
'container': f.get('containerFormat'), referrer = None
'vcodec': f.get('videoCodecId'),
'height': f.get('height'), formats = []
'width': f.get('width'), for f in source_data['flavorAssets']:
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']), video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
} for f in source_data['flavorAssets']] if referrer:
video_url += '?referrer=%s' % referrer
formats.append({
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f.get('fileExt'),
'tbr': int_or_none(f['bitrate']),
'fps': int_or_none(f.get('frameRate')),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': int_or_none(f.get('height')),
'width': int_or_none(f.get('width')),
'url': video_url,
})
self._check_formats(formats, entry_id)
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': entry_id, 'id': entry_id,
'title': info['name'], 'title': info['name'],
'formats': formats, 'formats': formats,
'description': info.get('description'), 'description': clean_html(info.get('description')),
'thumbnail': info.get('thumbnailUrl'), 'thumbnail': info.get('thumbnailUrl'),
'duration': info.get('duration'), 'duration': info.get('duration'),
'timestamp': info.get('createdAt'), 'timestamp': info.get('createdAt'),

View File

@@ -4,10 +4,8 @@ import os
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse_urlparse
compat_urllib_parse_urlparse, from ..utils import sanitized_Request
compat_urllib_request,
)
class KeezMoviesIE(InfoExtractor): class KeezMoviesIE(InfoExtractor):
@@ -26,7 +24,7 @@ class KeezMoviesIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)

View File

@@ -8,13 +8,13 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
compat_ord, compat_ord,
) )
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
parse_iso8601, parse_iso8601,
sanitized_Request,
int_or_none, int_or_none,
encode_data_uri, encode_data_uri,
) )
@@ -114,7 +114,7 @@ class LetvIE(InfoExtractor):
'tkey': self.calc_time_key(int(time.time())), 'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com' 'domain': 'www.letv.com'
} }
play_json_req = compat_urllib_request.Request( play_json_req = sanitized_Request(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
) )
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')

View File

@@ -7,12 +7,12 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -35,7 +35,7 @@ class LyndaBaseIE(InfoExtractor):
'remember': 'false', 'remember': 'false',
'stayPut': 'false' 'stayPut': 'false'
} }
request = compat_urllib_request.Request( request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
login_page = self._download_webpage( login_page = self._download_webpage(
request, None, 'Logging in as %s' % username) request, None, 'Logging in as %s' % username)
@@ -64,7 +64,7 @@ class LyndaBaseIE(InfoExtractor):
'remember': 'false', 'remember': 'false',
'stayPut': 'false', 'stayPut': 'false',
} }
request = compat_urllib_request.Request( request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8')) self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
login_page = self._download_webpage( login_page = self._download_webpage(
request, None, request, None,

View File

@@ -7,12 +7,12 @@ from ..compat import (
compat_parse_qs, compat_parse_qs,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor):
'filters': '0', 'filters': '0',
'submit': "Continue - I'm over 18", 'submit': "Continue - I'm over 18",
} }
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self.report_age_confirmation() self.report_age_confirmation()
self._download_webpage(request, None, False, 'Unable to confirm age') self._download_webpage(request, None, False, 'Unable to confirm age')
@@ -142,7 +142,7 @@ class MetacafeIE(InfoExtractor):
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform') return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
# AnyClip videos require the flashversion cookie so that we get the link # AnyClip videos require the flashversion cookie so that we get the link
# to the mp4 file # to the mp4 file

View File

@@ -2,14 +2,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_filesize, parse_filesize,
sanitized_Request,
) )
@@ -39,7 +37,7 @@ class MinhatecaIE(InfoExtractor):
('fileId', video_id), ('fileId', video_id),
('__RequestVerificationToken', token), ('__RequestVerificationToken', token),
] ]
req = compat_urllib_request.Request( req = sanitized_Request(
'http://minhateca.com.br/action/License/Download', 'http://minhateca.com.br/action/License/Download',
data=compat_urllib_parse.urlencode(token_data)) data=compat_urllib_parse.urlencode(token_data))
req.add_header('Content-Type', 'application/x-www-form-urlencoded') req.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -4,11 +4,11 @@ from __future__ import unicode_literals
import random import random
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
xpath_text, xpath_text,
int_or_none, int_or_none,
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -63,7 +63,7 @@ class MioMioIE(InfoExtractor):
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
video_id) video_id)
vid_config_request = compat_urllib_request.Request( vid_config_request = sanitized_Request(
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config), 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
headers=http_headers) headers=http_headers)

View File

@@ -5,13 +5,11 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -80,7 +78,7 @@ class MoeVideoIE(InfoExtractor):
] ]
r_json = json.dumps(r) r_json = json.dumps(r)
post = compat_urllib_parse.urlencode({'r': r_json}) post = compat_urllib_parse.urlencode({'r': r_json})
req = compat_urllib_request.Request(self._API_URL, post) req = sanitized_Request(self._API_URL, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('Content-type', 'application/x-www-form-urlencoded')
response = self._download_json(req, video_id) response = self._download_json(req, video_id)

View File

@@ -7,8 +7,8 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request,
) )
from ..utils import sanitized_Request
class MofosexIE(InfoExtractor): class MofosexIE(InfoExtractor):
@@ -29,7 +29,7 @@ class MofosexIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url') url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)

View File

@@ -5,13 +5,11 @@ import os.path
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
remove_start, remove_start,
sanitized_Request,
) )
@@ -81,7 +79,7 @@ class MonikerIE(InfoExtractor):
orig_webpage, 'builtin URL', default=None, group='url') orig_webpage, 'builtin URL', default=None, group='url')
if builtin_url: if builtin_url:
req = compat_urllib_request.Request(builtin_url) req = sanitized_Request(builtin_url)
req.add_header('Referer', url) req.add_header('Referer', url)
webpage = self._download_webpage(req, video_id, 'Downloading builtin page') webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
title = self._og_search_title(orig_webpage).strip() title = self._og_search_title(orig_webpage).strip()
@@ -94,7 +92,7 @@ class MonikerIE(InfoExtractor):
headers = { headers = {
b'Content-Type': b'application/x-www-form-urlencoded', b'Content-Type': b'application/x-www-form-urlencoded',
} }
req = compat_urllib_request.Request(url, post, headers) req = sanitized_Request(url, post, headers)
webpage = self._download_webpage( webpage = self._download_webpage(
req, video_id, note='Downloading video page ...') req, video_id, note='Downloading video page ...')

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_request,
compat_urllib_parse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -59,7 +57,7 @@ class MooshareIE(InfoExtractor):
'hash': hash_key, 'hash': hash_key,
} }
request = compat_urllib_request.Request( request = sanitized_Request(
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form)) 'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -2,9 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..utils import sanitized_Request
compat_urllib_request,
)
class MovieClipsIE(InfoExtractor): class MovieClipsIE(InfoExtractor):
@@ -25,7 +23,7 @@ class MovieClipsIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
# it doesn't work if it thinks the browser it's too old # it doesn't work if it thinks the browser it's too old
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)') req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
webpage = self._download_webpage(req, display_id) webpage = self._download_webpage(req, display_id)

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
compat_str, compat_str,
) )
from ..utils import ( from ..utils import (
@@ -13,6 +12,7 @@ from ..utils import (
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands, fix_xml_ampersands,
HEADRequest, HEADRequest,
sanitized_Request,
unescapeHTML, unescapeHTML,
url_basename, url_basename,
RegexNotFoundError, RegexNotFoundError,
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _extract_mobile_video_formats(self, mtvn_id): def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url) req = sanitized_Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript # Otherwise we get a webpage that would execute some javascript
req.add_header('User-Agent', 'curl/7') req.add_header('User-Agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id, webpage = self._download_webpage(req, mtvn_id,

View File

@@ -11,10 +11,10 @@ from ..compat import (
compat_ord, compat_ord,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -83,7 +83,7 @@ class MyVideoIE(InfoExtractor):
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None: if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id, response = self._download_webpage(request, video_id,
'Downloading video info') 'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8')) info = json.loads(base64.b64decode(response).decode('utf-8'))

View File

@@ -8,11 +8,11 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_request,
compat_urllib_parse, compat_urllib_parse,
compat_str, compat_str,
compat_itertools_count, compat_itertools_count,
) )
from ..utils import sanitized_Request
class NetEaseMusicBaseIE(InfoExtractor): class NetEaseMusicBaseIE(InfoExtractor):
@@ -56,7 +56,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
return int(round(ms / 1000.0)) return int(round(ms / 1000.0))
def query_api(self, endpoint, video_id, note): def query_api(self, endpoint, video_id, note):
req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint)) req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
req.add_header('Referer', self._API_BASE) req.add_header('Referer', self._API_BASE)
return self._download_json(req, video_id, note) return self._download_json(req, video_id, note)

View File

@@ -1,10 +1,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_request, from ..utils import sanitized_Request
compat_urllib_parse,
)
class NFBIE(InfoExtractor): class NFBIE(InfoExtractor):
@@ -40,8 +38,9 @@ class NFBIE(InfoExtractor):
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>', uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
page, 'director name', fatal=False) page, 'director name', fatal=False)
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id, request = sanitized_Request(
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) 'https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

View File

@@ -8,7 +8,6 @@ import datetime
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
sanitized_Request,
xpath_text, xpath_text,
determine_ext, determine_ext,
) )
@@ -102,7 +102,7 @@ class NiconicoIE(InfoExtractor):
'password': password, 'password': password,
} }
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8') login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request( request = sanitized_Request(
'https://secure.nicovideo.jp/secure/login', login_data) 'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage( login_results = self._download_webpage(
request, None, note='Logging in', errnote='Unable to log in') request, None, note='Logging in', errnote='Unable to log in')
@@ -145,7 +145,7 @@ class NiconicoIE(InfoExtractor):
'k': thumb_play_key, 'k': thumb_play_key,
'v': video_id 'v': video_id
}) })
flv_info_request = compat_urllib_request.Request( flv_info_request = sanitized_Request(
'http://ext.nicovideo.jp/thumb_watch', flv_info_data, 'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
{'Content-Type': 'application/x-www-form-urlencoded'}) {'Content-Type': 'application/x-www-form-urlencoded'})
flv_info_webpage = self._download_webpage( flv_info_webpage = self._download_webpage(

View File

@@ -9,7 +9,6 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
clean_html, clean_html,
@@ -17,6 +16,7 @@ from ..utils import (
int_or_none, int_or_none,
float_or_none, float_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
@@ -74,7 +74,7 @@ class NocoIE(InfoExtractor):
'username': username, 'username': username,
'password': password, 'password': password,
} }
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
login = self._download_json(request, None, 'Logging in as %s' % username) login = self._download_json(request, None, 'Logging in as %s' % username)

View File

@@ -4,11 +4,9 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
urlencode_postdata, urlencode_postdata,
xpath_text, xpath_text,
xpath_with_ns, xpath_with_ns,
@@ -41,7 +39,7 @@ class NosVideoIE(InfoExtractor):
'op': 'download1', 'op': 'download1',
'method_free': 'Continue to Video', 'method_free': 'Continue to Video',
} }
req = compat_urllib_request.Request(url, urlencode_postdata(fields)) req = sanitized_Request(url, urlencode_postdata(fields))
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id, webpage = self._download_webpage(req, video_id,
'Downloading download page') 'Downloading download page')

View File

@@ -3,14 +3,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urlparse
compat_urllib_request,
compat_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
NO_DEFAULT, NO_DEFAULT,
encode_dict, encode_dict,
sanitized_Request,
urlencode_postdata, urlencode_postdata,
) )
@@ -65,7 +63,7 @@ class NovaMovIE(InfoExtractor):
'post url', default=url, group='url') 'post url', default=url, group='url')
if not post_url.startswith('http'): if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(url, post_url) post_url = compat_urlparse.urljoin(url, post_url)
request = compat_urllib_request.Request( request = sanitized_Request(
post_url, urlencode_postdata(encode_dict(fields))) post_url, urlencode_postdata(encode_dict(fields)))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('Referer', post_url) request.add_header('Referer', post_url)

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
from .brightcove import BrightcoveLegacyIE from .brightcove import BrightcoveLegacyIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..compat import compat_str
from ..compat import ( from ..utils import (
compat_str, ExtractorError,
compat_urllib_request, sanitized_Request,
) )
@@ -37,7 +37,7 @@ class NownessBaseIE(InfoExtractor):
def _api_request(self, url, request_path): def _api_request(self, url, request_path):
display_id = self._match_id(url) display_id = self._match_id(url)
request = compat_urllib_request.Request( request = sanitized_Request(
'http://api.nowness.com/api/' + request_path % display_id, 'http://api.nowness.com/api/' + request_path % display_id,
headers={ headers={
'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us', 'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
sanitized_Request,
unified_strdate, unified_strdate,
) )
@@ -33,7 +31,7 @@ class NuvidIE(InfoExtractor):
formats = [] formats = []
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]: for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
request = compat_urllib_request.Request( request = sanitized_Request(
'http://m.nuvid.com/play/%s' % video_id) 'http://m.nuvid.com/play/%s' % video_id)
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed) request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
webpage = self._download_webpage( webpage = self._download_webpage(

View File

@@ -2,9 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import js_to_json
js_to_json,
)
class PatreonIE(InfoExtractor): class PatreonIE(InfoExtractor):
@@ -65,7 +63,7 @@ class PatreonIE(InfoExtractor):
'password': password, 'password': password,
} }
request = compat_urllib_request.Request( request = sanitized_Request(
'https://www.patreon.com/processLogin', 'https://www.patreon.com/processLogin',
compat_urllib_parse.urlencode(login_form).encode('utf-8') compat_urllib_parse.urlencode(login_form).encode('utf-8')
) )

View File

@@ -5,12 +5,10 @@ import re
import os.path import os.path
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -46,7 +44,7 @@ class PlayedIE(InfoExtractor):
headers = { headers = {
b'Content-Type': b'application/x-www-form-urlencoded', b'Content-Type': b'application/x-www-form-urlencoded',
} }
req = compat_urllib_request.Request(url, post, headers) req = sanitized_Request(url, post, headers)
webpage = self._download_webpage( webpage = self._download_webpage(
req, video_id, note='Downloading video page ...') req, video_id, note='Downloading video page ...')

View File

@@ -1,29 +1,35 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json import json
import random
import collections
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration, parse_duration,
sanitized_Request,
) )
class PluralsightIE(InfoExtractor): class PluralsightBaseIE(InfoExtractor):
_API_BASE = 'http://app.pluralsight.com'
class PluralsightIE(PluralsightBaseIE):
IE_NAME = 'pluralsight' IE_NAME = 'pluralsight'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)' _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
_LOGIN_URL = 'https://www.pluralsight.com/id/' _LOGIN_URL = 'https://app.pluralsight.com/id/'
_NETRC_MACHINE = 'pluralsight' _NETRC_MACHINE = 'pluralsight'
_TEST = { _TESTS = [{
'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
'md5': '4d458cf5cf4c593788672419a8dd4cf8', 'md5': '4d458cf5cf4c593788672419a8dd4cf8',
'info_dict': { 'info_dict': {
@@ -33,7 +39,14 @@ class PluralsightIE(InfoExtractor):
'duration': 338, 'duration': 338,
}, },
'skip': 'Requires pluralsight account credentials', 'skip': 'Requires pluralsight account credentials',
} }, {
'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
'only_matching': True,
}, {
# available without pluralsight account
'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
'only_matching': True,
}]
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@@ -41,7 +54,7 @@ class PluralsightIE(InfoExtractor):
def _login(self): def _login(self):
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
if username is None: if username is None:
self.raise_login_required('Pluralsight account is required') return
login_page = self._download_webpage( login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page') self._LOGIN_URL, None, 'Downloading login page')
@@ -60,7 +73,7 @@ class PluralsightIE(InfoExtractor):
if not post_url.startswith('http'): if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
request = compat_urllib_request.Request( request = sanitized_Request(
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8')) post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -73,30 +86,47 @@ class PluralsightIE(InfoExtractor):
if error: if error:
raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to login: %s' % error, expected=True)
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
raise ExtractorError('Unable to log in')
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
author = mobj.group('author')
name = mobj.group('name') author = qs.get('author', [None])[0]
clip_id = mobj.group('clip') name = qs.get('name', [None])[0]
course = mobj.group('course') clip_id = qs.get('clip', [None])[0]
course = qs.get('course', [None])[0]
if any(not f for f in (author, name, clip_id, course,)):
raise ExtractorError('Invalid URL', expected=True)
display_id = '%s-%s' % (name, clip_id) display_id = '%s-%s' % (name, clip_id)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
collection = self._parse_json( modules = self._search_regex(
self._search_regex( r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)', webpage, 'modules', default=None)
webpage, 'modules'),
display_id) if modules:
collection = self._parse_json(modules, display_id)
else:
# Webpage may be served in different layout (see
# https://github.com/rg3/youtube-dl/issues/7607)
collection = self._parse_json(
self._search_regex(
r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'),
display_id)['course']['modules']
module, clip = None, None module, clip = None, None
for module_ in collection: for module_ in collection:
if module_.get('moduleName') == name: if name in (module_.get('moduleName'), module_.get('name')):
module = module_ module = module_
for clip_ in module_.get('clips', []): for clip_ in module_.get('clips', []):
clip_index = clip_.get('clipIndex') clip_index = clip_.get('clipIndex')
if clip_index is None:
clip_index = clip_.get('index')
if clip_index is None: if clip_index is None:
continue continue
if compat_str(clip_index) == clip_id: if compat_str(clip_index) == clip_id:
@@ -112,13 +142,33 @@ class PluralsightIE(InfoExtractor):
'high': {'width': 1024, 'height': 768}, 'high': {'width': 1024, 'height': 768},
} }
AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
ALLOWED_QUALITIES = ( ALLOWED_QUALITIES = (
('webm', ('high',)), AllowedQuality('webm', ('high',)),
('mp4', ('low', 'medium', 'high',)), AllowedQuality('mp4', ('low', 'medium', 'high',)),
) )
# In order to minimize the number of calls to the ViewClip API and reduce
# the probability of being throttled or banned by Pluralsight, we request
# only a single format unless a formats listing was explicitly requested.
if self._downloader.params.get('listformats', False):
allowed_qualities = ALLOWED_QUALITIES
else:
def guess_allowed_qualities():
req_format = self._downloader.params.get('format') or 'best'
req_format_split = req_format.split('-')
if len(req_format_split) > 1:
req_ext, req_quality = req_format_split
for allowed_quality in ALLOWED_QUALITIES:
if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
return (AllowedQuality(req_ext, (req_quality, )), )
req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
return (AllowedQuality(req_ext, ('high', )), )
allowed_qualities = guess_allowed_qualities()
formats = [] formats = []
for ext, qualities in ALLOWED_QUALITIES: for ext, qualities in allowed_qualities:
for quality in qualities: for quality in qualities:
f = QUALITIES[quality].copy() f = QUALITIES[quality].copy()
clip_post = { clip_post = {
@@ -131,13 +181,24 @@ class PluralsightIE(InfoExtractor):
'mt': ext, 'mt': ext,
'q': '%dx%d' % (f['width'], f['height']), 'q': '%dx%d' % (f['width'], f['height']),
} }
request = compat_urllib_request.Request( request = sanitized_Request(
'http://www.pluralsight.com/training/Player/ViewClip', '%s/training/Player/ViewClip' % self._API_BASE,
json.dumps(clip_post).encode('utf-8')) json.dumps(clip_post).encode('utf-8'))
request.add_header('Content-Type', 'application/json;charset=utf-8') request.add_header('Content-Type', 'application/json;charset=utf-8')
format_id = '%s-%s' % (ext, quality) format_id = '%s-%s' % (ext, quality)
clip_url = self._download_webpage( clip_url = self._download_webpage(
request, display_id, 'Downloading %s URL' % format_id, fatal=False) request, display_id, 'Downloading %s URL' % format_id, fatal=False)
# Pluralsight tracks multiple sequential calls to the ViewClip API and starts
# to return 429 HTTP errors after some time (see
# https://github.com/rg3/youtube-dl/pull/6989). Moreover, it may even lead
# to an account ban (see https://github.com/rg3/youtube-dl/issues/6842).
# To somewhat reduce the probability of these consequences
# we sleep a random amount of time before each call to ViewClip.
self._sleep(
random.randint(2, 5), display_id,
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
if not clip_url: if not clip_url:
continue continue
f.update({ f.update({
@@ -163,10 +224,10 @@ class PluralsightIE(InfoExtractor):
} }
class PluralsightCourseIE(InfoExtractor): class PluralsightCourseIE(PluralsightBaseIE):
IE_NAME = 'pluralsight:course' IE_NAME = 'pluralsight:course'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
_TEST = { _TESTS = [{
# Free course from Pluralsight Starter Subscription for Microsoft TechNet # Free course from Pluralsight Starter Subscription for Microsoft TechNet
# https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas', 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
@@ -176,7 +237,14 @@ class PluralsightCourseIE(InfoExtractor):
'description': 'md5:61b37e60f21c4b2f91dc621a977d0986', 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
}, },
'playlist_count': 31, 'playlist_count': 31,
} }, {
# available without pluralsight account
'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
'only_matching': True,
}, {
'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
course_id = self._match_id(url) course_id = self._match_id(url)
@@ -184,14 +252,14 @@ class PluralsightCourseIE(InfoExtractor):
# TODO: PSM cookie # TODO: PSM cookie
course = self._download_json( course = self._download_json(
'http://www.pluralsight.com/data/course/%s' % course_id, '%s/data/course/%s' % (self._API_BASE, course_id),
course_id, 'Downloading course JSON') course_id, 'Downloading course JSON')
title = course['title'] title = course['title']
description = course.get('description') or course.get('shortDescription') description = course.get('description') or course.get('shortDescription')
course_data = self._download_json( course_data = self._download_json(
'http://www.pluralsight.com/data/course/content/%s' % course_id, '%s/data/course/content/%s' % (self._API_BASE, course_id),
course_id, 'Downloading course data JSON') course_id, 'Downloading course data JSON')
entries = [] entries = []
@@ -201,7 +269,7 @@ class PluralsightCourseIE(InfoExtractor):
if not player_parameters: if not player_parameters:
continue continue
entries.append(self.url_result( entries.append(self.url_result(
'http://www.pluralsight.com/training/player?%s' % player_parameters, '%s/training/player?%s' % (self._API_BASE, player_parameters),
'Pluralsight')) 'Pluralsight'))
return self.playlist_result(entries, course_id, title, description) return self.playlist_result(entries, course_id, title, description)

View File

@@ -36,7 +36,8 @@ class PornHdIE(InfoExtractor):
webpage = self._download_webpage(url, display_id or video_id) webpage = self._download_webpage(url, display_id or video_id)
title = self._html_search_regex( title = self._html_search_regex(
r'<title>(.+) porn HD.+?</title>', webpage, 'title') [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
description = self._html_search_regex( description = self._html_search_regex(
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False) r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
view_count = int_or_none(self._html_search_regex( view_count = int_or_none(self._html_search_regex(

View File

@@ -8,10 +8,10 @@ from ..compat import (
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus, compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
str_to_int, str_to_int,
) )
from ..aes import ( from ..aes import (
@@ -53,7 +53,7 @@ class PornHubIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
req = compat_urllib_request.Request( req = sanitized_Request(
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id) 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -46,7 +44,7 @@ class PornotubeIE(InfoExtractor):
'authenticationSpaceKey': originAuthenticationSpaceKey, 'authenticationSpaceKey': originAuthenticationSpaceKey,
'credentials': 'Clip Application', 'credentials': 'Clip Application',
} }
token_req = compat_urllib_request.Request( token_req = sanitized_Request(
'https://api.aebn.net/auth/v1/token/primal', 'https://api.aebn.net/auth/v1/token/primal',
data=json.dumps(token_req_data).encode('utf-8')) data=json.dumps(token_req_data).encode('utf-8'))
token_req.add_header('Content-Type', 'application/json') token_req.add_header('Content-Type', 'application/json')
@@ -56,7 +54,7 @@ class PornotubeIE(InfoExtractor):
token = token_answer['tokenKey'] token = token_answer['tokenKey']
# Get video URL # Get video URL
delivery_req = compat_urllib_request.Request( delivery_req = sanitized_Request(
'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id) 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
delivery_req.add_header('Authorization', token) delivery_req.add_header('Authorization', token)
delivery_info = self._download_json( delivery_info = self._download_json(
@@ -64,7 +62,7 @@ class PornotubeIE(InfoExtractor):
video_url = delivery_info['mediaUrl'] video_url = delivery_info['mediaUrl']
# Get additional info (title etc.) # Get additional info (title etc.)
info_req = compat_urllib_request.Request( info_req = sanitized_Request(
'https://api.aebn.net/content/v1/clips/%s?expand=' 'https://api.aebn.net/content/v1/clips/%s?expand='
'title,description,primaryImageNumber,startSecond,endSecond,' 'title,description,primaryImageNumber,startSecond,endSecond,'
'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,' 'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'

View File

@@ -1,11 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse, from ..utils import (
compat_urllib_request, ExtractorError,
sanitized_Request,
) )
from ..utils import ExtractorError
class PrimeShareTVIE(InfoExtractor): class PrimeShareTVIE(InfoExtractor):
@@ -41,7 +41,7 @@ class PrimeShareTVIE(InfoExtractor):
webpage, 'wait time', default=7)) + 1 webpage, 'wait time', default=7)) + 1
self._sleep(wait_time, video_id) self._sleep(wait_time, video_id)
req = compat_urllib_request.Request( req = sanitized_Request(
url, compat_urllib_parse.urlencode(fields), headers) url, compat_urllib_parse.urlencode(fields), headers)
video_page = self._download_webpage( video_page = self._download_webpage(
req, video_id, 'Downloading video page') req, video_id, 'Downloading video page')

View File

@@ -4,13 +4,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -37,7 +35,7 @@ class PromptFileIE(InfoExtractor):
fields = self._hidden_inputs(webpage) fields = self._hidden_inputs(webpage)
post = compat_urllib_parse.urlencode(fields) post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post) req = sanitized_Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage( webpage = self._download_webpage(
req, video_id, 'Downloading video page') req, video_id, 'Downloading video page')

View File

@@ -7,11 +7,11 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
sanitized_Request,
strip_jsonp, strip_jsonp,
unescapeHTML, unescapeHTML,
clean_html, clean_html,
) )
from ..compat import compat_urllib_request
class QQMusicIE(InfoExtractor): class QQMusicIE(InfoExtractor):
@@ -201,7 +201,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
singer_desc = None singer_desc = None
if singer_id: if singer_id:
req = compat_urllib_request.Request( req = sanitized_Request(
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id) 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
req.add_header( req.add_header(
'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html') 'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')

View File

@@ -6,11 +6,11 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
remove_end, remove_end,
sanitized_Request,
std_headers, std_headers,
struct_unpack, struct_unpack,
) )
@@ -102,7 +102,7 @@ class RTVEALaCartaIE(InfoExtractor):
if info['state'] == 'DESPU': if info['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True) raise ExtractorError('The video is no longer available', expected=True)
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png_request = compat_urllib_request.Request(png_url) png_request = sanitized_Request(png_url)
png_request.add_header('Referer', url) png_request.add_header('Referer', url)
png = self._download_webpage(png_request, video_id, 'Downloading url information') png = self._download_webpage(png_request, video_id, 'Downloading url information')
video_url = _decrypt_url(png) video_url = _decrypt_url(png)

View File

@@ -9,7 +9,7 @@ from ..compat import (
compat_str, compat_str,
) )
from ..utils import ( from ..utils import (
ExtractorError, determine_ext,
unified_strdate, unified_strdate,
) )
@@ -51,10 +51,25 @@ class RutubeIE(InfoExtractor):
'http://rutube.ru/api/play/options/%s/?format=json' % video_id, 'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
video_id, 'Downloading options JSON') video_id, 'Downloading options JSON')
m3u8_url = options['video_balancer'].get('m3u8') formats = []
if m3u8_url is None: for format_id, format_url in options['video_balancer'].items():
raise ExtractorError('Couldn\'t find m3u8 manifest url') ext = determine_ext(format_url)
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif ext == 'f4m':
f4m_formats = self._extract_f4m_formats(
format_url, video_id, f4m_id=format_id, fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
return { return {
'id': video['id'], 'id': video['id'],
@@ -74,9 +89,9 @@ class RutubeIE(InfoExtractor):
class RutubeEmbedIE(InfoExtractor): class RutubeEmbedIE(InfoExtractor):
IE_NAME = 'rutube:embed' IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos' IE_DESC = 'Rutube embedded videos'
_VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)' _VALID_URL = 'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': { 'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661', 'id': 'a10e53b86e8f349080f718582ce4c661',
@@ -90,7 +105,10 @@ class RutubeEmbedIE(InfoExtractor):
'params': { 'params': {
'skip_download': 'Requires ffmpeg', 'skip_download': 'Requires ffmpeg',
}, },
} }, {
'url': 'http://rutube.ru/play/embed/8083783',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
embed_id = self._match_id(url) embed_id = self._match_id(url)

View File

@@ -6,12 +6,10 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE from .brightcove import BrightcoveLegacyIE
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
smuggle_url, smuggle_url,
std_headers, std_headers,
) )
@@ -58,7 +56,7 @@ class SafariBaseIE(InfoExtractor):
'next': '', 'next': '',
} }
request = compat_urllib_request.Request( request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers) self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
login_page = self._download_webpage( login_page = self._download_webpage(
request, None, 'Logging in as %s' % username) request, None, 'Logging in as %s' % username)

View File

@@ -6,14 +6,12 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urlparse
compat_urllib_request,
compat_urlparse,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
sanitized_Request,
unified_strdate, unified_strdate,
) )
@@ -37,7 +35,7 @@ class SandiaIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4') req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import base64 import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -46,7 +44,7 @@ class SharedIE(InfoExtractor):
'Video %s does not exist' % video_id, expected=True) 'Video %s does not exist' % video_id, expected=True)
download_form = self._hidden_inputs(webpage) download_form = self._hidden_inputs(webpage)
request = compat_urllib_request.Request( request = sanitized_Request(
url, compat_urllib_parse.urlencode(download_form)) url, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -4,12 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
sanitized_Request,
) )
@@ -50,7 +48,7 @@ class ShareSixIE(InfoExtractor):
'method_free': 'Free' 'method_free': 'Free'
} }
post = compat_urllib_parse.urlencode(fields) post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post) req = sanitized_Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id, webpage = self._download_webpage(req, video_id,

View File

@@ -4,10 +4,8 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_request, from ..utils import sanitized_Request
compat_urllib_parse,
)
class SinaIE(InfoExtractor): class SinaIE(InfoExtractor):
@@ -61,7 +59,7 @@ class SinaIE(InfoExtractor):
if mobj.group('token') is not None: if mobj.group('token') is not None:
# The video id is in the redirected url # The video id is in the redirected url
self.to_screen('Getting video id') self.to_screen('Getting video id')
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
request.get_method = lambda: 'HEAD' request.get_method = lambda: 'HEAD'
(_, urlh) = self._download_webpage_handle(request, 'NA', False) (_, urlh) = self._download_webpage_handle(request, 'NA', False)
return self._real_extract(urlh.geturl()) return self._real_extract(urlh.geturl())

View File

@@ -7,13 +7,11 @@ import hashlib
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
sanitized_Request,
unified_strdate, unified_strdate,
) )
@@ -176,7 +174,7 @@ class SmotriIE(InfoExtractor):
if video_password: if video_password:
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
request = compat_urllib_request.Request( request = sanitized_Request(
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form)) 'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -339,7 +337,7 @@ class SmotriBroadcastIE(InfoExtractor):
'password': password, 'password': password,
} }
request = compat_urllib_request.Request( request = sanitized_Request(
broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
broadcast_page = self._download_webpage( broadcast_page = self._download_webpage(

View File

@@ -6,11 +6,11 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_request,
compat_urllib_parse, compat_urllib_parse,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -96,7 +96,7 @@ class SohuIE(InfoExtractor):
else: else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
req = compat_urllib_request.Request(base_data_url + vid_id) req = sanitized_Request(base_data_url + vid_id)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy: if cn_verification_proxy:

View File

@@ -4,13 +4,17 @@ from __future__ import unicode_literals
import re import re
import itertools import itertools
from .common import InfoExtractor from .common import (
InfoExtractor,
SearchInfoExtractor
)
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urlparse, compat_urlparse,
compat_urllib_parse, compat_urllib_parse,
) )
from ..utils import ( from ..utils import (
encode_dict,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unified_strdate, unified_strdate,
@@ -469,3 +473,60 @@ class SoundcloudPlaylistIE(SoundcloudIE):
'description': data.get('description'), 'description': data.get('description'),
'entries': entries, 'entries': entries,
} }
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    """Soundcloud track search, keyed by the ``scsearch`` search prefix.

    Results are fetched page by page from the Soundcloud API v2 and each
    hit is handed over to SoundcloudIE as a URL result.
    """
    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    # No upper bound on the number of results that may be requested.
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    _SEARCH_KEY = 'scsearch'
    # Largest page size sent to the API in the ``limit`` query parameter.
    _MAX_RESULTS_PER_PAGE = 200
    # Number of results assumed when the caller passes no ``limit``.
    _DEFAULT_RESULTS_PER_PAGE = 50
    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _get_collection(self, endpoint, collection_id, **query):
        """Yield URL results for every item of a paginated API collection.

        endpoint      -- API path appended to _API_V2_BASE
        collection_id -- identifier passed to _download_json for this
                         collection
        query         -- extra query parameters; ``limit`` is the total
                         number of results wanted (defaults to
                         _DEFAULT_RESULTS_PER_PAGE)

        At most ``limit`` results are yielded. Pagination follows the
        ``next_href`` of each response and stops when it is missing or
        when a page carries no usable items.
        """
        # Total number of results requested by the caller. This is kept
        # separate from the per-page size: clamping the total to the page
        # size would silently cut any request above _MAX_RESULTS_PER_PAGE.
        total = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)

        query['limit'] = min(total, self._MAX_RESULTS_PER_PAGE)
        query['client_id'] = self._CLIENT_ID
        query['linked_partitioning'] = '1'
        query['offset'] = 0
        data = compat_urllib_parse.urlencode(encode_dict(query))
        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)

        yielded = 0

        for i in itertools.count(1):
            response = self._download_json(
                next_url, collection_id, 'Downloading page {0}'.format(i),
                'Unable to download API page')

            collection = response.get('collection', [])
            if not collection:
                break

            # Drop empty (falsy) entries before using the page.
            collection = list(filter(bool, collection))

            for item in collection:
                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
                yielded += 1
                if yielded >= total:
                    return

            # A page without any usable item ends the search.
            if not collection:
                break

            next_url = response.get('next_href')
            if not next_url:
                break

    def _get_n_results(self, query, n):
        """Return a playlist titled *query* with up to *n* matching tracks."""
        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
        return self.playlist_result(tracks, playlist_title=query)

View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
sanitized_Request,
str_to_int, str_to_int,
unified_strdate, unified_strdate,
) )
@@ -51,7 +51,7 @@ class SpankwireIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
req = compat_urllib_request.Request('http://www.' + mobj.group('url')) req = sanitized_Request('http://www.' + mobj.group('url'))
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)

View File

@@ -4,11 +4,9 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
@@ -54,7 +52,7 @@ class SportDeutschlandIE(InfoExtractor):
api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % ( api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
sport_id, video_id) sport_id, video_id)
req = compat_urllib_request.Request(api_url, headers={ req = sanitized_Request(api_url, headers={
'Accept': 'application/vnd.vidibus.v2.html+json', 'Accept': 'application/vnd.vidibus.v2.html+json',
'Referer': url, 'Referer': url,
}) })

View File

@@ -4,10 +4,8 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse, from ..utils import sanitized_Request
compat_urllib_request,
)
class StreamcloudIE(InfoExtractor): class StreamcloudIE(InfoExtractor):
@@ -43,7 +41,7 @@ class StreamcloudIE(InfoExtractor):
headers = { headers = {
b'Content-Type': b'application/x-www-form-urlencoded', b'Content-Type': b'application/x-www-form-urlencoded',
} }
req = compat_urllib_request.Request(url, post, headers) req = sanitized_Request(url, post, headers)
webpage = self._download_webpage( webpage = self._download_webpage(
req, video_id, note='Downloading video page ...') req, video_id, note='Downloading video page ...')

View File

@@ -5,11 +5,9 @@ import hashlib
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -54,7 +52,7 @@ class StreamCZIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
api_path = '/episode/%s' % video_id api_path = '/episode/%s' % video_id
req = compat_urllib_request.Request(self._API_URL + api_path) req = sanitized_Request(self._API_URL + api_path)
req.add_header('Api-Password', _get_api_key(api_path)) req.add_header('Api-Password', _get_api_key(api_path))
data = self._download_json(req, video_id) data = self._download_json(req, video_id)

View File

@@ -4,14 +4,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
@@ -53,7 +51,7 @@ class TapelyIE(InfoExtractor):
display_id = mobj.group('id') display_id = mobj.group('id')
playlist_url = self._API_URL.format(display_id) playlist_url = self._API_URL.format(display_id)
request = compat_urllib_request.Request(playlist_url) request = sanitized_Request(playlist_url)
request.add_header('X-Requested-With', 'XMLHttpRequest') request.add_header('X-Requested-With', 'XMLHttpRequest')
request.add_header('Accept', 'application/json') request.add_header('Accept', 'application/json')
request.add_header('Referer', url) request.add_header('Referer', url)

View File

@@ -187,8 +187,12 @@ class ThePlatformIE(ThePlatformBaseIE):
# Seems there's no pattern for the interested script filename, so # Seems there's no pattern for the interested script filename, so
# I try one by one # I try one by one
for script in reversed(scripts): for script in reversed(scripts):
feed_script = self._download_webpage(script, video_id, 'Downloading feed script') feed_script = self._download_webpage(
feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None) self._proto_relative_url(script, 'http:'),
video_id, 'Downloading feed script')
feed_id = self._search_regex(
r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
'default feed id', default=None)
if feed_id is not None: if feed_id is not None:
break break
if feed_id is None: if feed_id is None:

View File

@@ -4,12 +4,10 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse_urlparse
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
sanitized_Request,
str_to_int, str_to_int,
) )
from ..aes import aes_decrypt_text from ..aes import aes_decrypt_text
@@ -42,7 +40,7 @@ class Tube8IE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id')
req = compat_urllib_request.Request(url) req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, display_id) webpage = self._download_webpage(req, display_id)

View File

@@ -5,13 +5,11 @@ import codecs
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse
compat_urllib_parse,
compat_urllib_request
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -44,7 +42,7 @@ class TubiTvIE(InfoExtractor):
'password': password, 'password': password,
} }
payload = compat_urllib_parse.urlencode(form_data).encode('utf-8') payload = compat_urllib_parse.urlencode(form_data).encode('utf-8')
request = compat_urllib_request.Request(self._LOGIN_URL, payload) request = sanitized_Request(self._LOGIN_URL, payload)
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_page = self._download_webpage( login_page = self._download_webpage(
request, None, False, 'Wrong login info') request, None, False, 'Wrong login info')

View File

@@ -11,7 +11,6 @@ from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@@ -20,6 +19,7 @@ from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
@@ -48,7 +48,7 @@ class TwitchBaseIE(InfoExtractor):
for cookie in self._downloader.cookiejar: for cookie in self._downloader.cookiejar:
if cookie.name == 'api_token': if cookie.name == 'api_token':
headers['Twitch-Api-Token'] = cookie.value headers['Twitch-Api-Token'] = cookie.value
request = compat_urllib_request.Request(url, headers=headers) request = sanitized_Request(url, headers=headers)
response = super(TwitchBaseIE, self)._download_json(request, video_id, note) response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
self._handle_error(response) self._handle_error(response)
return response return response
@@ -80,7 +80,7 @@ class TwitchBaseIE(InfoExtractor):
if not post_url.startswith('http'): if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(redirect_url, post_url) post_url = compat_urlparse.urljoin(redirect_url, post_url)
request = compat_urllib_request.Request( request = sanitized_Request(
post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8')) post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8'))
request.add_header('Referer', redirect_url) request.add_header('Referer', redirect_url)
response = self._download_webpage( response = self._download_webpage(

View File

@@ -4,13 +4,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
xpath_text, xpath_text,
remove_end, remove_end,
int_or_none, int_or_none,
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -81,7 +81,7 @@ class TwitterCardIE(InfoExtractor):
config = None config = None
formats = [] formats = []
for user_agent in USER_AGENTS: for user_agent in USER_AGENTS:
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
request.add_header('User-Agent', user_agent) request.add_header('User-Agent', user_agent)
webpage = self._download_webpage(request, video_id) webpage = self._download_webpage(request, video_id)

View File

@@ -9,6 +9,7 @@ from ..compat import (
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -58,7 +59,7 @@ class UdemyIE(InfoExtractor):
for header, value in headers.items(): for header, value in headers.items():
url_or_request.add_header(header, value) url_or_request.add_header(header, value)
else: else:
url_or_request = compat_urllib_request.Request(url_or_request, headers=headers) url_or_request = sanitized_Request(url_or_request, headers=headers)
response = super(UdemyIE, self)._download_json(url_or_request, video_id, note) response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
self._handle_error(response) self._handle_error(response)
@@ -89,7 +90,7 @@ class UdemyIE(InfoExtractor):
'password': password.encode('utf-8'), 'password': password.encode('utf-8'),
}) })
request = compat_urllib_request.Request( request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._ORIGIN_URL) request.add_header('Referer', self._ORIGIN_URL)
request.add_header('Origin', self._ORIGIN_URL) request.add_header('Origin', self._ORIGIN_URL)

View File

@@ -4,11 +4,11 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -49,7 +49,7 @@ class Vbox7IE(InfoExtractor):
info_url = "http://vbox7.com/play/magare.do" info_url = "http://vbox7.com/play/magare.do"
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id}) data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
info_request = compat_urllib_request.Request(info_url, data) info_request = sanitized_Request(info_url, data)
info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage') info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
if info_response is None: if info_response is None:

View File

@@ -4,12 +4,10 @@ import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
ExtractorError, ExtractorError,
sanitized_Request,
) )
@@ -110,7 +108,7 @@ class VeohIE(InfoExtractor):
if 'class="adultwarning-container"' in webpage: if 'class="adultwarning-container"' in webpage:
self.report_age_confirmation() self.report_age_confirmation()
age_limit = 18 age_limit = 18
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
request.add_header('Cookie', 'confirmedAdult=true') request.add_header('Cookie', 'confirmedAdult=true')
webpage = self._download_webpage(request, video_id) webpage = self._download_webpage(request, video_id)

View File

@@ -4,10 +4,10 @@ from __future__ import unicode_literals
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
@@ -33,7 +33,7 @@ class VesselIE(InfoExtractor):
@staticmethod @staticmethod
def make_json_request(url, data): def make_json_request(url, data):
payload = json.dumps(data).encode('utf-8') payload = json.dumps(data).encode('utf-8')
req = compat_urllib_request.Request(url, payload) req = sanitized_Request(url, payload)
req.add_header('Content-Type', 'application/json; charset=utf-8') req.add_header('Content-Type', 'application/json; charset=utf-8')
return req return req

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_etree_fromstring
compat_etree_fromstring,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
sanitized_Request,
) )
@@ -73,7 +71,7 @@ class VevoIE(InfoExtractor):
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
def _real_initialize(self): def _real_initialize(self):
req = compat_urllib_request.Request( req = sanitized_Request(
'http://www.vevo.com/auth', data=b'') 'http://www.vevo.com/auth', data=b'')
webpage = self._download_webpage( webpage = self._download_webpage(
req, None, req, None,

View File

@@ -4,9 +4,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
) sanitized_Request,
from ..compat import (
compat_urllib_request
) )
@@ -65,7 +63,7 @@ class ViddlerIE(InfoExtractor):
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' % 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
video_id) video_id)
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'} headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
request = compat_urllib_request.Request(json_url, None, headers) request = sanitized_Request(json_url, None, headers)
data = self._download_json(request, video_id)['video'] data = self._download_json(request, video_id)['video']
formats = [] formats = []

View File

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request from ..utils import sanitized_Request
class VideoMegaIE(InfoExtractor): class VideoMegaIE(InfoExtractor):
@@ -30,7 +30,7 @@ class VideoMegaIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
req = compat_urllib_request.Request(iframe_url) req = sanitized_Request(iframe_url)
req.add_header('Referer', url) req.add_header('Referer', url)
req.add_header('Cookie', 'noadvtday=0') req.add_header('Cookie', 'noadvtday=0')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)

View File

@@ -4,7 +4,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_request,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
) )
@@ -13,6 +12,7 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
HEADRequest, HEADRequest,
) )
@@ -76,7 +76,7 @@ class ViewsterIE(InfoExtractor):
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True): def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
request.add_header('Accept', self._ACCEPT_HEADER) request.add_header('Accept', self._ACCEPT_HEADER)
request.add_header('Auth-token', self._AUTH_TOKEN) request.add_header('Auth-token', self._AUTH_TOKEN)
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal) return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)

View File

@@ -7,14 +7,14 @@ import hmac
import hashlib import hashlib
import itertools import itertools
from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
parse_iso8601, parse_iso8601,
sanitized_Request,
) )
from ..compat import compat_urllib_request
from .common import InfoExtractor
class VikiBaseIE(InfoExtractor): class VikiBaseIE(InfoExtractor):
@@ -43,7 +43,7 @@ class VikiBaseIE(InfoExtractor):
hashlib.sha1 hashlib.sha1
).hexdigest() ).hexdigest()
url = self._API_URL_TEMPLATE % (query, sig) url = self._API_URL_TEMPLATE % (query, sig)
return compat_urllib_request.Request( return sanitized_Request(
url, json.dumps(post_data).encode('utf-8')) if post_data else url url, json.dumps(post_data).encode('utf-8')) if post_data else url
def _call_api(self, path, video_id, note, timestamp=None, post_data=None): def _call_api(self, path, video_id, note, timestamp=None, post_data=None):

View File

@@ -8,7 +8,6 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_request,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
InAdvancePagedList, InAdvancePagedList,
int_or_none, int_or_none,
RegexNotFoundError, RegexNotFoundError,
sanitized_Request,
smuggle_url, smuggle_url,
std_headers, std_headers,
unified_strdate, unified_strdate,
@@ -47,7 +47,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'service': 'vimeo', 'service': 'vimeo',
'token': token, 'token': token,
})) }))
login_request = compat_urllib_request.Request(self._LOGIN_URL, data) login_request = sanitized_Request(self._LOGIN_URL, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Referer', self._LOGIN_URL) login_request.add_header('Referer', self._LOGIN_URL)
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', vuid)
@@ -189,6 +189,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'note': 'Video not completely processed, "failed" seed status', 'note': 'Video not completely processed, "failed" seed status',
'only_matching': True, 'only_matching': True,
}, },
{
'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
'only_matching': True,
},
] ]
@staticmethod @staticmethod
@@ -218,7 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if url.startswith('http://'): if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url # vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://') url = url.replace('http://', 'https://')
password_request = compat_urllib_request.Request(url + '/password', data) password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url) password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', vuid)
@@ -232,7 +236,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise ExtractorError('This video is protected by a password, use the --video-password option') raise ExtractorError('This video is protected by a password, use the --video-password option')
data = urlencode_postdata(encode_dict({'password': password})) data = urlencode_postdata(encode_dict({'password': password}))
pass_url = url + '/check-password' pass_url = url + '/check-password'
password_request = compat_urllib_request.Request(pass_url, data) password_request = sanitized_Request(pass_url, data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
return self._download_json( return self._download_json(
password_request, video_id, password_request, video_id,
@@ -261,7 +265,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers) request = sanitized_Request(url, None, headers)
try: try:
webpage = self._download_webpage(request, video_id) webpage = self._download_webpage(request, video_id)
except ExtractorError as ee: except ExtractorError as ee:
@@ -477,7 +481,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
password_path = self._search_regex( password_path = self._search_regex(
r'action="([^"]+)"', login_form, 'password URL') r'action="([^"]+)"', login_form, 'password URL')
password_url = compat_urlparse.urljoin(page_url, password_path) password_url = compat_urlparse.urljoin(page_url, password_path)
password_request = compat_urllib_request.Request(password_url, post) password_request = sanitized_Request(password_url, post)
password_request.add_header('Content-type', 'application/x-www-form-urlencoded') password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', vuid)
self._set_vimeo_cookie('xsrft', token) self._set_vimeo_cookie('xsrft', token)
@@ -486,8 +490,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
password_request, list_id, password_request, list_id,
'Verifying the password', 'Wrong password') 'Verifying the password', 'Wrong password')
def _extract_videos(self, list_id, base_url): def _title_and_entries(self, list_id, base_url):
video_ids = []
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
page_url = self._page_url(base_url, pagenum) page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage( webpage = self._download_webpage(
@@ -496,18 +499,18 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
if pagenum == 1: if pagenum == 1:
webpage = self._login_list_password(page_url, list_id, webpage) webpage = self._login_list_password(page_url, list_id, webpage)
yield self._extract_list_title(webpage)
for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break break
entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') def _extract_videos(self, list_id, base_url):
for video_id in video_ids] title_and_entries = self._title_and_entries(list_id, base_url)
return {'_type': 'playlist', list_title = next(title_and_entries)
'id': list_id, return self.playlist_result(title_and_entries, list_id, list_title)
'title': self._extract_list_title(webpage),
'entries': entries,
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -568,7 +571,7 @@ class VimeoAlbumIE(VimeoChannelIE):
class VimeoGroupsIE(VimeoAlbumIE): class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = 'vimeo:group' IE_NAME = 'vimeo:group'
_VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)' _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://vimeo.com/groups/rolexawards', 'url': 'https://vimeo.com/groups/rolexawards',
'info_dict': { 'info_dict': {
@@ -637,7 +640,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
def _page_url(self, base_url, pagenum): def _page_url(self, base_url, pagenum):
url = '%s/page:%d/' % (base_url, pagenum) url = '%s/page:%d/' % (base_url, pagenum)
request = compat_urllib_request.Request(url) request = sanitized_Request(url)
# Set the header to get a partial html page with the ids, # Set the header to get a partial html page with the ids,
# the normal page doesn't contain them. # the normal page doesn't contain them.
request.add_header('X-Requested-With', 'XMLHttpRequest') request.add_header('X-Requested-With', 'XMLHttpRequest')

Some files were not shown because too many files have changed in this diff Show More