Compare commits

...

74 Commits

Author SHA1 Message Date
Philipp Hagemeister
20e98bf6c0 release 2015.11.23 2015-11-23 18:07:58 +01:00
Sergey M?
5c2266df4b Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8
2015-11-23 21:56:23 +06:00
Sergey M․
67dda51722 Rename compat_urllib_request_Request to sanitized_Request and move to utils 2015-11-23 21:55:15 +06:00
Sergey M․
e4c4bcf36f [vimeo] Use compat_urllib_request_Request 2015-11-23 21:55:14 +06:00
Sergey M․
82d8a8b6e2 [YoutubeDL] Wrap plain-text URL requests in compat_urllib_request_Request 2015-11-23 21:55:13 +06:00
Sergey M․
13a10d5aa3 [compat] Add compat_urllib_request_Request
This is actually not a compatibility routine but rather a workaround for URLs without protocol specified.
The protocol-less URL is treated as HTTP one since it's most probable scenario and it will most likely to
redirect to HTTPS if HTTPS was actually expected. This routine could also be useful for any Request
preprocessing that may be added in future.
2015-11-23 21:55:12 +06:00
Sergey M․
9022726446 [youtube] Fix test 2015-11-23 21:37:21 +06:00
Sergey M․
94bfcd23b7 [youtube] Fix test 2015-11-23 21:35:23 +06:00
Sergey M․
526b3b0716 [youtube] Clarify ytplayer.config extraction rationale 2015-11-23 21:14:03 +06:00
Sergey M․
61f92af1cf [youtube] Add test with '};' in tags 2015-11-23 21:02:37 +06:00
Sergey M․
a72778d364 [youtube] Improve ytplayer.config extraction 2015-11-23 21:00:06 +06:00
Sergey M
5ae17037a3 Merge pull request #7599 from lalinsky/fix-youtube
[youtube] More explicit player config JSON extraction (fixes #7468)
2015-11-23 20:52:23 +06:00
Sergey M․
02f0da20b0 [pluralsight] Add support for alternative webpage layout (Closes #7607) 2015-11-23 03:08:38 +06:00
Lukáš Lalinský
b41631c4e6 [youtube] Send the list of patterns directly to _search_regex 2015-11-22 13:53:26 +01:00
Lukáš Lalinský
0e49d9a6b0 [youtube] Fall back to the original regex for ytplayer.config 2015-11-22 13:49:33 +01:00
Sergey M․
4a7d108ab3 [rutube] Remove unnecessary print 2015-11-22 18:24:17 +06:00
Lukáš Lalinský
3cfd000849 [youtube] More explicit player config JSON extraction (fixes #7468) 2015-11-22 13:14:35 +01:00
Sergey M․
1b38185361 [pornhd] Fix title extraction (Closes #7596) 2015-11-22 18:08:30 +06:00
Sergey M․
9cb9a5df77 [utils] Check ext with trailing slash against the list of known extensions 2015-11-22 17:27:13 +06:00
Sergey M․
5035536e3f [test_utils] Add tests for determine_ext 2015-11-22 06:33:52 +06:00
Sergey M․
3e12bc583a [utils] Improve determine_ext (Closes #7593) 2015-11-22 06:29:39 +06:00
Sergey M․
e568c2233e [youtube] Add test for multi page list of playlists 2015-11-22 05:03:23 +06:00
Sergey M․
061a75edd6 [youtube] Extract base for entry list extractors and support multi page lists of playlists 2015-11-22 05:01:01 +06:00
Philipp Hagemeister
82c4d7b0ce release 2015.11.21 2015-11-21 23:36:27 +01:00
Sergey M․
136dadde95 [youtube:show] Rework in terms of playlists base extractor 2015-11-22 04:18:20 +06:00
Sergey M․
0c14841585 [youtube:user:playlists] Add extractor (Closes #3817) 2015-11-22 04:17:07 +06:00
Sergey M․
0eebf34d9d [pluralsight] Rephrase 2015-11-22 00:58:25 +06:00
Sergey M․
cf186b77a7 [pluralsight] Clarify allowed qualities guessing rationale 2015-11-22 00:56:40 +06:00
Sergey M․
a3372437bf [soundcloud] Remove unused variable 2015-11-22 00:49:58 +06:00
Sergey M․
4c57b4853d [pluralsight] Until listing formats request only single format 2015-11-22 00:42:58 +06:00
Sergey M․
38eb2968ab [pluralsight] Clarify and randomize ViewClip sleep interval 2015-11-22 00:07:09 +06:00
Andrzej Lichnerowicz
bea56c9569 [pluralsight] prevent error 429 when sensing video formats 2015-11-21 23:49:58 +06:00
Sergey M․
7e508ff2cf [pluralsight] Improve login detection 2015-11-21 21:49:37 +06:00
Sergey M․
563772eda4 [pluralsight] Extract base class 2015-11-21 21:37:29 +06:00
Sergey M․
0533915aad [pluralsight] Update some more URLs 2015-11-21 21:35:08 +06:00
Sergey M․
c3a227d1c4 [pluralsight] Update _LOGIN_URL 2015-11-21 21:25:48 +06:00
Sergey M․
f6c903e708 [soundcloud:search] Simplify (Closes #7213) 2015-11-21 21:21:21 +06:00
Sergey M․
7dc011c063 [soundcloud:search] Remove no track results message 2015-11-21 21:00:42 +06:00
Sergey M․
4e3b303016 [soundcloud:search] Fix non-ASCII searches 2015-11-21 20:55:48 +06:00
Sergey M․
7e1f5447e7 [utils] Improve encode_dict 2015-11-21 20:46:33 +06:00
Sergey M․
7e3472758b [soundcloud:search] PEP 8 2015-11-21 20:04:35 +06:00
reiv
328a22e175 [soundcloud] Remove limit on search results 2015-11-21 19:41:36 +06:00
reiv
417b453699 [soundcloud] Use correct error message conventions 2015-11-21 19:41:31 +06:00
reiv
6ea7190a3e Rewrite as list comprehension. 2015-11-21 19:41:26 +06:00
reiv
b54b08c91b Simplify with itertools.islice(). 2015-11-21 19:41:19 +06:00
reiv
c30943b1c0 Fix some compatibility issues, cleanup. 2015-11-21 19:41:15 +06:00
reiv
2abf7cab80 [soundcloud] Add Soundcloud search extractor 2015-11-21 19:41:08 +06:00
Sergey M․
4137196899 [rutube] Extract all formats 2015-11-21 18:02:52 +06:00
Sergey M․
019839faaa [extractor/common] Use baseURL from f4m manifest for recursive manifest extraction 2015-11-21 18:01:39 +06:00
Sergey M․
f52183a878 [rutube:embed] Extend _VALID_URL (Closes #7588) 2015-11-21 17:39:24 +06:00
Yen Chi Hsuan
750b9ff032 [generic] Extract M3U8 formats (closes #7582) 2015-11-21 16:43:01 +08:00
Yen Chi Hsuan
28602e747c [generic] Refactor 2015-11-21 16:08:54 +08:00
Yen Chi Hsuan
6cc37c69e2 [generic] Unescape URLs from JWPlayer (#7582) 2015-11-21 14:12:34 +08:00
Sergey M․
a5cd0eb8a4 [pluralsight:course] Improve _VALID_URL 2015-11-21 08:32:48 +06:00
Sergey M․
c23e266427 [pluralsight] Do not require pluralsight account
Looks like some courses are available without pluralsight account
2015-11-21 08:25:52 +06:00
Sergey M․
651acffbe5 [pluralsight] Update ViewClip URL 2015-11-21 08:21:33 +06:00
Sergey M․
71bd93b89c [pluralsight] Do not rely on argument order in query (Closes #7583) 2015-11-21 08:08:34 +06:00
Sergey M․
6da620de58 [kaltura] Add test for referrer protected video (#7409) 2015-11-21 01:40:28 +06:00
Sergey M․
bdceea7afd [kaltura] Clean description 2015-11-21 01:39:29 +06:00
Sergey M․
d80a39cec8 [kaltura] Improve 2015-11-21 01:38:08 +06:00
Sergey M․
5b5fae5f20 [generic] Use referrer from source kaltura embed URLs (#7409) 2015-11-21 01:35:58 +06:00
Sergey M․
01b06aedcf [kaltura] Add support for referrer protected videos (#7409) 2015-11-21 01:34:02 +06:00
Sergey M
c711383811 Merge pull request #7579 from ashutosh-mishra/typo_fix
Typo fix, found while going through the code.
2015-11-20 23:24:54 +06:00
ashutosh-mishra
17cc153435 Typo fix, found while going through the code. 2015-11-20 22:51:46 +05:30
Sergey M․
67446fd49b [instagram] Improve _VALID_URL (Closes #7568) 2015-11-20 04:07:39 +06:00
Sergey M․
325bb615a7 [theplatform] Style 2015-11-19 22:58:43 +06:00
Sergey M․
ee5cd8418e [theplatform] Handle protocolless feed URLs (Closes #7532) 2015-11-19 22:58:29 +06:00
Sergey M․
342609a1b4 [bloomberg] Reax _VALID_URL (Closes #7546) 2015-11-19 22:55:06 +06:00
Sergey M
f270cf1a26 Merge pull request #7519 from barlik/master
Clarify that automatic subtitles are generated.
2015-11-19 22:44:08 +06:00
hedii
371c3b796c [YoutubeDL] Add playlist finished downloading message (Closes #7517)
Conflicts:
	youtube_dl/YoutubeDL.py
2015-11-19 22:39:02 +06:00
Sergey M․
6b7ceee1b9 [vimeo] Add test for #7552 2015-11-19 22:31:16 +06:00
Sergey M․
fdb20a27a3 [vimeo:group] Improve _VALID_URL (Closes #7552) 2015-11-19 22:30:58 +06:00
Sergey M․
2c94198eb6 [vimeo] Improve playlists extraction 2015-11-19 21:29:32 +06:00
Rastislav Barlik
741dd8ea65 Clarify that automatic subtitles are generated.
It wasn't clear what automatic word mean.
2015-11-16 14:15:25 +00:00
114 changed files with 726 additions and 493 deletions

View File

@@ -329,8 +329,8 @@ which means you can modify it, redistribute it or use it however you like.
## Subtitle Options:
--write-sub Write subtitle file
--write-auto-sub Write automatic subtitle file (YouTube
only)
--write-auto-sub Write automatically generated subtitle file
(YouTube only)
--all-subs Download all the available subtitles of the
video
--list-subs List all available subtitles for the video

View File

@@ -494,6 +494,7 @@
- **soompi:show**
- **soundcloud**
- **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search
- **soundcloud:set**
- **soundcloud:user**
- **soundgasm**
@@ -707,6 +708,7 @@
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:user:playlists**: YouTube.com user playlists
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**

View File

@@ -21,6 +21,7 @@ from youtube_dl.utils import (
clean_html,
DateRange,
detect_exe_version,
determine_ext,
encodeFilename,
escape_rfc3986,
escape_url,
@@ -238,6 +239,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
def test_find_xpath_attr(self):
testxml = '''<root>
<node/>

View File

@@ -28,6 +28,7 @@ if os.name == 'nt':
import ctypes
from .compat import (
compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,
@@ -63,6 +64,7 @@ from .utils import (
SameFileError,
sanitize_filename,
sanitize_path,
sanitized_Request,
std_headers,
subtitles_filename,
UnavailableVideoError,
@@ -156,7 +158,7 @@ class YoutubeDL(object):
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
@@ -833,6 +835,7 @@ class YoutubeDL(object):
extra_info=extra)
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
return ie_result
elif result_type == 'compat_list':
self.report_warning(
@@ -937,7 +940,7 @@ class YoutubeDL(object):
filter_parts.append(string)
def _remove_unused_ops(tokens):
# Remove operators that we don't use and join them with the sourrounding strings
# Remove operators that we don't use and join them with the surrounding strings
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None
@@ -1186,7 +1189,7 @@ class YoutubeDL(object):
return res
def _calc_cookies(self, info_dict):
pr = compat_urllib_request.Request(info_dict['url'])
pr = sanitized_Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
@@ -1870,6 +1873,8 @@ class YoutubeDL(object):
def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):

View File

@@ -42,7 +42,7 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimenatal)
(experimental)
external_downloader_args: A list of additional command-line arguments for the
external downloader.

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import FileDownloader
from ..compat import compat_urllib_request
from ..utils import sanitized_Request
class DashSegmentsFD(FileDownloader):
@@ -22,7 +22,7 @@ class DashSegmentsFD(FileDownloader):
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
req = compat_urllib_request.Request(target_url)
req = sanitized_Request(target_url)
if remaining_bytes is not None:
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))

View File

@@ -7,14 +7,12 @@ import time
import re
from .common import FileDownloader
from ..compat import (
compat_urllib_request,
compat_urllib_error,
)
from ..compat import compat_urllib_error
from ..utils import (
ContentTooShortError,
encodeFilename,
sanitize_open,
sanitized_Request,
)
@@ -29,8 +27,8 @@ class HttpFD(FileDownloader):
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
basic_request = sanitized_Request(url, None, headers)
request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False)

View File

@@ -117,7 +117,7 @@ class RtmpFD(FileDownloader):
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = [
'rtmpdump', '--verbose', '-r', url,

View File

@@ -576,7 +576,8 @@ from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
SoundcloudUserIE,
SoundcloudPlaylistIE
SoundcloudPlaylistIE,
SoundcloudSearchIE
)
from .soundgasm import (
SoundgasmIE,
@@ -833,6 +834,7 @@ from .youtube import (
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeUserPlaylistsIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE

View File

@@ -7,11 +7,11 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
float_or_none,
sanitized_Request,
xpath_text,
ExtractorError,
)
@@ -63,7 +63,7 @@ class AtresPlayerIE(InfoExtractor):
'j_password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
@@ -94,7 +94,7 @@ class AtresPlayerIE(InfoExtractor):
formats = []
for fmt in ['windows', 'android_tablet']:
request = compat_urllib_request.Request(
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
request.add_header('User-Agent', self._USER_AGENT)

View File

@@ -6,13 +6,13 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
sanitized_Request,
)
@@ -57,7 +57,7 @@ class BambuserIE(InfoExtractor):
'pass': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
@@ -126,7 +126,7 @@ class BambuserChannelIE(InfoExtractor):
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url)
req = sanitized_Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
data = self._download_json(

View File

@@ -4,14 +4,12 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
xpath_text,
xpath_with_ns,
@@ -219,7 +217,7 @@ class BlipTVIE(InfoExtractor):
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file

View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://www\.bloomberg\.com/news/[^/]+/[^/]+/(?P<id>[^/?#]+)'
_TEST = {
_TESTS = [{
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
# The md5 checksum changes
'info_dict': {
@@ -17,7 +17,10 @@ class BloombergIE(InfoExtractor):
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
'description': 'md5:a8ba0302912d03d246979735c17d2761',
},
}
}, {
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
'only_matching': True,
}]
def _real_extract(self, url):
name = self._match_id(url)

View File

@@ -11,7 +11,6 @@ from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
@@ -24,6 +23,7 @@ from ..utils import (
js_to_json,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
unsmuggle_url,
)
@@ -250,7 +250,7 @@ class BrightcoveLegacyIE(InfoExtractor):
def _get_video_info(self, video_id, query_str, query, referer=None):
request_url = self._FEDERATED_URL_TEMPLATE % query_str
req = compat_urllib_request.Request(request_url)
req = sanitized_Request(request_url)
linkBase = query.get('linkBaseURL')
if linkBase is not None:
referer = linkBase[0]
@@ -443,7 +443,7 @@ class BrightcoveNewIE(InfoExtractor):
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')
req = compat_urllib_request.Request(
req = sanitized_Request(
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
% (account_id, video_id),
headers={'Accept': 'application/json;pk=%s' % policy_key})

View File

@@ -1,8 +1,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import smuggle_url
from ..utils import (
sanitized_Request,
smuggle_url,
)
class CBSIE(InfoExtractor):
@@ -48,7 +50,7 @@ class CBSIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
# Android UA is served with higher quality (720p) streams (see
# https://github.com/rg3/youtube-dl/issues/7490)
request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
@@ -13,6 +12,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
)
@@ -100,7 +100,7 @@ class CeskaTelevizeIE(InfoExtractor):
'requestSource': 'iVysilani',
}
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=compat_urllib_parse.urlencode(data))
@@ -115,7 +115,7 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist_title = self._og_search_title(webpage)

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
float_or_none,
int_or_none,
sanitized_Request,
)
@@ -52,7 +52,7 @@ class CollegeRamaIE(InfoExtractor):
}
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
json.dumps(player_options_request))
request.add_header('Content-Type', 'application/json')

View File

@@ -19,7 +19,6 @@ from ..compat import (
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
compat_str,
compat_etree_fromstring,
@@ -37,6 +36,7 @@ from ..utils import (
int_or_none,
RegexNotFoundError,
sanitize_filename,
sanitized_Request,
unescapeHTML,
unified_strdate,
url_basename,
@@ -891,6 +891,11 @@ class InfoExtractor(object):
if not media_nodes:
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
@@ -898,7 +903,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@@ -1280,7 +1285,7 @@ class InfoExtractor(object):
def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))

View File

@@ -23,6 +23,7 @@ from ..utils import (
int_or_none,
lowercase_escape,
remove_end,
sanitized_Request,
unified_strdate,
urlencode_postdata,
xpath_text,
@@ -46,7 +47,7 @@ class CrunchyrollBaseIE(InfoExtractor):
'name': username,
'password': password,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request = sanitized_Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info')
@@ -55,7 +56,7 @@ class CrunchyrollBaseIE(InfoExtractor):
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else compat_urllib_request.Request(url_or_request))
else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues
# similar to https://github.com/rg3/youtube-dl/issues/6797.
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
@@ -307,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_uploader', fatal=False)
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req = sanitized_Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
@@ -319,7 +320,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request(
streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (stream_id, stream_format, stream_quality),
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))

View File

@@ -7,15 +7,13 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_request,
)
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
str_to_int,
unescapeHTML,
)
@@ -25,7 +23,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('Cookie', 'family_filter=off; ff=off')
return request

View File

@@ -2,13 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -36,7 +34,7 @@ class DCNIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
headers={'Origin': 'http://www.dcndigital.ae'})

View File

@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -16,6 +15,7 @@ from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -51,7 +51,7 @@ class DramaFeverBaseIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
response = self._download_webpage(
request, None, 'Logging in as %s' % username)

View File

@@ -5,8 +5,10 @@ import base64
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import qualities
from ..utils import (
qualities,
sanitized_Request,
)
class DumpertIE(InfoExtractor):
@@ -32,7 +34,7 @@ class DumpertIE(InfoExtractor):
protocol = mobj.group('protocol')
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'nsfw=1; cpc=10')
webpage = self._download_webpage(req, video_id)

View File

@@ -2,11 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
float_or_none,
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -57,7 +57,7 @@ class EitbIE(InfoExtractor):
hls_url = media.get('HLS_SURL')
if hls_url:
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
headers={'Referer': url})
token_data = self._download_json(

View File

@@ -3,13 +3,12 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
determine_ext,
clean_html,
int_or_none,
float_or_none,
sanitized_Request,
)
@@ -75,7 +74,7 @@ class EscapistIE(InfoExtractor):
video_id = ims_video['videoID']
key = ims_video['hash']
config_req = compat_urllib_request.Request(
config_req = sanitized_Request(
'http://www.escapistmagazine.com/videos/'
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
config_req.add_header('Referer', url)

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -42,7 +40,7 @@ class EveryonesMixtapeIE(InfoExtractor):
playlist_id = mobj.group('id')
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
pllist_req = compat_urllib_request.Request(pllist_url)
pllist_req = sanitized_Request(pllist_url)
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
playlist_list = self._download_json(
@@ -55,7 +53,7 @@ class EveryonesMixtapeIE(InfoExtractor):
raise ExtractorError('Playlist id not found')
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
pl_req = compat_urllib_request.Request(pl_url)
pl_req = sanitized_Request(pl_url)
pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
playlist = self._download_json(
pl_req, playlist_id, note='Downloading playlist info')

View File

@@ -3,9 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
int_or_none,
sanitized_Request,
str_to_int,
)
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -10,11 +10,11 @@ from ..compat import (
compat_str,
compat_urllib_error,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
limit_length,
sanitized_Request,
urlencode_postdata,
get_element_by_id,
clean_html,
@@ -73,7 +73,7 @@ class FacebookIE(InfoExtractor):
if useremail is None:
return
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req = sanitized_Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US')
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
@@ -94,7 +94,7 @@ class FacebookIE(InfoExtractor):
'timezone': '-60',
'trynum': '1',
}
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
login_results = self._download_webpage(request, None,
@@ -109,7 +109,7 @@ class FacebookIE(InfoExtractor):
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
'name_action_selected': 'dont_save',
}
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = self._download_webpage(check_req, None,
note='Confirming login')

View File

@@ -12,6 +12,7 @@ from ..compat import (
from ..utils import (
encode_dict,
ExtractorError,
sanitized_Request,
)
@@ -57,7 +58,7 @@ class FC2IE(InfoExtractor):
}
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@@ -66,7 +67,7 @@ class FC2IE(InfoExtractor):
return False
# this is also needed
login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
self._download_webpage(
login_redir, None, note='Login redirect', errnote='Login redirect failed')

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
ExtractorError,
find_xpath_attr,
sanitized_Request,
)
@@ -30,7 +30,7 @@ class FlickrIE(InfoExtractor):
video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
req = compat_urllib_request.Request(webpage_url)
req = sanitized_Request(webpage_url)
req.add_header(
'User-Agent',
# it needs a more recent version

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_duration,
parse_iso8601,
sanitized_Request,
str_to_int,
)
@@ -93,7 +91,7 @@ class FourTubeIE(InfoExtractor):
b'Content-Type': b'application/x-www-form-urlencoded',
b'Origin': b'http://www.4tube.com',
}
token_req = compat_urllib_request.Request(token_url, b'{}', headers)
token_req = sanitized_Request(token_url, b'{}', headers)
tokens = self._download_json(token_req, video_id)
formats = [{
'url': tokens[format]['token'],

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
remove_end,
HEADRequest,
sanitized_Request,
)
@@ -125,7 +123,7 @@ class GDCVaultIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(request, display_id, 'Logging in')
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')

View File

@@ -11,7 +11,6 @@ from .youtube import YoutubeIE
from ..compat import (
compat_etree_fromstring,
compat_urllib_parse_unquote,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
@@ -22,6 +21,7 @@ from ..utils import (
HEADRequest,
is_html,
orderedSet,
sanitized_Request,
smuggle_url,
unescapeHTML,
unified_strdate,
@@ -823,6 +823,19 @@ class GenericIE(InfoExtractor):
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
},
},
# Kaltura embed protected with referrer
{
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
'info_dict': {
'id': '1_g4fbemnq',
'ext': 'mp4',
'title': 'Violetta - Achter De Schermen - Ruggero',
'description': 'Achter de schermen met Ruggero',
'timestamp': 1435133761,
'upload_date': '20150624',
'uploader_id': 'echojecka',
},
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -1045,6 +1058,20 @@ class GenericIE(InfoExtractor):
'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690,
},
},
# JWPlayer with M3U8
{
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
'info_dict': {
'id': 'playlist',
'ext': 'mp4',
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
'uploader': 'ren.tv',
},
'params': {
# m3u8 downloads
'skip_download': True,
}
}
]
@@ -1188,7 +1215,7 @@ class GenericIE(InfoExtractor):
full_response = None
if head_response is False:
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('Accept-Encoding', '*')
full_response = self._request_webpage(request, video_id)
head_response = full_response
@@ -1217,7 +1244,7 @@ class GenericIE(InfoExtractor):
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
if not full_response:
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
# making it impossible to download only chunk of the file (yet we need only 512kB to
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding
@@ -1694,7 +1721,9 @@ class GenericIE(InfoExtractor):
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
return self.url_result(smuggle_url(
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
{'source_url': url}), 'Kaltura')
# Look for Eagle.Platform embeds
mobj = re.search(
@@ -1859,6 +1888,7 @@ class GenericIE(InfoExtractor):
entries = []
for video_url in found:
video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url)
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
@@ -1870,25 +1900,24 @@ class GenericIE(InfoExtractor):
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
entry_info_dict = {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
}
ext = determine_ext(video_url)
if ext == 'smil':
entries.append({
'id': video_id,
'formats': self._extract_smil_formats(video_url, video_id),
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
elif ext == 'm3u8':
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
else:
entries.append({
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
entry_info_dict['url'] = video_url
entries.append(entry_info_dict)
if len(entries) == 1:
return entries[0]

View File

@@ -4,12 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
HEADRequest,
sanitized_Request,
str_to_int,
urlencode_postdata,
urlhandle_detect_ext,
@@ -47,7 +45,7 @@ class HearThisAtIE(InfoExtractor):
r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
payload = urlencode_postdata({'tracks[]': track_id})
req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
req = sanitized_Request(self._PLAYLIST_URL, payload)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
track = self._download_json(req, track_id, 'Downloading playlist')[0]

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
HEADRequest,
sanitized_Request,
)
@@ -41,7 +39,7 @@ class HotNewHipHopIE(InfoExtractor):
('mediaType', 's'),
('mediaId', video_id),
])
r = compat_urllib_request.Request(
r = sanitized_Request(
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
mkd = self._download_json(

View File

@@ -4,12 +4,10 @@ import json
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -32,7 +30,7 @@ class HypemIE(InfoExtractor):
data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data)
complete_url = url + "?" + data_encoded
request = compat_urllib_request.Request(complete_url)
request = sanitized_Request(complete_url)
response, urlh = self._download_webpage_handle(
request, track_id, 'Downloading webpage with the url')
cookie = urlh.headers.get('Set-Cookie', '')
@@ -52,7 +50,7 @@ class HypemIE(InfoExtractor):
title = track['song']
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
request = compat_urllib_request.Request(
request = sanitized_Request(
serve_url, '', {'Content-Type': 'application/json'})
request.add_header('cookie', cookie)
song_data = self._download_json(request, track_id, 'Downloading metadata')

View File

@@ -10,7 +10,7 @@ from ..utils import (
class InstagramIE(InfoExtractor):
_VALID_URL = r'https://instagram\.com/p/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',

View File

@@ -6,12 +6,10 @@ from random import random
from math import floor
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
remove_end,
sanitized_Request,
)
@@ -61,7 +59,7 @@ class IPrimaIE(InfoExtractor):
(floor(random() * 1073741824), floor(random() * 1073741824))
)
req = compat_urllib_request.Request(player_url)
req = sanitized_Request(player_url)
req.add_header('Referer', url)
playerpage = self._download_webpage(req, video_id)

View File

@@ -5,11 +5,9 @@ import re
import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -78,7 +76,7 @@ class IviIE(InfoExtractor):
]
}
request = compat_urllib_request.Request(api_url, json.dumps(data))
request = sanitized_Request(api_url, json.dumps(data))
video_json_page = self._download_webpage(
request, video_id, 'Downloading video JSON')

View File

@@ -2,12 +2,18 @@
from __future__ import unicode_literals
import re
import base64
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
unsmuggle_url,
)
@@ -121,31 +127,47 @@ class KalturaIE(InfoExtractor):
video_id, actions, note='Downloading video info JSON')
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
info, source_data = self._get_video_info(entry_id, partner_id)
formats = [{
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f['fileExt'],
'tbr': f['bitrate'],
'fps': f.get('frameRate'),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': f.get('height'),
'width': f.get('width'),
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
} for f in source_data['flavorAssets']]
source_url = smuggled_data.get('source_url')
if source_url:
referrer = base64.b64encode(
'://'.join(compat_urlparse.urlparse(source_url)[:2])
.encode('utf-8')).decode('utf-8')
else:
referrer = None
formats = []
for f in source_data['flavorAssets']:
video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
if referrer:
video_url += '?referrer=%s' % referrer
formats.append({
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f.get('fileExt'),
'tbr': int_or_none(f['bitrate']),
'fps': int_or_none(f.get('frameRate')),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': int_or_none(f.get('height')),
'width': int_or_none(f.get('width')),
'url': video_url,
})
self._check_formats(formats, entry_id)
self._sort_formats(formats)
return {
'id': entry_id,
'title': info['name'],
'formats': formats,
'description': info.get('description'),
'description': clean_html(info.get('description')),
'thumbnail': info.get('thumbnailUrl'),
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),

View File

@@ -4,10 +4,8 @@ import os
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse_urlparse
from ..utils import sanitized_Request
class KeezMoviesIE(InfoExtractor):
@@ -26,7 +24,7 @@ class KeezMoviesIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -8,13 +8,13 @@ import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_ord,
)
from ..utils import (
determine_ext,
ExtractorError,
parse_iso8601,
sanitized_Request,
int_or_none,
encode_data_uri,
)
@@ -114,7 +114,7 @@ class LetvIE(InfoExtractor):
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
}
play_json_req = compat_urllib_request.Request(
play_json_req = sanitized_Request(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')

View File

@@ -7,12 +7,12 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
sanitized_Request,
)
@@ -35,7 +35,7 @@ class LyndaBaseIE(InfoExtractor):
'remember': 'false',
'stayPut': 'false'
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
@@ -64,7 +64,7 @@ class LyndaBaseIE(InfoExtractor):
'remember': 'false',
'stayPut': 'false',
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
login_page = self._download_webpage(
request, None,

View File

@@ -7,12 +7,12 @@ from ..compat import (
compat_parse_qs,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor):
'filters': '0',
'submit': "Continue - I'm over 18",
}
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self.report_age_confirmation()
self._download_webpage(request, None, False, 'Unable to confirm age')
@@ -142,7 +142,7 @@ class MetacafeIE(InfoExtractor):
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
# Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
# AnyClip videos require the flashversion cookie so that we get the link
# to the mp4 file

View File

@@ -2,14 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
int_or_none,
parse_duration,
parse_filesize,
sanitized_Request,
)
@@ -39,7 +37,7 @@ class MinhatecaIE(InfoExtractor):
('fileId', video_id),
('__RequestVerificationToken', token),
]
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://minhateca.com.br/action/License/Download',
data=compat_urllib_parse.urlencode(token_data))
req.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -4,11 +4,11 @@ from __future__ import unicode_literals
import random
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
xpath_text,
int_or_none,
ExtractorError,
sanitized_Request,
)
@@ -63,7 +63,7 @@ class MioMioIE(InfoExtractor):
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
video_id)
vid_config_request = compat_urllib_request.Request(
vid_config_request = sanitized_Request(
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
headers=http_headers)

View File

@@ -5,13 +5,11 @@ import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -80,7 +78,7 @@ class MoeVideoIE(InfoExtractor):
]
r_json = json.dumps(r)
post = compat_urllib_parse.urlencode({'r': r_json})
req = compat_urllib_request.Request(self._API_URL, post)
req = sanitized_Request(self._API_URL, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
response = self._download_json(req, video_id)

View File

@@ -7,8 +7,8 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import sanitized_Request
class MofosexIE(InfoExtractor):
@@ -29,7 +29,7 @@ class MofosexIE(InfoExtractor):
video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -5,13 +5,11 @@ import os.path
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
remove_start,
sanitized_Request,
)
@@ -81,7 +79,7 @@ class MonikerIE(InfoExtractor):
orig_webpage, 'builtin URL', default=None, group='url')
if builtin_url:
req = compat_urllib_request.Request(builtin_url)
req = sanitized_Request(builtin_url)
req.add_header('Referer', url)
webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
title = self._og_search_title(orig_webpage).strip()
@@ -94,7 +92,7 @@ class MonikerIE(InfoExtractor):
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
req = sanitized_Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note='Downloading video page ...')

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -59,7 +57,7 @@ class MooshareIE(InfoExtractor):
'hash': hash_key,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import sanitized_Request
class MovieClipsIE(InfoExtractor):
@@ -25,7 +23,7 @@ class MovieClipsIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
# it doesn't work if it thinks the browser it's too old
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
webpage = self._download_webpage(req, display_id)

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_str,
)
from ..utils import (
@@ -13,6 +12,7 @@ from ..utils import (
find_xpath_attr,
fix_xml_ampersands,
HEADRequest,
sanitized_Request,
unescapeHTML,
url_basename,
RegexNotFoundError,
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url)
req = sanitized_Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
req.add_header('User-Agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,

View File

@@ -11,10 +11,10 @@ from ..compat import (
compat_ord,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -83,7 +83,7 @@ class MyVideoIE(InfoExtractor):
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))

View File

@@ -8,11 +8,11 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_str,
compat_itertools_count,
)
from ..utils import sanitized_Request
class NetEaseMusicBaseIE(InfoExtractor):
@@ -56,7 +56,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
return int(round(ms / 1000.0))
def query_api(self, endpoint, video_id, note):
req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
req.add_header('Referer', self._API_BASE)
return self._download_json(req, video_id, note)

View File

@@ -1,10 +1,8 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..compat import compat_urllib_parse
from ..utils import sanitized_Request
class NFBIE(InfoExtractor):
@@ -40,8 +38,9 @@ class NFBIE(InfoExtractor):
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
page, 'director name', fatal=False)
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request = sanitized_Request(
'https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

View File

@@ -8,7 +8,6 @@ import datetime
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
sanitized_Request,
xpath_text,
determine_ext,
)
@@ -102,7 +102,7 @@ class NiconicoIE(InfoExtractor):
'password': password,
}
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, None, note='Logging in', errnote='Unable to log in')
@@ -145,7 +145,7 @@ class NiconicoIE(InfoExtractor):
'k': thumb_play_key,
'v': video_id
})
flv_info_request = compat_urllib_request.Request(
flv_info_request = sanitized_Request(
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
{'Content-Type': 'application/x-www-form-urlencoded'})
flv_info_webpage = self._download_webpage(

View File

@@ -9,7 +9,6 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
clean_html,
@@ -17,6 +16,7 @@ from ..utils import (
int_or_none,
float_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -74,7 +74,7 @@ class NocoIE(InfoExtractor):
'username': username,
'password': password,
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
login = self._download_json(request, None, 'Logging in as %s' % username)

View File

@@ -4,11 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
urlencode_postdata,
xpath_text,
xpath_with_ns,
@@ -41,7 +39,7 @@ class NosVideoIE(InfoExtractor):
'op': 'download1',
'method_free': 'Continue to Video',
}
req = compat_urllib_request.Request(url, urlencode_postdata(fields))
req = sanitized_Request(url, urlencode_postdata(fields))
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id,
'Downloading download page')

View File

@@ -3,14 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
NO_DEFAULT,
encode_dict,
sanitized_Request,
urlencode_postdata,
)
@@ -65,7 +63,7 @@ class NovaMovIE(InfoExtractor):
'post url', default=url, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(url, post_url)
request = compat_urllib_request.Request(
request = sanitized_Request(
post_url, urlencode_postdata(encode_dict(fields)))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('Referer', post_url)

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
from .brightcove import BrightcoveLegacyIE
from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import (
compat_str,
compat_urllib_request,
from ..compat import compat_str
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -37,7 +37,7 @@ class NownessBaseIE(InfoExtractor):
def _api_request(self, url, request_path):
display_id = self._match_id(url)
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://api.nowness.com/api/' + request_path % display_id,
headers={
'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_duration,
sanitized_Request,
unified_strdate,
)
@@ -33,7 +31,7 @@ class NuvidIE(InfoExtractor):
formats = []
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://m.nuvid.com/play/%s' % video_id)
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
webpage = self._download_webpage(

View File

@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
js_to_json,
)
from ..utils import js_to_json
class PatreonIE(InfoExtractor):
@@ -65,7 +63,7 @@ class PatreonIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://www.patreon.com/processLogin',
compat_urllib_parse.urlencode(login_form).encode('utf-8')
)

View File

@@ -5,12 +5,10 @@ import re
import os.path
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -46,7 +44,7 @@ class PlayedIE(InfoExtractor):
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
req = sanitized_Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note='Downloading video page ...')

View File

@@ -1,29 +1,35 @@
from __future__ import unicode_literals
import re
import json
import random
import collections
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
sanitized_Request,
)
class PluralsightIE(InfoExtractor):
class PluralsightBaseIE(InfoExtractor):
_API_BASE = 'http://app.pluralsight.com'
class PluralsightIE(PluralsightBaseIE):
IE_NAME = 'pluralsight'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
_LOGIN_URL = 'https://www.pluralsight.com/id/'
_VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
_LOGIN_URL = 'https://app.pluralsight.com/id/'
_NETRC_MACHINE = 'pluralsight'
_TEST = {
_TESTS = [{
'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
'md5': '4d458cf5cf4c593788672419a8dd4cf8',
'info_dict': {
@@ -33,7 +39,14 @@ class PluralsightIE(InfoExtractor):
'duration': 338,
},
'skip': 'Requires pluralsight account credentials',
}
}, {
'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
'only_matching': True,
}, {
# available without pluralsight account
'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
'only_matching': True,
}]
def _real_initialize(self):
self._login()
@@ -41,7 +54,7 @@ class PluralsightIE(InfoExtractor):
def _login(self):
(username, password) = self._get_login_info()
if username is None:
self.raise_login_required('Pluralsight account is required')
return
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@@ -60,7 +73,7 @@ class PluralsightIE(InfoExtractor):
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
request = compat_urllib_request.Request(
request = sanitized_Request(
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -73,30 +86,47 @@ class PluralsightIE(InfoExtractor):
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
raise ExtractorError('Unable to log in')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
author = mobj.group('author')
name = mobj.group('name')
clip_id = mobj.group('clip')
course = mobj.group('course')
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
author = qs.get('author', [None])[0]
name = qs.get('name', [None])[0]
clip_id = qs.get('clip', [None])[0]
course = qs.get('course', [None])[0]
if any(not f for f in (author, name, clip_id, course,)):
raise ExtractorError('Invalid URL', expected=True)
display_id = '%s-%s' % (name, clip_id)
webpage = self._download_webpage(url, display_id)
collection = self._parse_json(
self._search_regex(
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
webpage, 'modules'),
display_id)
modules = self._search_regex(
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
webpage, 'modules', default=None)
if modules:
collection = self._parse_json(modules, display_id)
else:
# Webpage may be served in different layout (see
# https://github.com/rg3/youtube-dl/issues/7607)
collection = self._parse_json(
self._search_regex(
r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'),
display_id)['course']['modules']
module, clip = None, None
for module_ in collection:
if module_.get('moduleName') == name:
if name in (module_.get('moduleName'), module_.get('name')):
module = module_
for clip_ in module_.get('clips', []):
clip_index = clip_.get('clipIndex')
if clip_index is None:
clip_index = clip_.get('index')
if clip_index is None:
continue
if compat_str(clip_index) == clip_id:
@@ -112,13 +142,33 @@ class PluralsightIE(InfoExtractor):
'high': {'width': 1024, 'height': 768},
}
AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
ALLOWED_QUALITIES = (
('webm', ('high',)),
('mp4', ('low', 'medium', 'high',)),
AllowedQuality('webm', ('high',)),
AllowedQuality('mp4', ('low', 'medium', 'high',)),
)
# In order to minimize the number of calls to ViewClip API and reduce
# the probability of being throttled or banned by Pluralsight we will request
# only single format until formats listing was explicitly requested.
if self._downloader.params.get('listformats', False):
allowed_qualities = ALLOWED_QUALITIES
else:
def guess_allowed_qualities():
req_format = self._downloader.params.get('format') or 'best'
req_format_split = req_format.split('-')
if len(req_format_split) > 1:
req_ext, req_quality = req_format_split
for allowed_quality in ALLOWED_QUALITIES:
if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
return (AllowedQuality(req_ext, (req_quality, )), )
req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
return (AllowedQuality(req_ext, ('high', )), )
allowed_qualities = guess_allowed_qualities()
formats = []
for ext, qualities in ALLOWED_QUALITIES:
for ext, qualities in allowed_qualities:
for quality in qualities:
f = QUALITIES[quality].copy()
clip_post = {
@@ -131,13 +181,24 @@ class PluralsightIE(InfoExtractor):
'mt': ext,
'q': '%dx%d' % (f['width'], f['height']),
}
request = compat_urllib_request.Request(
'http://www.pluralsight.com/training/Player/ViewClip',
request = sanitized_Request(
'%s/training/Player/ViewClip' % self._API_BASE,
json.dumps(clip_post).encode('utf-8'))
request.add_header('Content-Type', 'application/json;charset=utf-8')
format_id = '%s-%s' % (ext, quality)
clip_url = self._download_webpage(
request, display_id, 'Downloading %s URL' % format_id, fatal=False)
# Pluralsight tracks multiple sequential calls to ViewClip API and start
# to return 429 HTTP errors after some time (see
# https://github.com/rg3/youtube-dl/pull/6989). Moreover it may even lead
# to account ban (see https://github.com/rg3/youtube-dl/issues/6842).
# To somewhat reduce the probability of these consequences
# we will sleep random amount of time before each call to ViewClip.
self._sleep(
random.randint(2, 5), display_id,
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
if not clip_url:
continue
f.update({
@@ -163,10 +224,10 @@ class PluralsightIE(InfoExtractor):
}
class PluralsightCourseIE(InfoExtractor):
class PluralsightCourseIE(PluralsightBaseIE):
IE_NAME = 'pluralsight:course'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
_TEST = {
_VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
_TESTS = [{
# Free course from Pluralsight Starter Subscription for Microsoft TechNet
# https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
@@ -176,7 +237,14 @@ class PluralsightCourseIE(InfoExtractor):
'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
},
'playlist_count': 31,
}
}, {
# available without pluralsight account
'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
'only_matching': True,
}, {
'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
'only_matching': True,
}]
def _real_extract(self, url):
course_id = self._match_id(url)
@@ -184,14 +252,14 @@ class PluralsightCourseIE(InfoExtractor):
# TODO: PSM cookie
course = self._download_json(
'http://www.pluralsight.com/data/course/%s' % course_id,
'%s/data/course/%s' % (self._API_BASE, course_id),
course_id, 'Downloading course JSON')
title = course['title']
description = course.get('description') or course.get('shortDescription')
course_data = self._download_json(
'http://www.pluralsight.com/data/course/content/%s' % course_id,
'%s/data/course/content/%s' % (self._API_BASE, course_id),
course_id, 'Downloading course data JSON')
entries = []
@@ -201,7 +269,7 @@ class PluralsightCourseIE(InfoExtractor):
if not player_parameters:
continue
entries.append(self.url_result(
'http://www.pluralsight.com/training/player?%s' % player_parameters,
'%s/training/player?%s' % (self._API_BASE, player_parameters),
'Pluralsight'))
return self.playlist_result(entries, course_id, title, description)

View File

@@ -36,7 +36,8 @@ class PornHdIE(InfoExtractor):
webpage = self._download_webpage(url, display_id or video_id)
title = self._html_search_regex(
r'<title>(.+) porn HD.+?</title>', webpage, 'title')
[r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
description = self._html_search_regex(
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
view_count = int_or_none(self._html_search_regex(

View File

@@ -8,10 +8,10 @@ from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
str_to_int,
)
from ..aes import (
@@ -53,7 +53,7 @@ class PornHubIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
int_or_none,
sanitized_Request,
)
@@ -46,7 +44,7 @@ class PornotubeIE(InfoExtractor):
'authenticationSpaceKey': originAuthenticationSpaceKey,
'credentials': 'Clip Application',
}
token_req = compat_urllib_request.Request(
token_req = sanitized_Request(
'https://api.aebn.net/auth/v1/token/primal',
data=json.dumps(token_req_data).encode('utf-8'))
token_req.add_header('Content-Type', 'application/json')
@@ -56,7 +54,7 @@ class PornotubeIE(InfoExtractor):
token = token_answer['tokenKey']
# Get video URL
delivery_req = compat_urllib_request.Request(
delivery_req = sanitized_Request(
'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
delivery_req.add_header('Authorization', token)
delivery_info = self._download_json(
@@ -64,7 +62,7 @@ class PornotubeIE(InfoExtractor):
video_url = delivery_info['mediaUrl']
# Get additional info (title etc.)
info_req = compat_urllib_request.Request(
info_req = sanitized_Request(
'https://api.aebn.net/content/v1/clips/%s?expand='
'title,description,primaryImageNumber,startSecond,endSecond,'
'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'

View File

@@ -1,11 +1,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
from ..utils import ExtractorError
class PrimeShareTVIE(InfoExtractor):
@@ -41,7 +41,7 @@ class PrimeShareTVIE(InfoExtractor):
webpage, 'wait time', default=7)) + 1
self._sleep(wait_time, video_id)
req = compat_urllib_request.Request(
req = sanitized_Request(
url, compat_urllib_parse.urlencode(fields), headers)
video_page = self._download_webpage(
req, video_id, 'Downloading video page')

View File

@@ -4,13 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
determine_ext,
ExtractorError,
sanitized_Request,
)
@@ -37,7 +35,7 @@ class PromptFileIE(InfoExtractor):
fields = self._hidden_inputs(webpage)
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req = sanitized_Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(
req, video_id, 'Downloading video page')

View File

@@ -7,11 +7,11 @@ import re
from .common import InfoExtractor
from ..utils import (
sanitized_Request,
strip_jsonp,
unescapeHTML,
clean_html,
)
from ..compat import compat_urllib_request
class QQMusicIE(InfoExtractor):
@@ -201,7 +201,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
singer_desc = None
if singer_id:
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
req.add_header(
'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')

View File

@@ -6,11 +6,11 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
ExtractorError,
float_or_none,
remove_end,
sanitized_Request,
std_headers,
struct_unpack,
)
@@ -102,7 +102,7 @@ class RTVEALaCartaIE(InfoExtractor):
if info['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True)
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png_request = compat_urllib_request.Request(png_url)
png_request = sanitized_Request(png_url)
png_request.add_header('Referer', url)
png = self._download_webpage(png_request, video_id, 'Downloading url information')
video_url = _decrypt_url(png)

View File

@@ -9,7 +9,7 @@ from ..compat import (
compat_str,
)
from ..utils import (
ExtractorError,
determine_ext,
unified_strdate,
)
@@ -51,10 +51,25 @@ class RutubeIE(InfoExtractor):
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
video_id, 'Downloading options JSON')
m3u8_url = options['video_balancer'].get('m3u8')
if m3u8_url is None:
raise ExtractorError('Couldn\'t find m3u8 manifest url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
formats = []
for format_id, format_url in options['video_balancer'].items():
ext = determine_ext(format_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif ext == 'f4m':
f4m_formats = self._extract_f4m_formats(
format_url, video_id, f4m_id=format_id, fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
return {
'id': video['id'],
@@ -74,9 +89,9 @@ class RutubeIE(InfoExtractor):
class RutubeEmbedIE(InfoExtractor):
IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos'
_VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
_VALID_URL = 'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
_TEST = {
_TESTS = [{
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661',
@@ -90,7 +105,10 @@ class RutubeEmbedIE(InfoExtractor):
'params': {
'skip_download': 'Requires ffmpeg',
},
}
}, {
'url': 'http://rutube.ru/play/embed/8083783',
'only_matching': True,
}]
def _real_extract(self, url):
embed_id = self._match_id(url)

View File

@@ -6,12 +6,10 @@ import re
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
smuggle_url,
std_headers,
)
@@ -58,7 +56,7 @@ class SafariBaseIE(InfoExtractor):
'next': '',
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)

View File

@@ -6,14 +6,12 @@ import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
sanitized_Request,
unified_strdate,
)
@@ -37,7 +35,7 @@ class SandiaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
webpage = self._download_webpage(req, video_id)

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -46,7 +44,7 @@ class SharedIE(InfoExtractor):
'Video %s does not exist' % video_id, expected=True)
download_form = self._hidden_inputs(webpage)
request = compat_urllib_request.Request(
request = sanitized_Request(
url, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -4,12 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
parse_duration,
sanitized_Request,
)
@@ -50,7 +48,7 @@ class ShareSixIE(InfoExtractor):
'method_free': 'Free'
}
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req = sanitized_Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id,

View File

@@ -4,10 +4,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..compat import compat_urllib_parse
from ..utils import sanitized_Request
class SinaIE(InfoExtractor):
@@ -61,7 +59,7 @@ class SinaIE(InfoExtractor):
if mobj.group('token') is not None:
# The video id is in the redirected url
self.to_screen('Getting video id')
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.get_method = lambda: 'HEAD'
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
return self._real_extract(urlh.geturl())

View File

@@ -7,13 +7,11 @@ import hashlib
import uuid
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
unified_strdate,
)
@@ -176,7 +174,7 @@ class SmotriIE(InfoExtractor):
if video_password:
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -339,7 +337,7 @@ class SmotriBroadcastIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
broadcast_page = self._download_webpage(

View File

@@ -6,11 +6,11 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_request,
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -96,7 +96,7 @@ class SohuIE(InfoExtractor):
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
req = compat_urllib_request.Request(base_data_url + vid_id)
req = sanitized_Request(base_data_url + vid_id)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:

View File

@@ -4,13 +4,17 @@ from __future__ import unicode_literals
import re
import itertools
from .common import InfoExtractor
from .common import (
InfoExtractor,
SearchInfoExtractor
)
from ..compat import (
compat_str,
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
encode_dict,
ExtractorError,
int_or_none,
unified_strdate,
@@ -469,3 +473,60 @@ class SoundcloudPlaylistIE(SoundcloudIE):
'description': data.get('description'),
'entries': entries,
}
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
IE_NAME = 'soundcloud:search'
IE_DESC = 'Soundcloud search'
_MAX_RESULTS = float('inf')
_TESTS = [{
'url': 'scsearch15:post-avant jazzcore',
'info_dict': {
'title': 'post-avant jazzcore',
},
'playlist_count': 15,
}]
_SEARCH_KEY = 'scsearch'
_MAX_RESULTS_PER_PAGE = 200
_DEFAULT_RESULTS_PER_PAGE = 50
_API_V2_BASE = 'https://api-v2.soundcloud.com'
def _get_collection(self, endpoint, collection_id, **query):
limit = min(
query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
self._MAX_RESULTS_PER_PAGE)
query['limit'] = limit
query['client_id'] = self._CLIENT_ID
query['linked_partitioning'] = '1'
query['offset'] = 0
data = compat_urllib_parse.urlencode(encode_dict(query))
next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
collected_results = 0
for i in itertools.count(1):
response = self._download_json(
next_url, collection_id, 'Downloading page {0}'.format(i),
'Unable to download API page')
collection = response.get('collection', [])
if not collection:
break
collection = list(filter(bool, collection))
collected_results += len(collection)
for item in collection:
yield self.url_result(item['uri'], SoundcloudIE.ie_key())
if not collection or collected_results >= limit:
break
next_url = response.get('next_href')
if not next_url:
break
def _get_n_results(self, query, n):
tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
return self.playlist_result(tracks, playlist_title=query)

View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import (
sanitized_Request,
str_to_int,
unified_strdate,
)
@@ -51,7 +51,7 @@ class SpankwireIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
req = sanitized_Request('http://www.' + mobj.group('url'))
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -4,11 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_iso8601,
sanitized_Request,
)
@@ -54,7 +52,7 @@ class SportDeutschlandIE(InfoExtractor):
api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
sport_id, video_id)
req = compat_urllib_request.Request(api_url, headers={
req = sanitized_Request(api_url, headers={
'Accept': 'application/vnd.vidibus.v2.html+json',
'Referer': url,
})

View File

@@ -4,10 +4,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import sanitized_Request
class StreamcloudIE(InfoExtractor):
@@ -43,7 +41,7 @@ class StreamcloudIE(InfoExtractor):
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
req = sanitized_Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note='Downloading video page ...')

View File

@@ -5,11 +5,9 @@ import hashlib
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
int_or_none,
sanitized_Request,
)
@@ -54,7 +52,7 @@ class StreamCZIE(InfoExtractor):
video_id = self._match_id(url)
api_path = '/episode/%s' % video_id
req = compat_urllib_request.Request(self._API_URL + api_path)
req = sanitized_Request(self._API_URL + api_path)
req.add_header('Api-Password', _get_api_key(api_path))
data = self._download_json(req, video_id)

View File

@@ -4,14 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
clean_html,
ExtractorError,
float_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -53,7 +51,7 @@ class TapelyIE(InfoExtractor):
display_id = mobj.group('id')
playlist_url = self._API_URL.format(display_id)
request = compat_urllib_request.Request(playlist_url)
request = sanitized_Request(playlist_url)
request.add_header('X-Requested-With', 'XMLHttpRequest')
request.add_header('Accept', 'application/json')
request.add_header('Referer', url)

View File

@@ -187,8 +187,12 @@ class ThePlatformIE(ThePlatformBaseIE):
# Seems there's no pattern for the interested script filename, so
# I try one by one
for script in reversed(scripts):
feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
feed_script = self._download_webpage(
self._proto_relative_url(script, 'http:'),
video_id, 'Downloading feed script')
feed_id = self._search_regex(
r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
'default feed id', default=None)
if feed_id is not None:
break
if feed_id is None:

View File

@@ -4,12 +4,10 @@ import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
int_or_none,
sanitized_Request,
str_to_int,
)
from ..aes import aes_decrypt_text
@@ -42,7 +40,7 @@ class Tube8IE(InfoExtractor):
video_id = mobj.group('id')
display_id = mobj.group('display_id')
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, display_id)

View File

@@ -5,13 +5,11 @@ import codecs
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -44,7 +42,7 @@ class TubiTvIE(InfoExtractor):
'password': password,
}
payload = compat_urllib_parse.urlencode(form_data).encode('utf-8')
request = compat_urllib_request.Request(self._LOGIN_URL, payload)
request = sanitized_Request(self._LOGIN_URL, payload)
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_page = self._download_webpage(
request, None, False, 'Wrong login info')

View File

@@ -11,7 +11,6 @@ from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -20,6 +19,7 @@ from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
sanitized_Request,
)
@@ -48,7 +48,7 @@ class TwitchBaseIE(InfoExtractor):
for cookie in self._downloader.cookiejar:
if cookie.name == 'api_token':
headers['Twitch-Api-Token'] = cookie.value
request = compat_urllib_request.Request(url, headers=headers)
request = sanitized_Request(url, headers=headers)
response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
self._handle_error(response)
return response
@@ -80,7 +80,7 @@ class TwitchBaseIE(InfoExtractor):
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(redirect_url, post_url)
request = compat_urllib_request.Request(
request = sanitized_Request(
post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8'))
request.add_header('Referer', redirect_url)
response = self._download_webpage(

View File

@@ -4,13 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
float_or_none,
xpath_text,
remove_end,
int_or_none,
ExtractorError,
sanitized_Request,
)
@@ -81,7 +81,7 @@ class TwitterCardIE(InfoExtractor):
config = None
formats = []
for user_agent in USER_AGENTS:
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('User-Agent', user_agent)
webpage = self._download_webpage(request, video_id)

View File

@@ -9,6 +9,7 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -58,7 +59,7 @@ class UdemyIE(InfoExtractor):
for header, value in headers.items():
url_or_request.add_header(header, value)
else:
url_or_request = compat_urllib_request.Request(url_or_request, headers=headers)
url_or_request = sanitized_Request(url_or_request, headers=headers)
response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
self._handle_error(response)
@@ -89,7 +90,7 @@ class UdemyIE(InfoExtractor):
'password': password.encode('utf-8'),
})
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._ORIGIN_URL)
request.add_header('Origin', self._ORIGIN_URL)

View File

@@ -4,11 +4,11 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -49,7 +49,7 @@ class Vbox7IE(InfoExtractor):
info_url = "http://vbox7.com/play/magare.do"
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
info_request = compat_urllib_request.Request(info_url, data)
info_request = sanitized_Request(info_url, data)
info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
if info_response is None:

View File

@@ -4,12 +4,10 @@ import re
import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
int_or_none,
ExtractorError,
sanitized_Request,
)
@@ -110,7 +108,7 @@ class VeohIE(InfoExtractor):
if 'class="adultwarning-container"' in webpage:
self.report_age_confirmation()
age_limit = 18
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('Cookie', 'confirmedAdult=true')
webpage = self._download_webpage(request, video_id)

View File

@@ -4,10 +4,10 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
ExtractorError,
parse_iso8601,
sanitized_Request,
)
@@ -33,7 +33,7 @@ class VesselIE(InfoExtractor):
@staticmethod
def make_json_request(url, data):
payload = json.dumps(data).encode('utf-8')
req = compat_urllib_request.Request(url, payload)
req = sanitized_Request(url, payload)
req.add_header('Content-Type', 'application/json; charset=utf-8')
return req

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
compat_urllib_request,
)
from ..compat import compat_etree_fromstring
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -73,7 +71,7 @@ class VevoIE(InfoExtractor):
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
def _real_initialize(self):
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://www.vevo.com/auth', data=b'')
webpage = self._download_webpage(
req, None,

View File

@@ -4,9 +4,7 @@ from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
)
from ..compat import (
compat_urllib_request
sanitized_Request,
)
@@ -65,7 +63,7 @@ class ViddlerIE(InfoExtractor):
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
video_id)
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
request = compat_urllib_request.Request(json_url, None, headers)
request = sanitized_Request(json_url, None, headers)
data = self._download_json(request, video_id)['video']
formats = []

View File

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import sanitized_Request
class VideoMegaIE(InfoExtractor):
@@ -30,7 +30,7 @@ class VideoMegaIE(InfoExtractor):
video_id = self._match_id(url)
iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
req = compat_urllib_request.Request(iframe_url)
req = sanitized_Request(iframe_url)
req.add_header('Referer', url)
req.add_header('Cookie', 'noadvtday=0')
webpage = self._download_webpage(req, video_id)

View File

@@ -4,7 +4,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_unquote,
)
@@ -13,6 +12,7 @@ from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
sanitized_Request,
HEADRequest,
)
@@ -76,7 +76,7 @@ class ViewsterIE(InfoExtractor):
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('Accept', self._ACCEPT_HEADER)
request.add_header('Auth-token', self._AUTH_TOKEN)
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)

View File

@@ -7,14 +7,14 @@ import hmac
import hashlib
import itertools
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_age_limit,
parse_iso8601,
sanitized_Request,
)
from ..compat import compat_urllib_request
from .common import InfoExtractor
class VikiBaseIE(InfoExtractor):
@@ -43,7 +43,7 @@ class VikiBaseIE(InfoExtractor):
hashlib.sha1
).hexdigest()
url = self._API_URL_TEMPLATE % (query, sig)
return compat_urllib_request.Request(
return sanitized_Request(
url, json.dumps(post_data).encode('utf-8')) if post_data else url
def _call_api(self, path, video_id, note, timestamp=None, post_data=None):

View File

@@ -8,7 +8,6 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
InAdvancePagedList,
int_or_none,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
std_headers,
unified_strdate,
@@ -47,7 +47,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'service': 'vimeo',
'token': token,
}))
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
login_request = sanitized_Request(self._LOGIN_URL, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Referer', self._LOGIN_URL)
self._set_vimeo_cookie('vuid', vuid)
@@ -189,6 +189,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'note': 'Video not completely processed, "failed" seed status',
'only_matching': True,
},
{
'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
'only_matching': True,
},
]
@staticmethod
@@ -218,7 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
password_request = compat_urllib_request.Request(url + '/password', data)
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
@@ -232,7 +236,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise ExtractorError('This video is protected by a password, use the --video-password option')
data = urlencode_postdata(encode_dict({'password': password}))
pass_url = url + '/check-password'
password_request = compat_urllib_request.Request(pass_url, data)
password_request = sanitized_Request(pass_url, data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
return self._download_json(
password_request, video_id,
@@ -261,7 +265,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
request = sanitized_Request(url, None, headers)
try:
webpage = self._download_webpage(request, video_id)
except ExtractorError as ee:
@@ -477,7 +481,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
password_path = self._search_regex(
r'action="([^"]+)"', login_form, 'password URL')
password_url = compat_urlparse.urljoin(page_url, password_path)
password_request = compat_urllib_request.Request(password_url, post)
password_request = sanitized_Request(password_url, post)
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
self._set_vimeo_cookie('vuid', vuid)
self._set_vimeo_cookie('xsrft', token)
@@ -486,8 +490,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
password_request, list_id,
'Verifying the password', 'Wrong password')
def _extract_videos(self, list_id, base_url):
video_ids = []
def _title_and_entries(self, list_id, base_url):
for pagenum in itertools.count(1):
page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage(
@@ -496,18 +499,18 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
if pagenum == 1:
webpage = self._login_list_password(page_url, list_id, webpage)
yield self._extract_list_title(webpage)
for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids]
return {'_type': 'playlist',
'id': list_id,
'title': self._extract_list_title(webpage),
'entries': entries,
}
def _extract_videos(self, list_id, base_url):
title_and_entries = self._title_and_entries(list_id, base_url)
list_title = next(title_and_entries)
return self.playlist_result(title_and_entries, list_id, list_title)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -568,7 +571,7 @@ class VimeoAlbumIE(VimeoChannelIE):
class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = 'vimeo:group'
_VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
_VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
_TESTS = [{
'url': 'https://vimeo.com/groups/rolexawards',
'info_dict': {
@@ -637,7 +640,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
def _page_url(self, base_url, pagenum):
url = '%s/page:%d/' % (base_url, pagenum)
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
# Set the header to get a partial html page with the ids,
# the normal page doesn't contain them.
request.add_header('X-Requested-With', 'XMLHttpRequest')

Some files were not shown because too many files have changed in this diff Show More