Merge branch 'master' of github.com:rg3/youtube-dl into weibo
This commit is contained in:
		
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							| @@ -6,8 +6,8 @@ | ||||
|  | ||||
| --- | ||||
|  | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.02** | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.23** | ||||
|  | ||||
| ### Before submitting an *issue* make sure you have: | ||||
| - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections | ||||
| @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl | ||||
| [debug] User config: [] | ||||
| [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
| [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
| [debug] youtube-dl version 2017.12.02 | ||||
| [debug] youtube-dl version 2017.12.23 | ||||
| [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
| [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
| [debug] Proxy map: {} | ||||
|   | ||||
							
								
								
									
										87
									
								
								ChangeLog
									
									
									
									
									
								
							
							
						
						
									
										87
									
								
								ChangeLog
									
									
									
									
									
								
							| @@ -1,3 +1,86 @@ | ||||
| version <unreleased> | ||||
|  | ||||
| Extractors | ||||
| * [youku] Update ccode (#14880) | ||||
| * [youku] Fix list extraction (#15065) | ||||
|  | ||||
|  | ||||
| version 2017.12.23 | ||||
|  | ||||
| Core | ||||
| * [extractor/common] Move X-Forwarded-For setup code into _request_webpage | ||||
| + [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in | ||||
|   output template (#11427, #15018) | ||||
| + [extractor/common] Introduce uploader, uploader_id and uploader_url | ||||
|   meta fields for playlists (#11427, #15018) | ||||
| * [downloader/fragment] Encode filename of fragment being removed (#15020) | ||||
| + [utils] Add another date format pattern (#14999) | ||||
|  | ||||
| Extractors | ||||
| + [kaltura] Add another embed pattern for entry_id | ||||
| + [7plus] Add support for 7plus.com.au (#15043) | ||||
| * [animeondemand] Relax login error regular expression | ||||
| + [shahid] Add support for show pages (#7401) | ||||
| + [youtube] Extract uploader, uploader_id and uploader_url for playlists | ||||
|   (#11427, #15018) | ||||
| * [afreecatv] Improve format extraction (#15019) | ||||
| + [cspan] Add support for audio only pages and catch page errors (#14995) | ||||
| + [mailru] Add support for embed URLs (#14904) | ||||
| * [crunchyroll] Future-proof XML element checks (#15013) | ||||
| * [cbslocal] Fix timestamp extraction (#14999, #15000) | ||||
| * [discoverygo] Correct TTML subtitle extension | ||||
| * [vk] Make view count optional (#14979) | ||||
| * [disney] Skip Apple FairPlay formats (#14982) | ||||
| * [voot] Fix format extraction (#14758) | ||||
|  | ||||
|  | ||||
| version 2017.12.14 | ||||
|  | ||||
| Core | ||||
| * [postprocessor/xattr] Clarify NO_SPACE message (#14970) | ||||
| * [downloader/http] Return actual download result from real_download (#14971) | ||||
|  | ||||
| Extractors | ||||
| + [itv] Extract more subtitles and duration | ||||
| * [itv] Improve extraction (#14944) | ||||
| + [byutv] Add support for geo restricted videos | ||||
| * [byutv] Fix extraction (#14966, #14967) | ||||
| + [bbccouk] Fix extraction for 320k HLS streams | ||||
| + [toutv] Add support for special video URLs (#14179) | ||||
| * [discovery] Fix free videos extraction (#14157, #14954) | ||||
| * [tvnow] Fix extraction (#7831) | ||||
| + [nickelodeon:br] Add support for Nickelodeon Brazil websites (#14893) | ||||
| * [nick] Improve extraction (#14876) | ||||
| * [tbs] Fix extraction (#13658) | ||||
|  | ||||
|  | ||||
| version 2017.12.10 | ||||
|  | ||||
| Core | ||||
| + [utils] Add sami mimetype to mimetype2ext | ||||
|  | ||||
| Extractors | ||||
| * [culturebox] Improve video id extraction (#14947) | ||||
| * [twitter] Improve extraction (#14197) | ||||
| + [udemy] Extract more HLS formats | ||||
| * [udemy] Improve course id extraction (#14938) | ||||
| + [stretchinternet] Add support for portal.stretchinternet.com (#14576) | ||||
| * [ellentube] Fix extraction (#14407, #14570) | ||||
| + [raiplay:playlist] Add support for playlists (#14563) | ||||
| * [sonyliv] Bypass geo restriction | ||||
| * [sonyliv] Extract higher quality formats (#14922) | ||||
| * [fox] Extract subtitles | ||||
| + [fox] Add support for Adobe Pass authentication (#14205, #14489) | ||||
| - [dailymotion:cloud] Remove extractor (#6794) | ||||
| * [xhamster] Fix thumbnail extraction (#14780) | ||||
| + [xhamster] Add support for mobile URLs (#14780) | ||||
| * [generic] Don't pass video id as mpd id while extracting DASH (#14902) | ||||
| * [ard] Skip invalid stream URLs (#14906) | ||||
| * [porncom] Fix metadata extraction (#14911) | ||||
| * [pluralsight] Detect agreement request (#14913) | ||||
| * [toutv] Fix login (#14614) | ||||
|  | ||||
|  | ||||
| version 2017.12.02 | ||||
|  | ||||
| Core | ||||
| @@ -101,8 +184,8 @@ Extractors | ||||
| + [fxnetworks] Extract series metadata (#14603) | ||||
| + [younow] Add support for younow.com (#9255, #9432, #12436) | ||||
| * [dctptv] Fix extraction (#14599) | ||||
| * [youtube] Restrict embed regex (#14600) | ||||
| * [vimeo] Restrict iframe embed regex (#14600) | ||||
| * [youtube] Restrict embed regular expression (#14600) | ||||
| * [vimeo] Restrict iframe embed regular expression (#14600) | ||||
| * [soundgasm] Improve extraction (#14588) | ||||
| - [myvideo] Remove extractor (#8557) | ||||
| + [nbc] Add support for classic-tv videos (#14575) | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| include README.md | ||||
| include test/*.py | ||||
| include test/*.json | ||||
| include LICENSE | ||||
| include AUTHORS | ||||
| include ChangeLog | ||||
| include youtube-dl.bash-completion | ||||
| include youtube-dl.fish | ||||
| include youtube-dl.1 | ||||
| recursive-include docs Makefile conf.py *.rst | ||||
| recursive-include test * | ||||
|   | ||||
							
								
								
									
										4
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								Makefile
									
									
									
									
									
								
							| @@ -110,7 +110,7 @@ _EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -in | ||||
| youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) | ||||
| 	$(PYTHON) devscripts/make_lazy_extractors.py $@ | ||||
|  | ||||
| youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog | ||||
| youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS | ||||
| 	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ | ||||
| 		--exclude '*.DS_Store' \ | ||||
| 		--exclude '*.kate-swp' \ | ||||
| @@ -122,7 +122,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- | ||||
| 		--exclude 'docs/_build' \ | ||||
| 		-- \ | ||||
| 		bin devscripts test youtube_dl docs \ | ||||
| 		ChangeLog LICENSE README.md README.txt \ | ||||
| 		ChangeLog AUTHORS LICENSE README.md README.txt \ | ||||
| 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ | ||||
| 		youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \ | ||||
| 		youtube-dl | ||||
|   | ||||
| @@ -511,6 +511,9 @@ The basic usage is not to set any template arguments when downloading a single f | ||||
|  - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage | ||||
|  - `comment_count` (numeric): Number of comments on the video | ||||
|  - `age_limit` (numeric): Age restriction for the video (years) | ||||
|  - `is_live` (boolean): Whether this video is a live stream or a fixed-length video | ||||
|  - `start_time` (numeric): Time in seconds at which playback should start, as specified in the URL | ||||
|  - `end_time` (numeric): Time in seconds at which playback should end, as specified in the URL | ||||
|  - `format` (string): A human-readable description of the format  | ||||
|  - `format_id` (string): Format code specified by `--format` | ||||
|  - `format_note` (string): Additional info about the format | ||||
| @@ -536,6 +539,8 @@ The basic usage is not to set any template arguments when downloading a single f | ||||
|  - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist | ||||
|  - `playlist_id` (string): Playlist identifier | ||||
|  - `playlist_title` (string): Playlist title | ||||
|  - `playlist_uploader` (string): Full name of the playlist uploader | ||||
|  - `playlist_uploader_id` (string): Nickname or id of the playlist uploader | ||||
|  | ||||
| Available for the video that belongs to some logical chapter or section: | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,7 @@ | ||||
|  - **56.com** | ||||
|  - **5min** | ||||
|  - **6play** | ||||
|  - **7plus** | ||||
|  - **8tracks** | ||||
|  - **91porn** | ||||
|  - **9c9media** | ||||
| @@ -122,7 +123,6 @@ | ||||
|  - **bt:vestlendingen**: Bergens Tidende - Vestlendingen | ||||
|  - **BuzzFeed** | ||||
|  - **BYUtv** | ||||
|  - **BYUtvEvent** | ||||
|  - **Camdemy** | ||||
|  - **CamdemyFolder** | ||||
|  - **CamWithHer** | ||||
| @@ -198,7 +198,6 @@ | ||||
|  - **dailymotion** | ||||
|  - **dailymotion:playlist** | ||||
|  - **dailymotion:user** | ||||
|  - **DailymotionCloud** | ||||
|  - **DaisukiMotto** | ||||
|  - **DaisukiMottoPlaylist** | ||||
|  - **daum.net** | ||||
| @@ -243,8 +242,9 @@ | ||||
|  - **eHow** | ||||
|  - **Einthusan** | ||||
|  - **eitb.tv** | ||||
|  - **EllenTV** | ||||
|  - **EllenTV:clips** | ||||
|  - **EllenTube** | ||||
|  - **EllenTubePlaylist** | ||||
|  - **EllenTubeVideo** | ||||
|  - **ElPais**: El País | ||||
|  - **Embedly** | ||||
|  - **EMPFlix** | ||||
| @@ -538,6 +538,7 @@ | ||||
|  - **nhl.com:videocenter:category**: NHL videocenter category | ||||
|  - **nick.com** | ||||
|  - **nick.de** | ||||
|  - **nickelodeon:br** | ||||
|  - **nickelodeonru** | ||||
|  - **nicknight** | ||||
|  - **niconico**: ニコニコ動画 | ||||
| @@ -556,8 +557,6 @@ | ||||
|  - **nowness** | ||||
|  - **nowness:playlist** | ||||
|  - **nowness:series** | ||||
|  - **NowTV** (Currently broken) | ||||
|  - **NowTVList** | ||||
|  - **nowvideo**: NowVideo | ||||
|  - **Noz** | ||||
|  - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl | ||||
| @@ -662,6 +661,7 @@ | ||||
|  - **Rai** | ||||
|  - **RaiPlay** | ||||
|  - **RaiPlayLive** | ||||
|  - **RaiPlayPlaylist** | ||||
|  - **RBMARadio** | ||||
|  - **RDS**: RDS.ca | ||||
|  - **RedBullTV** | ||||
| @@ -729,6 +729,7 @@ | ||||
|  - **Servus** | ||||
|  - **Sexu** | ||||
|  - **Shahid** | ||||
|  - **ShahidShow** | ||||
|  - **Shared**: shared.sx | ||||
|  - **ShowRoomLive** | ||||
|  - **Sina** | ||||
| @@ -781,6 +782,7 @@ | ||||
|  - **streamcloud.eu** | ||||
|  - **StreamCZ** | ||||
|  - **StreetVoice** | ||||
|  - **StretchInternet** | ||||
|  - **SunPorno** | ||||
|  - **SVT** | ||||
|  - **SVTPlay**: SVT Play and Öppet arkiv | ||||
| @@ -792,7 +794,7 @@ | ||||
|  - **tagesschau:player** | ||||
|  - **Tass** | ||||
|  - **TastyTrade** | ||||
|  - **TBS** (Currently broken) | ||||
|  - **TBS** | ||||
|  - **TDSLifeway** | ||||
|  - **teachertube**: teachertube.com videos | ||||
|  - **teachertube:user:collection**: teachertube.com user and collection videos | ||||
| @@ -863,6 +865,8 @@ | ||||
|  - **tvland.com** | ||||
|  - **TVN24** | ||||
|  - **TVNoe** | ||||
|  - **TVNow** | ||||
|  - **TVNowList** | ||||
|  - **tvp**: Telewizja Polska | ||||
|  - **tvp:embed**: Telewizja Polska | ||||
|  - **tvp:series** | ||||
|   | ||||
							
								
								
									
										1
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								setup.py
									
									
									
									
									
								
							| @@ -109,6 +109,7 @@ setup( | ||||
|     author_email='ytdl@yt-dl.org', | ||||
|     maintainer='Sergey M.', | ||||
|     maintainer_email='dstftw@gmail.com', | ||||
|     license='Unlicense', | ||||
|     packages=[ | ||||
|         'youtube_dl', | ||||
|         'youtube_dl.extractor', 'youtube_dl.downloader', | ||||
|   | ||||
| @@ -343,6 +343,7 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) | ||||
|         self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) | ||||
|         self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) | ||||
|         self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) | ||||
|  | ||||
|     def test_determine_ext(self): | ||||
|         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') | ||||
|   | ||||
| @@ -975,6 +975,8 @@ class YoutubeDL(object): | ||||
|                     'playlist': playlist, | ||||
|                     'playlist_id': ie_result.get('id'), | ||||
|                     'playlist_title': ie_result.get('title'), | ||||
|                     'playlist_uploader': ie_result.get('uploader'), | ||||
|                     'playlist_uploader_id': ie_result.get('uploader_id'), | ||||
|                     'playlist_index': i + playliststart, | ||||
|                     'extractor': ie_result['extractor'], | ||||
|                     'webpage_url': ie_result['webpage_url'], | ||||
|   | ||||
| @@ -112,7 +112,7 @@ class FragmentFD(FileDownloader): | ||||
|             if self.__do_ytdl_file(ctx): | ||||
|                 self._write_ytdl_file(ctx) | ||||
|             if not self.params.get('keep_fragments', False): | ||||
|                 os.remove(ctx['fragment_filename_sanitized']) | ||||
|                 os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) | ||||
|             del ctx['fragment_filename_sanitized'] | ||||
|  | ||||
|     def _prepare_frag_download(self, ctx): | ||||
|   | ||||
| @@ -284,8 +284,7 @@ class HttpFD(FileDownloader): | ||||
|         while count <= retries: | ||||
|             try: | ||||
|                 establish_connection() | ||||
|                 download() | ||||
|                 return True | ||||
|                 return download() | ||||
|             except RetryDownload as e: | ||||
|                 count += 1 | ||||
|                 if count <= retries: | ||||
|   | ||||
| @@ -228,10 +228,19 @@ class AfreecaTVIE(InfoExtractor): | ||||
|                     r'^(\d{8})_', key, 'upload date', default=None) | ||||
|                 file_duration = int_or_none(file_element.get('duration')) | ||||
|                 format_id = key if key else '%s_%s' % (video_id, file_num) | ||||
|                 formats = self._extract_m3u8_formats( | ||||
|                     file_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', | ||||
|                     note='Downloading part %d m3u8 information' % file_num) | ||||
|                 if determine_ext(file_url) == 'm3u8': | ||||
|                     formats = self._extract_m3u8_formats( | ||||
|                         file_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                         m3u8_id='hls', | ||||
|                         note='Downloading part %d m3u8 information' % file_num) | ||||
|                 else: | ||||
|                     formats = [{ | ||||
|                         'url': file_url, | ||||
|                         'format_id': 'http', | ||||
|                     }] | ||||
|                 if not formats: | ||||
|                     continue | ||||
|                 self._sort_formats(formats) | ||||
|                 file_info = common_entry.copy() | ||||
|                 file_info.update({ | ||||
|                     'id': format_id, | ||||
|   | ||||
| @@ -85,8 +85,8 @@ class AnimeOnDemandIE(InfoExtractor): | ||||
|  | ||||
|         if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')): | ||||
|             error = self._search_regex( | ||||
|                 r'<p class="alert alert-danger">(.+?)</p>', | ||||
|                 response, 'error', default=None) | ||||
|                 r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>', | ||||
|                 response, 'error', default=None, group='error') | ||||
|             if error: | ||||
|                 raise ExtractorError('Unable to login: %s' % error, expected=True) | ||||
|             raise ExtractorError('Unable to log in') | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .generic import GenericIE | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
| @@ -126,6 +127,8 @@ class ARDMediathekIE(InfoExtractor): | ||||
|                 quality = stream.get('_quality') | ||||
|                 server = stream.get('_server') | ||||
|                 for stream_url in stream_urls: | ||||
|                     if not isinstance(stream_url, compat_str) or '//' not in stream_url: | ||||
|                         continue | ||||
|                     ext = determine_ext(stream_url) | ||||
|                     if quality != 'auto' and ext in ('f4m', 'm3u8'): | ||||
|                         continue | ||||
| @@ -146,13 +149,11 @@ class ARDMediathekIE(InfoExtractor): | ||||
|                                 'play_path': stream_url, | ||||
|                                 'format_id': 'a%s-rtmp-%s' % (num, quality), | ||||
|                             } | ||||
|                         elif stream_url.startswith('http'): | ||||
|                         else: | ||||
|                             f = { | ||||
|                                 'url': stream_url, | ||||
|                                 'format_id': 'a%s-%s-%s' % (num, ext, quality) | ||||
|                             } | ||||
|                         else: | ||||
|                             continue | ||||
|                         m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url) | ||||
|                         if m: | ||||
|                             f.update({ | ||||
|   | ||||
							
								
								
									
										78
									
								
								youtube_dl/extractor/aws.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								youtube_dl/extractor/aws.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import hashlib | ||||
| import hmac | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
|  | ||||
|  | ||||
class AWSIE(InfoExtractor):
    """Base extractor that issues GET requests signed with AWS Signature V4.

    The signing code reads ``self._AWS_PROXY_HOST`` and ``self._AWS_API_KEY``,
    neither of which is defined on this class; subclasses are presumably
    expected to provide them (verify against the concrete extractors).
    """
    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
    _AWS_REGION = 'us-east-1'

    def _aws_execute_api(self, aws_dict, video_id, query=None):
        """Sign and perform a GET request, returning the decoded JSON.

        aws_dict must contain 'uri', 'access_key' and 'secret_key' and may
        contain 'session_token'. query is an optional dict of query string
        parameters. The result is whatever self._download_json returns for
        the signed request.
        """
        query = query or {}
        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        date = amz_date[:8]
        headers = {
            'Accept': 'application/json',
            'Host': self._AWS_PROXY_HOST,
            'X-Amz-Date': amz_date,
        }
        session_token = aws_dict.get('session_token')
        if session_token:
            headers['X-Amz-Security-Token'] = session_token
        headers['X-Api-Key'] = self._AWS_API_KEY

        def aws_hash(s):
            return hashlib.sha256(s.encode('utf-8')).hexdigest()

        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        # SigV4 requires query parameters to be sorted by name; plain dict
        # iteration order is not guaranteed on every supported Python version.
        canonical_querystring = compat_urllib_parse_urlencode(
            sorted(query.items()))
        # Likewise, canonical headers and the SignedHeaders list must be
        # sorted by lowercase header name and must agree with each other.
        sorted_headers = sorted(
            (header_name.lower(), header_value)
            for header_name, header_value in headers.items())
        canonical_headers = ''.join(
            '%s:%s\n' % (header_name, header_value)
            for header_name, header_value in sorted_headers)
        signed_headers = ';'.join(
            header_name for header_name, _ in sorted_headers)
        # canonical_headers already ends with a newline, so joining with
        # '\n' yields the blank line that separates it from signed_headers.
        canonical_request = '\n'.join([
            'GET',
            aws_dict['uri'],
            canonical_querystring,
            canonical_headers,
            signed_headers,
            aws_hash('')  # hash of the (empty) request payload
        ])

        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
        credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
        credential_scope = '/'.join(credential_scope_list)
        string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])

        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
        def aws_hmac(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

        def aws_hmac_digest(key, msg):
            return aws_hmac(key, msg).digest()

        def aws_hmac_hexdigest(key, msg):
            return aws_hmac(key, msg).hexdigest()

        # Derive the signing key by chaining HMACs over each scope component.
        k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
        for value in credential_scope_list:
            k_signing = aws_hmac_digest(k_signing, value)

        signature = aws_hmac_hexdigest(k_signing, string_to_sign)

        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
        headers['Authorization'] = ', '.join([
            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
            'SignedHeaders=%s' % signed_headers,
            'Signature=%s' % signature,
        ])

        # The request must carry exactly the query string that was signed.
        return self._download_json(
            'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
            video_id, headers=headers)
| @@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                             m3u8_id=format_id, fatal=False)) | ||||
|                         if re.search(self._USP_RE, href): | ||||
|                             usp_formats = self._extract_m3u8_formats( | ||||
|                                 re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href), | ||||
|                                 re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), | ||||
|                                 programme_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|                                 m3u8_id=format_id, fatal=False) | ||||
|                             for f in usp_formats: | ||||
|   | ||||
| @@ -464,7 +464,7 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|             'timestamp': 1441391203, | ||||
|             'upload_date': '20150904', | ||||
|             'uploader_id': '929656772001', | ||||
|             'formats': 'mincount:22', | ||||
|             'formats': 'mincount:20', | ||||
|         }, | ||||
|     }, { | ||||
|         # with rtmp streams | ||||
| @@ -478,7 +478,7 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|             'timestamp': 1433556729, | ||||
|             'upload_date': '20150606', | ||||
|             'uploader_id': '4036320279001', | ||||
|             'formats': 'mincount:41', | ||||
|             'formats': 'mincount:39', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
| @@ -564,59 +564,7 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|  | ||||
|         return entries | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         self._initialize_geo_bypass(smuggled_data.get('geo_countries')) | ||||
|  | ||||
|         account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://players.brightcove.net/%s/%s_%s/index.min.js' | ||||
|             % (account_id, player_id, embed), video_id) | ||||
|  | ||||
|         policy_key = None | ||||
|  | ||||
|         catalog = self._search_regex( | ||||
|             r'catalog\(({.+?})\);', webpage, 'catalog', default=None) | ||||
|         if catalog: | ||||
|             catalog = self._parse_json( | ||||
|                 js_to_json(catalog), video_id, fatal=False) | ||||
|             if catalog: | ||||
|                 policy_key = catalog.get('policyKey') | ||||
|  | ||||
|         if not policy_key: | ||||
|             policy_key = self._search_regex( | ||||
|                 r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', | ||||
|                 webpage, 'policy key', group='pk') | ||||
|  | ||||
|         api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) | ||||
|         try: | ||||
|             json_data = self._download_json(api_url, video_id, headers={ | ||||
|                 'Accept': 'application/json;pk=%s' % policy_key | ||||
|             }) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 json_data = self._parse_json(e.cause.read().decode(), video_id)[0] | ||||
|                 message = json_data.get('message') or json_data['error_code'] | ||||
|                 if json_data.get('error_subcode') == 'CLIENT_GEO': | ||||
|                     self.raise_geo_restricted(msg=message) | ||||
|                 raise ExtractorError(message, expected=True) | ||||
|             raise | ||||
|  | ||||
|         errors = json_data.get('errors') | ||||
|         if errors and errors[0].get('error_subcode') == 'TVE_AUTH': | ||||
|             custom_fields = json_data['custom_fields'] | ||||
|             tve_token = self._extract_mvpd_auth( | ||||
|                 smuggled_data['source_url'], video_id, | ||||
|                 custom_fields['bcadobepassrequestorid'], | ||||
|                 custom_fields['bcadobepassresourceid']) | ||||
|             json_data = self._download_json( | ||||
|                 api_url, video_id, headers={ | ||||
|                     'Accept': 'application/json;pk=%s' % policy_key | ||||
|                 }, query={ | ||||
|                     'tveToken': tve_token, | ||||
|                 }) | ||||
|  | ||||
|     def _parse_brightcove_metadata(self, json_data, video_id): | ||||
|         title = json_data['name'].strip() | ||||
|  | ||||
|         formats = [] | ||||
| @@ -682,6 +630,7 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|                     }) | ||||
|                 formats.append(f) | ||||
|  | ||||
|         errors = json_data.get('errors') | ||||
|         if not formats and errors: | ||||
|             error = errors[0] | ||||
|             raise ExtractorError( | ||||
| @@ -708,9 +657,64 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|             'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), | ||||
|             'duration': duration, | ||||
|             'timestamp': parse_iso8601(json_data.get('published_at')), | ||||
|             'uploader_id': account_id, | ||||
|             'uploader_id': json_data.get('account_id'), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'tags': json_data.get('tags', []), | ||||
|             'is_live': is_live, | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         self._initialize_geo_bypass(smuggled_data.get('geo_countries')) | ||||
|  | ||||
|         account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://players.brightcove.net/%s/%s_%s/index.min.js' | ||||
|             % (account_id, player_id, embed), video_id) | ||||
|  | ||||
|         policy_key = None | ||||
|  | ||||
|         catalog = self._search_regex( | ||||
|             r'catalog\(({.+?})\);', webpage, 'catalog', default=None) | ||||
|         if catalog: | ||||
|             catalog = self._parse_json( | ||||
|                 js_to_json(catalog), video_id, fatal=False) | ||||
|             if catalog: | ||||
|                 policy_key = catalog.get('policyKey') | ||||
|  | ||||
|         if not policy_key: | ||||
|             policy_key = self._search_regex( | ||||
|                 r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', | ||||
|                 webpage, 'policy key', group='pk') | ||||
|  | ||||
|         api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) | ||||
|         try: | ||||
|             json_data = self._download_json(api_url, video_id, headers={ | ||||
|                 'Accept': 'application/json;pk=%s' % policy_key | ||||
|             }) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 json_data = self._parse_json(e.cause.read().decode(), video_id)[0] | ||||
|                 message = json_data.get('message') or json_data['error_code'] | ||||
|                 if json_data.get('error_subcode') == 'CLIENT_GEO': | ||||
|                     self.raise_geo_restricted(msg=message) | ||||
|                 raise ExtractorError(message, expected=True) | ||||
|             raise | ||||
|  | ||||
|         errors = json_data.get('errors') | ||||
|         if errors and errors[0].get('error_subcode') == 'TVE_AUTH': | ||||
|             custom_fields = json_data['custom_fields'] | ||||
|             tve_token = self._extract_mvpd_auth( | ||||
|                 smuggled_data['source_url'], video_id, | ||||
|                 custom_fields['bcadobepassrequestorid'], | ||||
|                 custom_fields['bcadobepassresourceid']) | ||||
|             json_data = self._download_json( | ||||
|                 api_url, video_id, headers={ | ||||
|                     'Accept': 'application/json;pk=%s' % policy_key | ||||
|                 }, query={ | ||||
|                     'tveToken': tve_token, | ||||
|                 }) | ||||
|  | ||||
|         return self._parse_brightcove_metadata(json_data, video_id) | ||||
|   | ||||
| @@ -3,20 +3,19 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class BYUtvIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?' | ||||
|     _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', | ||||
|         'info_dict': { | ||||
|             'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', | ||||
|             'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH', | ||||
|             'display_id': 'studio-c-season-5-episode-5', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Season 5 Episode 5', | ||||
|             'description': 'md5:e07269172baff037f8e8bf9956bc9747', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65', | ||||
|             'thumbnail': r're:^https?://.*', | ||||
|             'duration': 1486.486, | ||||
|         }, | ||||
|         'params': { | ||||
| @@ -26,6 +25,9 @@ class BYUtvIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -33,16 +35,16 @@ class BYUtvIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         episode_code = self._search_regex( | ||||
|             r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') | ||||
|  | ||||
|         ep = self._parse_json( | ||||
|             episode_code, display_id, transform_source=lambda s: | ||||
|             re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) | ||||
|  | ||||
|         if ep['providerType'] != 'Ooyala': | ||||
|             raise ExtractorError('Unsupported provider %s' % ep['provider']) | ||||
|         ep = self._download_json( | ||||
|             'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id, | ||||
|             query={ | ||||
|                 'contentid': video_id, | ||||
|                 'channel': 'byutv', | ||||
|                 'x-byutv-context': 'web$US', | ||||
|             }, headers={ | ||||
|                 'x-byutv-context': 'web$US', | ||||
|                 'x-byutv-platformkey': 'xsaaw9c7y5', | ||||
|             })['ooyalaVOD'] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
| @@ -50,44 +52,7 @@ class BYUtvIE(InfoExtractor): | ||||
|             'url': 'ooyala:%s' % ep['providerId'], | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': ep['title'], | ||||
|             'title': ep.get('title'), | ||||
|             'description': ep.get('description'), | ||||
|             'thumbnail': ep.get('imageThumbnail'), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class BYUtvEventIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b', | ||||
|         'info_dict': { | ||||
|             'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Toledo vs. BYU (9/30/16)', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         ooyala_id = self._search_regex( | ||||
|             r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', | ||||
|             webpage, 'ooyala id', group='id') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage, | ||||
|             'title').strip() | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'Ooyala', | ||||
|             'url': 'ooyala:%s' % ooyala_id, | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|         } | ||||
|   | ||||
| @@ -91,12 +91,10 @@ class CBSLocalIE(AnvatoIE): | ||||
|  | ||||
|         info_dict = self._extract_anvato_videos(webpage, display_id) | ||||
|  | ||||
|         time_str = self._html_search_regex( | ||||
|             r'class="entry-date">([^<]+)<', webpage, 'released date', default=None) | ||||
|         if time_str: | ||||
|             timestamp = unified_timestamp(time_str) | ||||
|         else: | ||||
|             timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage)) | ||||
|         timestamp = unified_timestamp(self._html_search_regex( | ||||
|             r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage, | ||||
|             'released date', default=None)) or parse_iso8601( | ||||
|             self._html_search_meta('uploadDate', webpage)) | ||||
|  | ||||
|         info_dict.update({ | ||||
|             'display_id': display_id, | ||||
|   | ||||
| @@ -301,8 +301,9 @@ class InfoExtractor(object): | ||||
|     There must be a key "entries", which is a list, an iterable, or a PagedList | ||||
|     object, each element of which is a valid dictionary by this specification. | ||||
|  | ||||
|     Additionally, playlists can have "title", "description" and "id" attributes | ||||
|     with the same semantics as videos (see above). | ||||
|     Additionally, playlists can have "id", "title", "description", "uploader", | ||||
|     "uploader_id", "uploader_url" attributes with the same semantics as videos | ||||
|     (see above). | ||||
|  | ||||
|  | ||||
|     _type "multi_video" indicates that there are multiple videos that | ||||
| @@ -494,6 +495,16 @@ class InfoExtractor(object): | ||||
|                 self.to_screen('%s' % (note,)) | ||||
|             else: | ||||
|                 self.to_screen('%s: %s' % (video_id, note)) | ||||
|  | ||||
|         # Some sites check the X-Forwarded-For HTTP header in order to figure | ||||
|         # out the origin of a client behind a proxy. This allows bypassing geo | ||||
|         # restriction by faking this header's value with an IP that belongs to | ||||
|         # some geo unrestricted country. We will do so once we encounter any | ||||
|         # geo restriction error. | ||||
|         if self._x_forwarded_for_ip: | ||||
|             if 'X-Forwarded-For' not in headers: | ||||
|                 headers['X-Forwarded-For'] = self._x_forwarded_for_ip | ||||
|  | ||||
|         if isinstance(url_or_request, compat_urllib_request.Request): | ||||
|             url_or_request = update_Request( | ||||
|                 url_or_request, data=data, headers=headers, query=query) | ||||
| @@ -523,15 +534,6 @@ class InfoExtractor(object): | ||||
|         if isinstance(url_or_request, (compat_str, str)): | ||||
|             url_or_request = url_or_request.partition('#')[0] | ||||
|  | ||||
|         # Some sites check X-Forwarded-For HTTP header in order to figure out | ||||
|         # the origin of the client behind proxy. This allows bypassing geo | ||||
|         # restriction by faking this header's value to IP that belongs to some | ||||
|         # geo unrestricted country. We will do so once we encounter any | ||||
|         # geo restriction error. | ||||
|         if self._x_forwarded_for_ip: | ||||
|             if 'X-Forwarded-For' not in headers: | ||||
|                 headers['X-Forwarded-For'] = self._x_forwarded_for_ip | ||||
|  | ||||
|         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) | ||||
|         if urlh is False: | ||||
|             assert not fatal | ||||
|   | ||||
| @@ -392,7 +392,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|                 'Downloading subtitles for ' + sub_name, data={ | ||||
|                     'subtitle_script_id': sub_id, | ||||
|                 }) | ||||
|             if not sub_doc: | ||||
|             if sub_doc is None: | ||||
|                 continue | ||||
|             sid = sub_doc.get('id') | ||||
|             iv = xpath_text(sub_doc, 'iv', 'subtitle iv') | ||||
| @@ -479,9 +479,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|                     'video_quality': stream_quality, | ||||
|                     'current_page': url, | ||||
|                 }) | ||||
|             if streamdata: | ||||
|             if streamdata is not None: | ||||
|                 stream_info = streamdata.find('./{default}preload/stream_info') | ||||
|                 if stream_info: | ||||
|                 if stream_info is not None: | ||||
|                     stream_infos.append(stream_info) | ||||
|             stream_info = self._call_rpc_api( | ||||
|                 'VideoEncode_GetStreamInfo', video_id, | ||||
| @@ -490,7 +490,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|                     'video_format': stream_format, | ||||
|                     'video_encode_quality': stream_quality, | ||||
|                 }) | ||||
|             if stream_info: | ||||
|             if stream_info is not None: | ||||
|                 stream_infos.append(stream_info) | ||||
|             for stream_info in stream_infos: | ||||
|                 video_encode_id = xpath_text(stream_info, './video_encode_id') | ||||
|   | ||||
| @@ -4,13 +4,14 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
|     find_xpath_attr, | ||||
|     smuggle_url, | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     extract_attributes, | ||||
|     find_xpath_attr, | ||||
|     get_element_by_class, | ||||
|     int_or_none, | ||||
|     smuggle_url, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from .senateisvp import SenateISVPIE | ||||
| from .ustream import UstreamIE | ||||
| @@ -68,6 +69,10 @@ class CSpanIE(InfoExtractor): | ||||
|             'uploader': 'HouseCommittee', | ||||
|             'uploader_id': '12987475', | ||||
|         }, | ||||
|     }, { | ||||
|         # Audio Only | ||||
|         'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' | ||||
|  | ||||
| @@ -111,7 +116,15 @@ class CSpanIE(InfoExtractor): | ||||
|                     title = self._og_search_title(webpage) | ||||
|                     surl = smuggle_url(senate_isvp_url, {'force_title': title}) | ||||
|                     return self.url_result(surl, 'SenateISVP', video_id, title) | ||||
|                 video_id = self._search_regex( | ||||
|                     r'jwsetup\.clipprog\s*=\s*(\d+);', | ||||
|                     webpage, 'jwsetup program id', default=None) | ||||
|                 if video_id: | ||||
|                     video_type = 'program' | ||||
|         if video_type is None or video_id is None: | ||||
|             error_message = get_element_by_class('VLplayer-error-message', webpage) | ||||
|             if error_message: | ||||
|                 raise ExtractorError(error_message) | ||||
|             raise ExtractorError('unable to find video id and type') | ||||
|  | ||||
|         def get_text_attr(d, attr): | ||||
| @@ -138,7 +151,7 @@ class CSpanIE(InfoExtractor): | ||||
|         entries = [] | ||||
|         for partnum, f in enumerate(files): | ||||
|             formats = [] | ||||
|             for quality in f['qualities']: | ||||
|             for quality in f.get('qualities', []): | ||||
|                 formats.append({ | ||||
|                     'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), | ||||
|                     'url': unescapeHTML(get_text_attr(quality, 'file')), | ||||
|   | ||||
| @@ -413,52 +413,3 @@ class DailymotionUserIE(DailymotionPlaylistIE): | ||||
|             'title': full_user, | ||||
|             'entries': self._extract_entries(user), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class DailymotionCloudIE(DailymotionBaseInfoExtractor): | ||||
|     _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/' | ||||
|     _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX | ||||
|     _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html | ||||
|         # Tested at FranceTvInfo_2 | ||||
|         'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html | ||||
|         'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def _extract_dmcloud_url(cls, webpage): | ||||
|         mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, webpage) | ||||
|         if mobj: | ||||
|             return mobj.group(1) | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group(1) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage_no_ff(url, video_id) | ||||
|  | ||||
|         title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title') | ||||
|  | ||||
|         video_info = self._parse_json(self._search_regex( | ||||
|             r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id) | ||||
|  | ||||
|         # TODO: parse ios_url, which is in fact a manifest | ||||
|         video_url = video_info['mp4_url'] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': video_info.get('thumbnail_url'), | ||||
|         } | ||||
|   | ||||
| @@ -1,14 +1,18 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| import random | ||||
| import re | ||||
| import string | ||||
|  | ||||
| from .discoverygo import DiscoveryGoBaseIE | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     ExtractorError, | ||||
|     update_url_query, | ||||
| ) | ||||
| from ..compat import compat_str | ||||
| from ..compat import compat_HTTPError | ||||
|  | ||||
|  | ||||
| class DiscoveryIE(InfoExtractor): | ||||
| class DiscoveryIE(DiscoveryGoBaseIE): | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?(?: | ||||
|             discovery| | ||||
|             investigationdiscovery| | ||||
| @@ -19,79 +23,65 @@ class DiscoveryIE(InfoExtractor): | ||||
|             sciencechannel| | ||||
|             tlc| | ||||
|             velocity | ||||
|         )\.com/(?:[^/]+/)*(?P<id>[^./?#]+)''' | ||||
|         )\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', | ||||
|         'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley', | ||||
|         'info_dict': { | ||||
|             'id': '20769', | ||||
|             'id': '5a2d9b4d6b66d17a5026e1fd', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Mission Impossible Outtakes', | ||||
|             'description': ('Watch Jamie Hyneman and Adam Savage practice being' | ||||
|                             ' each other -- to the point of confusing Jamie\'s dog -- and ' | ||||
|                             'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s' | ||||
|                             ' back.'), | ||||
|             'duration': 156, | ||||
|             'timestamp': 1302032462, | ||||
|             'upload_date': '20110405', | ||||
|             'uploader_id': '103207', | ||||
|             'title': 'Dave Foley', | ||||
|             'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f', | ||||
|             'duration': 608, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires ffmpeg | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons', | ||||
|         'info_dict': { | ||||
|             'id': 'mythbusters-the-simpsons', | ||||
|             'title': 'MythBusters: The Simpsons', | ||||
|         }, | ||||
|         'playlist_mincount': 10, | ||||
|     }, { | ||||
|         'url': 'http://www.animalplanet.com/longfin-eels-maneaters/', | ||||
|         'info_dict': { | ||||
|             'id': '78326', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Longfin Eels: Maneaters?', | ||||
|             'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.', | ||||
|             'upload_date': '20140725', | ||||
|             'timestamp': 1406246400, | ||||
|             'duration': 116, | ||||
|             'uploader_id': '103207', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires ffmpeg | ||||
|         } | ||||
|         'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _GEO_BYPASS = False | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         info = self._download_json(url + '?flat=1', display_id) | ||||
|         path, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_title = info.get('playlist_title') or info.get('video_title') | ||||
|         react_data = self._parse_json(self._search_regex( | ||||
|             r'window\.__reactTransmitPacket\s*=\s*({.+?});', | ||||
|             webpage, 'react data'), display_id) | ||||
|         content_blocks = react_data['layout'][path]['contentBlocks'] | ||||
|         video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0] | ||||
|         video_id = video['id'] | ||||
|  | ||||
|         entries = [] | ||||
|         access_token = self._download_json( | ||||
|             'https://www.discovery.com/anonymous', display_id, query={ | ||||
|                 'authLink': update_url_query( | ||||
|                     'https://login.discovery.com/v1/oauth2/authorize', { | ||||
|                         'client_id': react_data['application']['apiClientId'], | ||||
|                         'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html', | ||||
|                         'response_type': 'anonymous', | ||||
|                         'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]), | ||||
|                     }) | ||||
|             })['access_token'] | ||||
|  | ||||
|         for idx, video_info in enumerate(info['playlist']): | ||||
|             subtitles = {} | ||||
|             caption_url = video_info.get('captionsUrl') | ||||
|             if caption_url: | ||||
|                 subtitles = { | ||||
|                     'en': [{ | ||||
|                         'url': caption_url, | ||||
|                     }] | ||||
|                 } | ||||
|         try: | ||||
|             stream = self._download_json( | ||||
|                 'https://api.discovery.com/v1/streaming/video/' + video_id, | ||||
|                 display_id, headers={ | ||||
|                     'Authorization': 'Bearer ' + access_token, | ||||
|                 }) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 e_description = self._parse_json( | ||||
|                     e.cause.read().decode(), display_id)['description'] | ||||
|                 if 'resource not available for country' in e_description: | ||||
|                     self.raise_geo_restricted(countries=self._GEO_COUNTRIES) | ||||
|                 if 'Authorized Networks' in e_description: | ||||
|                     raise ExtractorError( | ||||
|                         'This video is only available via cable service provider subscription that' | ||||
|                         ' is not currently supported. You may want to use --cookies.', expected=True) | ||||
|                 raise ExtractorError(e_description) | ||||
|             raise | ||||
|  | ||||
|             entries.append({ | ||||
|                 '_type': 'url_transparent', | ||||
|                 'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'], | ||||
|                 'id': compat_str(video_info['id']), | ||||
|                 'title': video_info['title'], | ||||
|                 'description': video_info.get('description'), | ||||
|                 'duration': parse_duration(video_info.get('video_length')), | ||||
|                 'webpage_url': video_info.get('href') or video_info.get('url'), | ||||
|                 'thumbnail': video_info.get('thumbnailURL'), | ||||
|                 'alt_title': video_info.get('secondary_title'), | ||||
|                 'timestamp': parse_iso8601(video_info.get('publishedDate')), | ||||
|                 'subtitles': subtitles, | ||||
|             }) | ||||
|  | ||||
|         return self.playlist_result(entries, display_id, video_title) | ||||
|         return self._extract_video_info(video, stream, display_id) | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     extract_attributes, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
| @@ -27,42 +28,9 @@ class DiscoveryGoBaseIE(InfoExtractor): | ||||
|             velocitychannel | ||||
|         )go\.com/%s(?P<id>[^/?#&]+)''' | ||||
|  | ||||
|  | ||||
| class DiscoveryGoIE(DiscoveryGoBaseIE): | ||||
|     _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _TEST = { | ||||
|         'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', | ||||
|         'info_dict': { | ||||
|             'id': '58c167d86b66d12f2addeb01', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Reaper Madness', | ||||
|             'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', | ||||
|             'duration': 2519, | ||||
|             'series': 'Bering Sea Gold', | ||||
|             'season_number': 8, | ||||
|             'episode_number': 6, | ||||
|             'age_limit': 14, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         container = extract_attributes( | ||||
|             self._search_regex( | ||||
|                 r'(<div[^>]+class=["\']video-player-container[^>]+>)', | ||||
|                 webpage, 'video container')) | ||||
|  | ||||
|         video = self._parse_json( | ||||
|             container.get('data-video') or container.get('data-json'), | ||||
|             display_id) | ||||
|  | ||||
|     def _extract_video_info(self, video, stream, display_id): | ||||
|         title = video['name'] | ||||
|  | ||||
|         stream = video.get('stream') | ||||
|         if not stream: | ||||
|             if video.get('authenticated') is True: | ||||
|                 raise ExtractorError( | ||||
| @@ -106,7 +74,11 @@ class DiscoveryGoIE(DiscoveryGoBaseIE): | ||||
|                         not subtitle_url.startswith('http')): | ||||
|                     continue | ||||
|                 lang = caption.get('fileLang', 'en') | ||||
|                 subtitles.setdefault(lang, []).append({'url': subtitle_url}) | ||||
|                 ext = determine_ext(subtitle_url) | ||||
|                 subtitles.setdefault(lang, []).append({ | ||||
|                     'url': subtitle_url, | ||||
|                     'ext': 'ttml' if ext == 'xml' else ext, | ||||
|                 }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -124,6 +96,43 @@ class DiscoveryGoIE(DiscoveryGoBaseIE): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class DiscoveryGoIE(DiscoveryGoBaseIE): | ||||
|     # Extractor for single video pages; the URL pattern is the shared base | ||||
|     # template with one or more path segments before the final id segment. | ||||
|     _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _TEST = { | ||||
|         'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', | ||||
|         'info_dict': { | ||||
|             'id': '58c167d86b66d12f2addeb01', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Reaper Madness', | ||||
|             'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', | ||||
|             'duration': 2519, | ||||
|             'series': 'Bering Sea Gold', | ||||
|             'season_number': 8, | ||||
|             'episode_number': 6, | ||||
|             'age_limit': 14, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         """Download the page at ``url`` and build the info dict from the | ||||
|         video JSON embedded in its player container element.""" | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         # Grab the opening <div> tag whose class starts with | ||||
|         # "video-player-container" and turn its attributes into a dict. | ||||
|         container = extract_attributes( | ||||
|             self._search_regex( | ||||
|                 r'(<div[^>]+class=["\']video-player-container[^>]+>)', | ||||
|                 webpage, 'video container')) | ||||
|  | ||||
|         # The video metadata is JSON stored in either the data-video or the | ||||
|         # data-json attribute; data-video takes precedence when non-empty. | ||||
|         video = self._parse_json( | ||||
|             container.get('data-video') or container.get('data-json'), | ||||
|             display_id) | ||||
|  | ||||
|         # May be None when the key is missing; the shared helper receives it | ||||
|         # as-is together with the full video dict. | ||||
|         stream = video.get('stream') | ||||
|  | ||||
|         return self._extract_video_info(video, stream, display_id) | ||||
|  | ||||
|  | ||||
| class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): | ||||
|     _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' | ||||
|     _TEST = { | ||||
|   | ||||
| @@ -10,6 +10,7 @@ from ..utils import ( | ||||
|     compat_str, | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -108,9 +109,16 @@ class DisneyIE(InfoExtractor): | ||||
|                 continue | ||||
|             tbr = int_or_none(flavor.get('bitrate')) | ||||
|             if tbr == 99999: | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                 # A wrong ks (Kaltura Signature) causes a 404 error | ||||
|                 flavor_url = update_url_query(flavor_url, {'ks': ''}) | ||||
|                 m3u8_formats = self._extract_m3u8_formats( | ||||
|                     flavor_url, video_id, 'mp4', | ||||
|                     m3u8_id=flavor_format, fatal=False)) | ||||
|                     m3u8_id=flavor_format, fatal=False) | ||||
|                 for f in m3u8_formats: | ||||
|                     # Apple FairPlay | ||||
|                     if '/fpshls/' in f['url']: | ||||
|                         continue | ||||
|                     formats.append(f) | ||||
|                 continue | ||||
|             format_id = [] | ||||
|             if flavor_format: | ||||
|   | ||||
							
								
								
									
										133
									
								
								youtube_dl/extractor/ellentube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								youtube_dl/extractor/ellentube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     extract_attributes, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EllenTubeBaseIE(InfoExtractor): | ||||
|     def _extract_data_config(self, webpage, video_id): | ||||
|         details = self._search_regex( | ||||
|             r'(<[^>]+\bdata-component=(["\'])[Dd]etails.+?></div>)', webpage, | ||||
|             'details') | ||||
|         return self._parse_json( | ||||
|             extract_attributes(details)['data-config'], video_id) | ||||
|  | ||||
|     def _extract_video(self, data, video_id): | ||||
|         title = data['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         duration = None | ||||
|         for entry in data.get('media'): | ||||
|             if entry.get('id') == 'm3u8': | ||||
|                 formats = self._extract_m3u8_formats( | ||||
|                     entry['url'], video_id, 'mp4', | ||||
|                     entry_protocol='m3u8_native', m3u8_id='hls') | ||||
|                 duration = int_or_none(entry.get('duration')) | ||||
|                 break | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         def get_insight(kind): | ||||
|             return int_or_none(try_get( | ||||
|                 data, lambda x: x['insight']['%ss' % kind])) | ||||
|  | ||||
|         return { | ||||
|             'extractor_key': EllenTubeIE.ie_key(), | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': data.get('description'), | ||||
|             'duration': duration, | ||||
|             'thumbnail': data.get('thumbnail'), | ||||
|             'timestamp': float_or_none(data.get('publishTime'), scale=1000), | ||||
|             'view_count': get_insight('view'), | ||||
|             'like_count': get_insight('like'), | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class EllenTubeIE(EllenTubeBaseIE): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                         (?: | ||||
|                             ellentube:| | ||||
|                             https://api-prod\.ellentube\.com/ellenapi/api/item/ | ||||
|                         ) | ||||
|                         (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://api-prod.ellentube.com/ellenapi/api/item/0822171c-3829-43bf-b99f-d77358ae75e3', | ||||
|         'md5': '2fabc277131bddafdd120e0fc0f974c9', | ||||
|         'info_dict': { | ||||
|             'id': '0822171c-3829-43bf-b99f-d77358ae75e3', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck', | ||||
|             'description': 'md5:76e3355e2242a78ad9e3858e5616923f', | ||||
|             'thumbnail': r're:^https?://.+?', | ||||
|             'duration': 514, | ||||
|             'timestamp': 1508505120, | ||||
|             'upload_date': '20171020', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         data = self._download_json( | ||||
|             'https://api-prod.ellentube.com/ellenapi/api/item/%s' % video_id, | ||||
|             video_id) | ||||
|         return self._extract_video(data, video_id) | ||||
|  | ||||
|  | ||||
| class EllenTubeVideoIE(EllenTubeBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P<id>.+?)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html', | ||||
|         'only_matching': True, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_id = self._extract_data_config(webpage, display_id)['id'] | ||||
|         return self.url_result( | ||||
|             'ellentube:%s' % video_id, ie=EllenTubeIE.ie_key(), | ||||
|             video_id=video_id) | ||||
|  | ||||
|  | ||||
| class EllenTubePlaylistIE(EllenTubeBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P<id>.+?)\.html' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html', | ||||
|         'info_dict': { | ||||
|             'id': 'dax-shepard-jordan-fisher-haim', | ||||
|             'title': "Dax Shepard, 'DWTS' Team Jordan Fisher & Lindsay Arnold, HAIM", | ||||
|             'description': 'md5:bfc982194dabb3f4e325e43aa6b2e21c', | ||||
|         }, | ||||
|         'playlist_count': 6, | ||||
|     }, { | ||||
|         'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         data = self._extract_data_config(webpage, display_id)['data'] | ||||
|         feed = self._download_json( | ||||
|             'https://api-prod.ellentube.com/ellenapi/api/feed/?%s' | ||||
|             % data['filter'], display_id) | ||||
|         entries = [ | ||||
|             self._extract_video(elem, elem['id']) | ||||
|             for elem in feed if elem.get('type') == 'VIDEO' and elem.get('id')] | ||||
|         return self.playlist_result( | ||||
|             entries, display_id, data.get('title'), | ||||
|             clean_html(data.get('description'))) | ||||
| @@ -1,101 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .kaltura import KalturaIE | ||||
| from ..utils import NO_DEFAULT | ||||
|  | ||||
|  | ||||
| class EllenTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', | ||||
|         'md5': '4294cf98bc165f218aaa0b89e0fd8042', | ||||
|         'info_dict': { | ||||
|             'id': '0_ipq1gsai', | ||||
|             'ext': 'mov', | ||||
|             'title': 'Fast Fingers of Fate', | ||||
|             'description': 'md5:3539013ddcbfa64b2a6d1b38d910868a', | ||||
|             'timestamp': 1428035648, | ||||
|             'upload_date': '20150403', | ||||
|             'uploader_id': 'batchUser', | ||||
|         }, | ||||
|     }, { | ||||
|         # not available via http://widgets.ellentube.com/ | ||||
|         'url': 'http://www.ellentv.com/videos/1-szkgu2m2/', | ||||
|         'info_dict': { | ||||
|             'id': '1_szkgu2m2', | ||||
|             'ext': 'flv', | ||||
|             'title': "Ellen's Amazingly Talented Audience", | ||||
|             'description': 'md5:86ff1e376ff0d717d7171590e273f0a5', | ||||
|             'timestamp': 1255140900, | ||||
|             'upload_date': '20091010', | ||||
|             'uploader_id': 'ellenkaltura@gmail.com', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url) | ||||
|  | ||||
|         for num, url_ in enumerate(URLS, 1): | ||||
|             webpage = self._download_webpage( | ||||
|                 url_, video_id, fatal=num == len(URLS)) | ||||
|  | ||||
|             default = NO_DEFAULT if num == len(URLS) else None | ||||
|  | ||||
|             partner_id = self._search_regex( | ||||
|                 r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id', | ||||
|                 default=default) | ||||
|  | ||||
|             kaltura_id = self._search_regex( | ||||
|                 [r'id="kaltura_player_([^"]+)"', | ||||
|                  r"_wb_entry_id\s*:\s*'([^']+)", | ||||
|                  r'data-kaltura-entry-id="([^"]+)'], | ||||
|                 webpage, 'kaltura id', default=default) | ||||
|  | ||||
|             if partner_id and kaltura_id: | ||||
|                 break | ||||
|  | ||||
|         return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key()) | ||||
|  | ||||
|  | ||||
| class EllenTVClipsIE(InfoExtractor): | ||||
|     IE_NAME = 'EllenTV:clips' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/', | ||||
|         'info_dict': { | ||||
|             'id': 'meryl-streep-vanessa-hudgens', | ||||
|             'title': 'Meryl Streep, Vanessa Hudgens', | ||||
|         }, | ||||
|         'playlist_mincount': 5, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         playlist = self._extract_playlist(webpage, playlist_id) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'entries': self._extract_entries(playlist) | ||||
|         } | ||||
|  | ||||
|     def _extract_playlist(self, webpage, playlist_id): | ||||
|         json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json') | ||||
|         return self._parse_json('[{' + json_string + '}]', playlist_id) | ||||
|  | ||||
|     def _extract_entries(self, playlist): | ||||
|         return [ | ||||
|             self.url_result( | ||||
|                 'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']), | ||||
|                 KalturaIE.ie_key(), video_id=item['kaltura_entry_id']) | ||||
|             for item in playlist] | ||||
| @@ -1,6 +1,9 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .once import OnceIE | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
| @@ -9,22 +12,27 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ESPNIE(InfoExtractor): | ||||
| class ESPNIE(OnceIE): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?: | ||||
|                             (?:(?:\w+\.)+)?espn\.go| | ||||
|                             (?:www\.)?espn | ||||
|                         )\.com/ | ||||
|                         (?: | ||||
|                             (?: | ||||
|                                 video/clip| | ||||
|                                 watch/player | ||||
|                             ) | ||||
|                             (?: | ||||
|                                 \?.*?\bid=| | ||||
|                                 /_/id/ | ||||
|                             ) | ||||
|                                 (?: | ||||
|                                     (?:(?:\w+\.)+)?espn\.go| | ||||
|                                     (?:www\.)?espn | ||||
|                                 )\.com/ | ||||
|                                 (?: | ||||
|                                     (?: | ||||
|                                         video/(?:clip|iframe/twitter)| | ||||
|                                         watch/player | ||||
|                                     ) | ||||
|                                     (?: | ||||
|                                         .*?\?.*?\bid=| | ||||
|                                         /_/id/ | ||||
|                                     ) | ||||
|                                 ) | ||||
|                             )| | ||||
|                             (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ | ||||
|                         ) | ||||
|                         (?P<id>\d+) | ||||
|                     ''' | ||||
| @@ -77,6 +85,15 @@ class ESPNIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.espn.com/video/clip/_/id/17989860', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -93,7 +110,9 @@ class ESPNIE(InfoExtractor): | ||||
|  | ||||
|         def traverse_source(source, base_source_id=None): | ||||
|             for source_id, source in source.items(): | ||||
|                 if isinstance(source, compat_str): | ||||
|                 if source_id == 'alert': | ||||
|                     continue | ||||
|                 elif isinstance(source, compat_str): | ||||
|                     extract_source(source, base_source_id) | ||||
|                 elif isinstance(source, dict): | ||||
|                     traverse_source( | ||||
| @@ -106,7 +125,9 @@ class ESPNIE(InfoExtractor): | ||||
|                 return | ||||
|             format_urls.add(source_url) | ||||
|             ext = determine_ext(source_url) | ||||
|             if ext == 'smil': | ||||
|             if OnceIE.suitable(source_url): | ||||
|                 formats.extend(self._extract_once_formats(source_url)) | ||||
|             elif ext == 'smil': | ||||
|                 formats.extend(self._extract_smil_formats( | ||||
|                     source_url, video_id, fatal=False)) | ||||
|             elif ext == 'f4m': | ||||
| @@ -117,12 +138,24 @@ class ESPNIE(InfoExtractor): | ||||
|                     source_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id=source_id, fatal=False)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                 f = { | ||||
|                     'url': source_url, | ||||
|                     'format_id': source_id, | ||||
|                 }) | ||||
|                 } | ||||
|                 mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url) | ||||
|                 if mobj: | ||||
|                     f.update({ | ||||
|                         'height': int(mobj.group(1)), | ||||
|                         'fps': int(mobj.group(2)), | ||||
|                         'tbr': int(mobj.group(3)), | ||||
|                     }) | ||||
|                 if source_id == 'mezzanine': | ||||
|                     f['preference'] = 1 | ||||
|                 formats.append(f) | ||||
|  | ||||
|         traverse_source(clip['links']['source']) | ||||
|         links = clip.get('links', {}) | ||||
|         traverse_source(links.get('source', {})) | ||||
|         traverse_source(links.get('mobile', {})) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = clip.get('caption') or clip.get('description') | ||||
| @@ -144,9 +177,6 @@ class ESPNIE(InfoExtractor): | ||||
| class ESPNArticleIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://espn.go.com/nba/recap?gameId=400793786', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
| @@ -175,3 +205,34 @@ class ESPNArticleIE(InfoExtractor): | ||||
|  | ||||
|         return self.url_result( | ||||
|             'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) | ||||
|  | ||||
|  | ||||
| class FiveThirtyEightIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', | ||||
|         'info_dict': { | ||||
|             'id': '21846851', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'FiveThirtyEight: The Raiders can still make the playoffs', | ||||
|             'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', | ||||
|             'timestamp': 1513960621, | ||||
|             'upload_date': '20171222', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Unable to download f4m manifest'], | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-video-id=["\'](?P<id>\d+)', | ||||
|             webpage, 'video id', group='id') | ||||
|  | ||||
|         return self.url_result( | ||||
|             'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) | ||||
|   | ||||
| @@ -138,10 +138,7 @@ from .brightcove import ( | ||||
|     BrightcoveNewIE, | ||||
| ) | ||||
| from .buzzfeed import BuzzFeedIE | ||||
| from .byutv import ( | ||||
|     BYUtvIE, | ||||
|     BYUtvEventIE, | ||||
| ) | ||||
| from .byutv import BYUtvIE | ||||
| from .c56 import C56IE | ||||
| from .camdemy import ( | ||||
|     CamdemyIE, | ||||
| @@ -246,7 +243,6 @@ from .dailymotion import ( | ||||
|     DailymotionIE, | ||||
|     DailymotionPlaylistIE, | ||||
|     DailymotionUserIE, | ||||
|     DailymotionCloudIE, | ||||
| ) | ||||
| from .daisuki import ( | ||||
|     DaisukiMottoIE, | ||||
| @@ -312,9 +308,10 @@ from .ehow import EHowIE | ||||
| from .eighttracks import EightTracksIE | ||||
| from .einthusan import EinthusanIE | ||||
| from .eitb import EitbIE | ||||
| from .ellentv import ( | ||||
|     EllenTVIE, | ||||
|     EllenTVClipsIE, | ||||
| from .ellentube import ( | ||||
|     EllenTubeIE, | ||||
|     EllenTubeVideoIE, | ||||
|     EllenTubePlaylistIE, | ||||
| ) | ||||
| from .elpais import ElPaisIE | ||||
| from .embedly import EmbedlyIE | ||||
| @@ -325,6 +322,7 @@ from .escapist import EscapistIE | ||||
| from .espn import ( | ||||
|     ESPNIE, | ||||
|     ESPNArticleIE, | ||||
|     FiveThirtyEightIE, | ||||
| ) | ||||
| from .esri import EsriVideoIE | ||||
| from .etonline import ETOnlineIE | ||||
| @@ -689,6 +687,7 @@ from .nhl import ( | ||||
| ) | ||||
| from .nick import ( | ||||
|     NickIE, | ||||
|     NickBrIE, | ||||
|     NickDeIE, | ||||
|     NickNightIE, | ||||
|     NickRuIE, | ||||
| @@ -721,10 +720,6 @@ from .nowness import ( | ||||
|     NownessPlaylistIE, | ||||
|     NownessSeriesIE, | ||||
| ) | ||||
| from .nowtv import ( | ||||
|     NowTVIE, | ||||
|     NowTVListIE, | ||||
| ) | ||||
| from .noz import NozIE | ||||
| from .npo import ( | ||||
|     AndereTijdenIE, | ||||
| @@ -857,6 +852,7 @@ from .radiofrance import RadioFranceIE | ||||
| from .rai import ( | ||||
|     RaiPlayIE, | ||||
|     RaiPlayLiveIE, | ||||
|     RaiPlayPlaylistIE, | ||||
|     RaiIE, | ||||
| ) | ||||
| from .rbmaradio import RBMARadioIE | ||||
| @@ -931,8 +927,12 @@ from .senateisvp import SenateISVPIE | ||||
| from .sendtonews import SendtoNewsIE | ||||
| from .servingsys import ServingSysIE | ||||
| from .servus import ServusIE | ||||
| from .sevenplus import SevenPlusIE | ||||
| from .sexu import SexuIE | ||||
| from .shahid import ShahidIE | ||||
| from .shahid import ( | ||||
|     ShahidIE, | ||||
|     ShahidShowIE, | ||||
| ) | ||||
| from .shared import ( | ||||
|     SharedIE, | ||||
|     VivoIE, | ||||
| @@ -1000,6 +1000,7 @@ from .streamango import StreamangoIE | ||||
| from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .streetvoice import StreetVoiceIE | ||||
| from .stretchinternet import StretchInternetIE | ||||
| from .sunporno import SunPornoIE | ||||
| from .svt import ( | ||||
|     SVTIE, | ||||
| @@ -1102,6 +1103,10 @@ from .tvigle import TvigleIE | ||||
| from .tvland import TVLandIE | ||||
| from .tvn24 import TVN24IE | ||||
| from .tvnoe import TVNoeIE | ||||
| from .tvnow import ( | ||||
|     TVNowIE, | ||||
|     TVNowListIE, | ||||
| ) | ||||
| from .tvp import ( | ||||
|     TVPEmbedIE, | ||||
|     TVPIE, | ||||
| @@ -1139,6 +1144,7 @@ from .udemy import ( | ||||
| from .udn import UDNEmbedIE | ||||
| from .uktvplay import UKTVPlayIE | ||||
| from .digiteka import DigitekaIE | ||||
| from .umg import UMGDeIE | ||||
| from .unistra import UnistraIE | ||||
| from .unity import UnityIE | ||||
| from .uol import UOLIE | ||||
|   | ||||
| @@ -11,6 +11,7 @@ from ..utils import ( | ||||
|     parse_duration, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -62,7 +63,8 @@ class FOXIE(AdobePassIE): | ||||
|         duration = int_or_none(video.get('durationInSeconds')) or int_or_none( | ||||
|             video.get('duration')) or parse_duration(video.get('duration')) | ||||
|         timestamp = unified_timestamp(video.get('datePublished')) | ||||
|         age_limit = parse_age_limit(video.get('contentRating')) | ||||
|         rating = video.get('contentRating') | ||||
|         age_limit = parse_age_limit(rating) | ||||
|  | ||||
|         data = try_get( | ||||
|             video, lambda x: x['trackingData']['properties'], dict) or {} | ||||
| @@ -77,8 +79,24 @@ class FOXIE(AdobePassIE): | ||||
|         release_year = int_or_none(video.get('releaseYear')) | ||||
|  | ||||
|         if data.get('authRequired'): | ||||
|             # TODO: AP | ||||
|             pass | ||||
|             resource = self._get_mvpd_resource( | ||||
|                 'fbc-fox', title, video.get('guid'), rating) | ||||
|             release_url = update_url_query( | ||||
|                 release_url, { | ||||
|                     'auth': self._extract_mvpd_auth( | ||||
|                         url, video_id, 'fbc-fox', resource) | ||||
|                 }) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for doc_rel in video.get('documentReleases', []): | ||||
|             rel_url = doc_rel.get('url') | ||||
|             if not url or doc_rel.get('format') != 'SCC': | ||||
|                 continue | ||||
|             subtitles['en'] = [{ | ||||
|                 'url': rel_url, | ||||
|                 'ext': 'scc', | ||||
|             }] | ||||
|             break | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
| @@ -93,6 +111,7 @@ class FOXIE(AdobePassIE): | ||||
|             'episode': episode, | ||||
|             'episode_number': episode_number, | ||||
|             'release_year': release_year, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|         urlh = self._request_webpage(HEADRequest(release_url), video_id) | ||||
|   | ||||
| @@ -13,10 +13,7 @@ from ..utils import ( | ||||
|     parse_duration, | ||||
|     determine_ext, | ||||
| ) | ||||
| from .dailymotion import ( | ||||
|     DailymotionIE, | ||||
|     DailymotionCloudIE, | ||||
| ) | ||||
| from .dailymotion import DailymotionIE | ||||
|  | ||||
|  | ||||
| class FranceTVBaseInfoExtractor(InfoExtractor): | ||||
| @@ -290,10 +287,6 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|  | ||||
|         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage) | ||||
|         if dmcloud_url: | ||||
|             return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key()) | ||||
|  | ||||
|         dailymotion_urls = DailymotionIE._extract_urls(webpage) | ||||
|         if dailymotion_urls: | ||||
|             return self.playlist_result([ | ||||
| @@ -363,6 +356,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor): | ||||
|             raise ExtractorError('Video %s is not available' % name, expected=True) | ||||
|  | ||||
|         video_id, catalogue = self._search_regex( | ||||
|             r'"https?://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') | ||||
|             r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]', | ||||
|             webpage, 'video id').split('@') | ||||
|  | ||||
|         return self._extract_video(video_id, catalogue) | ||||
|   | ||||
| @@ -59,10 +59,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE | ||||
| from .drtuber import DrTuberIE | ||||
| from .redtube import RedTubeIE | ||||
| from .vimeo import VimeoIE | ||||
| from .dailymotion import ( | ||||
|     DailymotionIE, | ||||
|     DailymotionCloudIE, | ||||
| ) | ||||
| from .dailymotion import DailymotionIE | ||||
| from .dailymail import DailyMailIE | ||||
| from .onionstudios import OnionStudiosIE | ||||
| from .viewlift import ViewLiftEmbedIE | ||||
| @@ -1472,23 +1469,6 @@ class GenericIE(InfoExtractor): | ||||
|                 'timestamp': 1432570283, | ||||
|             }, | ||||
|         }, | ||||
|         # Dailymotion Cloud video | ||||
|         { | ||||
|             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', | ||||
|             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38', | ||||
|             'info_dict': { | ||||
|                 'id': 'x2uy8t3', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Sauvons les abeilles ! - Le débat', | ||||
|                 'description': 'md5:d9082128b1c5277987825d684939ca26', | ||||
|                 'thumbnail': r're:^https?://.*\.jpe?g$', | ||||
|                 'timestamp': 1434970506, | ||||
|                 'upload_date': '20150622', | ||||
|                 'uploader': 'Public Sénat', | ||||
|                 'uploader_id': 'xa9gza', | ||||
|             }, | ||||
|             'skip': 'File not found.', | ||||
|         }, | ||||
|         # OnionStudios embed | ||||
|         { | ||||
|             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', | ||||
| @@ -2195,7 +2175,7 @@ class GenericIE(InfoExtractor): | ||||
|                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id) | ||||
|             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): | ||||
|                 info_dict['formats'] = self._parse_mpd_formats( | ||||
|                     doc, video_id, | ||||
|                     doc, | ||||
|                     mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0], | ||||
|                     mpd_url=url) | ||||
|                 self._sort_formats(info_dict['formats']) | ||||
| @@ -2704,11 +2684,6 @@ class GenericIE(InfoExtractor): | ||||
|         if senate_isvp_url: | ||||
|             return self.url_result(senate_isvp_url, 'SenateISVP') | ||||
|  | ||||
|         # Look for Dailymotion Cloud videos | ||||
|         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage) | ||||
|         if dmcloud_url: | ||||
|             return self.url_result(dmcloud_url, 'DailymotionCloud') | ||||
|  | ||||
|         # Look for OnionStudios embeds | ||||
|         onionstudios_url = OnionStudiosIE._extract_url(webpage) | ||||
|         if onionstudios_url: | ||||
|   | ||||
| @@ -26,7 +26,7 @@ from ..utils import ( | ||||
| class ITVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' | ||||
|     _GEO_COUNTRIES = ['GB'] | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', | ||||
|         'info_dict': { | ||||
|             'id': '2a2936a0053', | ||||
| @@ -37,7 +37,11 @@ class ITVIE(InfoExtractor): | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # unavailable via data-playlist-url | ||||
|         'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -101,6 +105,18 @@ class ITVIE(InfoExtractor): | ||||
|             'Content-Type': 'text/xml; charset=utf-8', | ||||
|             'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist', | ||||
|         }) | ||||
|  | ||||
|         info = self._search_json_ld(webpage, video_id, default={}) | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|  | ||||
|         def extract_subtitle(sub_url): | ||||
|             ext = determine_ext(sub_url, 'ttml') | ||||
|             subtitles.setdefault('en', []).append({ | ||||
|                 'url': sub_url, | ||||
|                 'ext': 'ttml' if ext == 'xml' else ext, | ||||
|             }) | ||||
|  | ||||
|         resp_env = self._download_xml( | ||||
|             params['data-playlist-url'], video_id, | ||||
|             headers=headers, data=etree.tostring(req_env)) | ||||
| @@ -111,37 +127,55 @@ class ITVIE(InfoExtractor): | ||||
|             if fault_code == 'InvalidGeoRegion': | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg=fault_string, countries=self._GEO_COUNTRIES) | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) | ||||
|         title = xpath_text(playlist, 'EpisodeTitle', fatal=True) | ||||
|         video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) | ||||
|         media_files = xpath_element(video_element, 'MediaFiles', fatal=True) | ||||
|         rtmp_url = media_files.attrib['base'] | ||||
|             elif fault_code != 'InvalidEntity': | ||||
|                 raise ExtractorError( | ||||
|                     '%s said: %s' % (self.IE_NAME, fault_string), expected=True) | ||||
|             info.update({ | ||||
|                 'title': self._og_search_title(webpage), | ||||
|                 'episode_title': params.get('data-video-episode'), | ||||
|                 'series': params.get('data-video-title'), | ||||
|             }) | ||||
|         else: | ||||
|             title = xpath_text(playlist, 'EpisodeTitle', default=None) | ||||
|             info.update({ | ||||
|                 'title': title, | ||||
|                 'episode_title': title, | ||||
|                 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), | ||||
|                 'series': xpath_text(playlist, 'ProgrammeTitle'), | ||||
|                 'duration': parse_duration(xpath_text(playlist, 'Duration')), | ||||
|             }) | ||||
|             video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) | ||||
|             media_files = xpath_element(video_element, 'MediaFiles', fatal=True) | ||||
|             rtmp_url = media_files.attrib['base'] | ||||
|  | ||||
|         formats = [] | ||||
|         for media_file in media_files.findall('MediaFile'): | ||||
|             play_path = xpath_text(media_file, 'URL') | ||||
|             if not play_path: | ||||
|                 continue | ||||
|             tbr = int_or_none(media_file.get('bitrate'), 1000) | ||||
|             f = { | ||||
|                 'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), | ||||
|                 'play_path': play_path, | ||||
|                 # Providing this swfVfy allows to avoid truncated downloads | ||||
|                 'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', | ||||
|                 'page_url': url, | ||||
|                 'tbr': tbr, | ||||
|                 'ext': 'flv', | ||||
|             } | ||||
|             app = self._search_regex( | ||||
|                 'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) | ||||
|             if app: | ||||
|                 f.update({ | ||||
|                     'url': rtmp_url.split('?', 1)[0], | ||||
|                     'app': app, | ||||
|                 }) | ||||
|             else: | ||||
|                 f['url'] = rtmp_url | ||||
|             formats.append(f) | ||||
|             for media_file in media_files.findall('MediaFile'): | ||||
|                 play_path = xpath_text(media_file, 'URL') | ||||
|                 if not play_path: | ||||
|                     continue | ||||
|                 tbr = int_or_none(media_file.get('bitrate'), 1000) | ||||
|                 f = { | ||||
|                     'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), | ||||
|                     'play_path': play_path, | ||||
|                     # Providing this swfVfy allows to avoid truncated downloads | ||||
|                     'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', | ||||
|                     'page_url': url, | ||||
|                     'tbr': tbr, | ||||
|                     'ext': 'flv', | ||||
|                 } | ||||
|                 app = self._search_regex( | ||||
|                     'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) | ||||
|                 if app: | ||||
|                     f.update({ | ||||
|                         'url': rtmp_url.split('?', 1)[0], | ||||
|                         'app': app, | ||||
|                     }) | ||||
|                 else: | ||||
|                     f['url'] = rtmp_url | ||||
|                 formats.append(f) | ||||
|  | ||||
|             for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): | ||||
|                 if caption_url.text: | ||||
|                     extract_subtitle(caption_url.text) | ||||
|  | ||||
|         ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id') | ||||
|         hmac = params.get('data-video-hmac') | ||||
| @@ -198,27 +232,22 @@ class ITVIE(InfoExtractor): | ||||
|                         formats.append({ | ||||
|                             'url': href, | ||||
|                         }) | ||||
|                 subs = video_data.get('Subtitles') | ||||
|                 if isinstance(subs, list): | ||||
|                     for sub in subs: | ||||
|                         if not isinstance(sub, dict): | ||||
|                             continue | ||||
|                         href = sub.get('Href') | ||||
|                         if isinstance(href, compat_str): | ||||
|                             extract_subtitle(href) | ||||
|                 if not info.get('duration'): | ||||
|                     info['duration'] = parse_duration(video_data.get('Duration')) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): | ||||
|             if not caption_url.text: | ||||
|                 continue | ||||
|             ext = determine_ext(caption_url.text, 'ttml') | ||||
|             subtitles.setdefault('en', []).append({ | ||||
|                 'url': caption_url.text, | ||||
|                 'ext': 'ttml' if ext == 'xml' else ext, | ||||
|             }) | ||||
|  | ||||
|         info = self._search_json_ld(webpage, video_id, default={}) | ||||
|         info.update({ | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'episode_title': title, | ||||
|             'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), | ||||
|             'series': xpath_text(playlist, 'ProgrammeTitle'), | ||||
|             'duartion': parse_duration(xpath_text(playlist, 'Duration')), | ||||
|         }) | ||||
|         return info | ||||
|   | ||||
| @@ -125,9 +125,12 @@ class KalturaIE(InfoExtractor): | ||||
|                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* | ||||
|                     (?P=q1).*? | ||||
|                     (?: | ||||
|                         entry_?[Ii]d| | ||||
|                         (?P<q2>["'])entry_?[Ii]d(?P=q2) | ||||
|                     )\s*:\s* | ||||
|                         (?: | ||||
|                             entry_?[Ii]d| | ||||
|                             (?P<q2>["'])entry_?[Ii]d(?P=q2) | ||||
|                         )\s*:\s*| | ||||
|                         \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* | ||||
|                     ) | ||||
|                     (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) | ||||
|                 ''', webpage) or | ||||
|             re.search( | ||||
|   | ||||
| @@ -13,8 +13,15 @@ from ..utils import ( | ||||
| class MailRuIE(InfoExtractor): | ||||
|     IE_NAME = 'mailru' | ||||
|     IE_DESC = 'Видео@Mail.Ru' | ||||
|     _VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' | ||||
|  | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?:(?:www|m)\.)?my\.mail\.ru/ | ||||
|                         (?: | ||||
|                             video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)| | ||||
|                             (?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html| | ||||
|                             (?:video/embed|\+/video/meta)/(?P<metaid>\d+) | ||||
|                         ) | ||||
|                     ''' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', | ||||
| @@ -23,7 +30,7 @@ class MailRuIE(InfoExtractor): | ||||
|                 'id': '46301138_76', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', | ||||
|                 'timestamp': 1393232740, | ||||
|                 'timestamp': 1393235077, | ||||
|                 'upload_date': '20140224', | ||||
|                 'uploader': 'sonypicturesrus', | ||||
|                 'uploader_id': 'sonypicturesrus@mail.ru', | ||||
| @@ -40,7 +47,7 @@ class MailRuIE(InfoExtractor): | ||||
|                 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', | ||||
|                 'timestamp': 1397039888, | ||||
|                 'upload_date': '20140409', | ||||
|                 'uploader': 'hitech@corp.mail.ru', | ||||
|                 'uploader': 'hitech', | ||||
|                 'uploader_id': 'hitech@corp.mail.ru', | ||||
|                 'duration': 245, | ||||
|             }, | ||||
| @@ -65,28 +72,42 @@ class MailRuIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://my.mail.ru/video/embed/7949340477499637815', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://my.mail.ru/+/video/meta/7949340477499637815', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('idv1') | ||||
|         meta_id = mobj.group('metaid') | ||||
|  | ||||
|         if not video_id: | ||||
|             video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_id = None | ||||
|         if meta_id: | ||||
|             meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id | ||||
|         else: | ||||
|             video_id = mobj.group('idv1') | ||||
|             if not video_id: | ||||
|                 video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|             page_config = self._parse_json(self._search_regex( | ||||
|                 r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', | ||||
|                 webpage, 'page config', default='{}'), video_id, fatal=False) | ||||
|             if page_config: | ||||
|                 meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') | ||||
|             else: | ||||
|                 meta_url = None | ||||
|  | ||||
|         video_data = None | ||||
|  | ||||
|         page_config = self._parse_json(self._search_regex( | ||||
|             r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', | ||||
|             webpage, 'page config', default='{}'), video_id, fatal=False) | ||||
|         if page_config: | ||||
|             meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') | ||||
|             if meta_url: | ||||
|                 video_data = self._download_json( | ||||
|                     meta_url, video_id, 'Downloading video meta JSON', fatal=False) | ||||
|         if meta_url: | ||||
|             video_data = self._download_json( | ||||
|                 meta_url, video_id or meta_id, 'Downloading video meta JSON', | ||||
|                 fatal=not video_id) | ||||
|  | ||||
|         # Fallback old approach | ||||
|         if not video_data: | ||||
|   | ||||
| @@ -10,7 +10,7 @@ from ..utils import update_url_query | ||||
| class NickIE(MTVServicesInfoExtractor): | ||||
|     # None of the videos on the website are still alive? | ||||
|     IE_NAME = 'nick.com' | ||||
|     _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' | ||||
|     _VALID_URL = r'https?://(?P<domain>(?:(?:www|beta)\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' | ||||
|     _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _TESTS = [{ | ||||
| @@ -69,8 +69,59 @@ class NickIE(MTVServicesInfoExtractor): | ||||
|             'mgid': uri, | ||||
|         } | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') | ||||
|     def _real_extract(self, url): | ||||
|         domain, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         video_data = self._download_json( | ||||
|             'http://%s/data/video.endLevel.json' % domain, | ||||
|             display_id, query={ | ||||
|                 'urlKey': display_id, | ||||
|             }) | ||||
|         return self._get_videos_info(video_data['player'] + video_data['id']) | ||||
|  | ||||
|  | ||||
| class NickBrIE(MTVServicesInfoExtractor): | ||||
|     IE_NAME = 'nickelodeon:br' | ||||
|     _VALID_URL = r'https?://(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?#.]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nickjr.com.br/patrulha-canina/videos/210-labirinto-de-pipoca/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://mundonick.uol.com.br/programas/the-loud-house/videos/muitas-irmas/7ljo9j', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         domain, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         uri = self._search_regex( | ||||
|             r'data-(?:contenturi|mgid)="([^"]+)', webpage, 'mgid') | ||||
|         video_id = self._id_from_uri(uri) | ||||
|         config = self._download_json( | ||||
|             'http://media.mtvnservices.com/pmt/e1/access/index.html', | ||||
|             video_id, query={ | ||||
|                 'uri': uri, | ||||
|                 'configtype': 'edge', | ||||
|             }, headers={ | ||||
|                 'Referer': url, | ||||
|             }) | ||||
|         info_url = self._remove_template_parameter(config['feedWithQueryParams']) | ||||
|         if info_url == 'None': | ||||
|             if domain.startswith('www.'): | ||||
|                 domain = domain[4:] | ||||
|             content_domain = { | ||||
|                 'mundonick.uol': 'mundonick.com.br', | ||||
|                 'nickjr': 'br.nickelodeonjunior.tv', | ||||
|             }[domain] | ||||
|             query = { | ||||
|                 'mgid': uri, | ||||
|                 'imageEp': content_domain, | ||||
|                 'arcEp': content_domain, | ||||
|             } | ||||
|             if domain == 'nickjr.com.br': | ||||
|                 query['ep'] = 'c4b16088' | ||||
|             info_url = update_url_query( | ||||
|                 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed', query) | ||||
|         return self._get_videos_info_from_url(info_url, video_id) | ||||
|  | ||||
|  | ||||
| class NickDeIE(MTVServicesInfoExtractor): | ||||
|   | ||||
| @@ -1,261 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     parse_duration, | ||||
|     remove_start, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NowTVBaseIE(InfoExtractor): | ||||
|     _VIDEO_FIELDS = ( | ||||
|         'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort', | ||||
|         'broadcastStartDate', 'seoUrl', 'duration', 'files', | ||||
|         'format.defaultImage169Format', 'format.defaultImage169Logo') | ||||
|  | ||||
|     def _extract_video(self, info, display_id=None): | ||||
|         video_id = compat_str(info['id']) | ||||
|  | ||||
|         files = info['files'] | ||||
|         if not files: | ||||
|             if info.get('geoblocked', False): | ||||
|                 raise ExtractorError( | ||||
|                     'Video %s is not available from your location due to geo restriction' % video_id, | ||||
|                     expected=True) | ||||
|             if not info.get('free', True): | ||||
|                 raise ExtractorError( | ||||
|                     'Video %s is not available for free' % video_id, expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for item in files['items']: | ||||
|             if determine_ext(item['path']) != 'f4v': | ||||
|                 continue | ||||
|             app, play_path = remove_start(item['path'], '/').split('/', 1) | ||||
|             formats.append({ | ||||
|                 'url': 'rtmpe://fms.rtl.de', | ||||
|                 'app': app, | ||||
|                 'play_path': 'mp4:%s' % play_path, | ||||
|                 'ext': 'flv', | ||||
|                 'page_url': 'http://rtlnow.rtl.de', | ||||
|                 'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf', | ||||
|                 'tbr': int_or_none(item.get('bitrate')), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = info['title'] | ||||
|         description = info.get('articleLong') or info.get('articleShort') | ||||
|         timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ') | ||||
|         duration = parse_duration(info.get('duration')) | ||||
|  | ||||
|         f = info.get('format', {}) | ||||
|         thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id or info.get('seoUrl'), | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NowTVIE(NowTVBaseIE): | ||||
|     # Extractor for single nowtv.(de|at|ch) player/preview pages. | ||||
|     # Flagged as not working via _WORKING = False. | ||||
|     _WORKING = False | ||||
|     _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # rtl | ||||
|         'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player', | ||||
|         'info_dict': { | ||||
|             'id': '203519', | ||||
|             'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Inka Bause stellt die neuen Bauern vor', | ||||
|             'description': 'md5:e234e1ed6d63cf06be5c070442612e7e', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1432580700, | ||||
|             'upload_date': '20150525', | ||||
|             'duration': 2786, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # rtl2 | ||||
|         'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player', | ||||
|         'info_dict': { | ||||
|             'id': '203481', | ||||
|             'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Berlin - Tag & Nacht (Folge 934)', | ||||
|             'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1432666800, | ||||
|             'upload_date': '20150526', | ||||
|             'duration': 2641, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # rtlnitro | ||||
|         'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player', | ||||
|         'info_dict': { | ||||
|             'id': '165780', | ||||
|             'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Hals- und Beinbruch', | ||||
|             'description': 'md5:b50d248efffe244e6f56737f0911ca57', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1432415400, | ||||
|             'upload_date': '20150523', | ||||
|             'duration': 2742, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # superrtl | ||||
|         'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player', | ||||
|         'info_dict': { | ||||
|             'id': '99205', | ||||
|             'display_id': 'medicopter-117/angst', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Angst!', | ||||
|             'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1222632900, | ||||
|             'upload_date': '20080928', | ||||
|             'duration': 3025, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # ntv | ||||
|         'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player', | ||||
|         'info_dict': { | ||||
|             'id': '203521', | ||||
|             'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Thema u.a.: Der erste Blick: Die Apple Watch', | ||||
|             'description': 'md5:4312b6c9d839ffe7d8caf03865a531af', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1432751700, | ||||
|             'upload_date': '20150527', | ||||
|             'duration': 1083, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # vox | ||||
|         'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player', | ||||
|         'info_dict': { | ||||
|             'id': '128953', | ||||
|             'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel', | ||||
|             'ext': 'flv', | ||||
|             'title': "Büro-Fall / Chihuahua 'Joel'", | ||||
|             'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1432408200, | ||||
|             'upload_date': '20150523', | ||||
|             'duration': 3092, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.nowtv.de/rtl2/zuhause-im-glueck/jahr/2015/11/eine-erschuetternde-diagnose/player', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # The display id is '<show_id>/<id>' taken from the URL; it also | ||||
|         # forms the path of the API request below. | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id')) | ||||
|  | ||||
|         # Request only the fields listed in _VIDEO_FIELDS. | ||||
|         info = self._download_json( | ||||
|             'https://api.nowtv.de/v3/movies/%s?fields=%s' | ||||
|             % (display_id, ','.join(self._VIDEO_FIELDS)), display_id) | ||||
|  | ||||
|         return self._extract_video(info, display_id) | ||||
|  | ||||
|  | ||||
| class NowTVListIE(NowTVBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$' | ||||
|  | ||||
|     _SHOW_FIELDS = ('title', ) | ||||
|     _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell', | ||||
|         'info_dict': { | ||||
|             'id': '17006', | ||||
|             'title': 'stern TV - Aktuell', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|     }, { | ||||
|         'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8', | ||||
|         'info_dict': { | ||||
|             'id': '20716', | ||||
|             'title': 'Das Supertalent - FREE Staffel 8', | ||||
|         }, | ||||
|         'playlist_count': 14, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_id = mobj.group('show_id') | ||||
|         season_id = mobj.group('id') | ||||
|  | ||||
|         fields = [] | ||||
|         fields.extend(self._SHOW_FIELDS) | ||||
|         fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS) | ||||
|         fields.extend( | ||||
|             'formatTabs.formatTabPages.container.movies.%s' % field | ||||
|             for field in self._VIDEO_FIELDS) | ||||
|  | ||||
|         list_info = self._download_json( | ||||
|             'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php' | ||||
|             % (','.join(fields), show_id), | ||||
|             season_id) | ||||
|  | ||||
|         season = next( | ||||
|             season for season in list_info['formatTabs']['items'] | ||||
|             if season.get('seoheadline') == season_id) | ||||
|  | ||||
|         title = '%s - %s' % (list_info['title'], season['headline']) | ||||
|  | ||||
|         entries = [] | ||||
|         for container in season['formatTabPages']['items']: | ||||
|             for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []: | ||||
|                 entries.append(self._extract_video(info)) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, compat_str(season.get('id') or season_id), title) | ||||
| @@ -112,6 +112,8 @@ class PhantomJSwrapper(object): | ||||
|         return get_exe_version('phantomjs', version_re=r'([0-9.]+)') | ||||
|  | ||||
|     def __init__(self, extractor, required_version=None, timeout=10000): | ||||
|         self._TMP_FILES = {} | ||||
|  | ||||
|         self.exe = check_executable('phantomjs', ['-v']) | ||||
|         if not self.exe: | ||||
|             raise ExtractorError('PhantomJS executable not found in PATH, ' | ||||
| @@ -130,7 +132,6 @@ class PhantomJSwrapper(object): | ||||
|         self.options = { | ||||
|             'timeout': timeout, | ||||
|         } | ||||
|         self._TMP_FILES = {} | ||||
|         for name in self._TMP_FILE_NAMES: | ||||
|             tmp = tempfile.NamedTemporaryFile(delete=False) | ||||
|             tmp.close() | ||||
| @@ -140,7 +141,7 @@ class PhantomJSwrapper(object): | ||||
|         for name in self._TMP_FILE_NAMES: | ||||
|             try: | ||||
|                 os.remove(self._TMP_FILES[name].name) | ||||
|             except (IOError, OSError): | ||||
|             except (IOError, OSError, KeyError): | ||||
|                 pass | ||||
|  | ||||
|     def _save_cookies(self, url): | ||||
| @@ -242,7 +243,7 @@ class PhantomJSwrapper(object): | ||||
|  | ||||
|  | ||||
| class OpenloadIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://openload.co/f/kUEfGclsU9o', | ||||
| @@ -289,6 +290,9 @@ class OpenloadIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.openload.link/f/KnG-kKZdcfY', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://oload.stream/f/KnG-kKZdcfY', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' | ||||
|   | ||||
| @@ -131,6 +131,13 @@ class PluralsightIE(PluralsightBaseIE): | ||||
|             if BLOCKED in response: | ||||
|                 raise ExtractorError( | ||||
|                     'Unable to login: %s' % BLOCKED, expected=True) | ||||
|             MUST_AGREE = 'To continue using Pluralsight, you must agree to' | ||||
|             if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')): | ||||
|                 raise ExtractorError( | ||||
|                     'Unable to login: %s some documents. Go to pluralsight.com, ' | ||||
|                     'log in and agree with what Pluralsight requires.' | ||||
|                     % MUST_AGREE, expected=True) | ||||
|  | ||||
|             raise ExtractorError('Unable to log in') | ||||
|  | ||||
|     def _get_subtitles(self, author, clip_id, lang, name, duration, video_id): | ||||
|   | ||||
| @@ -77,12 +77,14 @@ class PornComIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, | ||||
|             (r'Views:\s*</span>\s*<span>\s*([\d,.]+)', | ||||
|              r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage, | ||||
|             'view count', fatal=False)) | ||||
|  | ||||
|         def extract_list(kind): | ||||
|             s = self._search_regex( | ||||
|                 r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(), | ||||
|                 (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(), | ||||
|                  r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize()), | ||||
|                 webpage, kind, fatal=False) | ||||
|             return re.findall(r'<a[^>]+>([^<]+)</a>', s or '') | ||||
|  | ||||
|   | ||||
| @@ -17,6 +17,7 @@ from ..utils import ( | ||||
|     parse_duration, | ||||
|     strip_or_none, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     unified_timestamp, | ||||
|     update_url_query, | ||||
| @@ -249,6 +250,41 @@ class RaiPlayLiveIE(RaiBaseIE): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class RaiPlayPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', | ||||
|         'info_dict': { | ||||
|             'id': 'nondirloalmiocapo', | ||||
|             'title': 'Non dirlo al mio capo', | ||||
|             'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', | ||||
|         }, | ||||
|         'playlist_mincount': 12, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         title = self._html_search_meta( | ||||
|             ('programma', 'nomeProgramma'), webpage, 'title') | ||||
|         description = unescapeHTML(self._html_search_meta( | ||||
|             ('description', 'og:description'), webpage, 'description')) | ||||
|         print(description) | ||||
|  | ||||
|         entries = [] | ||||
|         for mobj in re.finditer( | ||||
|                 r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', | ||||
|                 webpage): | ||||
|             video_url = urljoin(url, mobj.group('path')) | ||||
|             entries.append(self.url_result( | ||||
|                 video_url, ie=RaiPlayIE.ie_key(), | ||||
|                 video_id=RaiPlayIE._match_id(video_url))) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, title, description) | ||||
|  | ||||
|  | ||||
| class RaiIE(RaiBaseIE): | ||||
|     _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE | ||||
|     _TESTS = [{ | ||||
|   | ||||
| @@ -1,13 +1,11 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
| import hashlib | ||||
| import hmac | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .aws import AWSIE | ||||
| from .anvato import AnvatoIE | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
| @@ -16,7 +14,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ScrippsNetworksWatchIE(InfoExtractor): | ||||
| class ScrippsNetworksWatchIE(AWSIE): | ||||
|     IE_NAME = 'scrippsnetworks:watch' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
| @@ -64,44 +62,27 @@ class ScrippsNetworksWatchIE(InfoExtractor): | ||||
|         'travelchannel': 'trav', | ||||
|         'geniuskitchen': 'genius', | ||||
|     } | ||||
|     _SNI_HOST = 'web.api.video.snidigital.com' | ||||
|  | ||||
|     _AWS_REGION = 'us-east-1' | ||||
|     _AWS_IDENTITY_ID_JSON = json.dumps({ | ||||
|         'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION | ||||
|     }) | ||||
|     _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' | ||||
|     _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' | ||||
|     _AWS_SERVICE = 'execute-api' | ||||
|     _AWS_REQUEST = 'aws4_request' | ||||
|     _AWS_SIGNED_HEADERS = ';'.join([ | ||||
|         'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key']) | ||||
|     _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET | ||||
| %(uri)s | ||||
|     _AWS_PROXY_HOST = 'web.api.video.snidigital.com' | ||||
|  | ||||
| host:%(host)s | ||||
| x-amz-date:%(date)s | ||||
| x-amz-security-token:%(token)s | ||||
| x-api-key:%(key)s | ||||
|  | ||||
| %(signed_headers)s | ||||
| %(payload_hash)s''' | ||||
|     _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         site_id, video_id = mobj.group('site', 'id') | ||||
|  | ||||
|         def aws_hash(s): | ||||
|             return hashlib.sha256(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         aws_identity_id_json = json.dumps({ | ||||
|             'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION | ||||
|         }).encode('utf-8') | ||||
|         token = self._download_json( | ||||
|             'https://cognito-identity.us-east-1.amazonaws.com/', video_id, | ||||
|             data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'), | ||||
|             'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, | ||||
|             data=aws_identity_id_json, | ||||
|             headers={ | ||||
|                 'Accept': '*/*', | ||||
|                 'Content-Type': 'application/x-amz-json-1.1', | ||||
|                 'Referer': url, | ||||
|                 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON), | ||||
|                 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(), | ||||
|                 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', | ||||
|                 'X-Amz-User-Agent': self._AWS_USER_AGENT, | ||||
|             })['Token'] | ||||
| @@ -124,64 +105,12 @@ x-api-key:%(key)s | ||||
|                 sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, | ||||
|                 fatal=True) | ||||
|  | ||||
|         access_key_id = get('AccessKeyId') | ||||
|         secret_access_key = get('SecretAccessKey') | ||||
|         session_token = get('SessionToken') | ||||
|  | ||||
|         # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html | ||||
|         uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id) | ||||
|         datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') | ||||
|         date = datetime_now[:8] | ||||
|         canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % { | ||||
|             'uri': uri, | ||||
|             'host': self._SNI_HOST, | ||||
|             'date': datetime_now, | ||||
|             'token': session_token, | ||||
|             'key': self._AWS_API_KEY, | ||||
|             'signed_headers': self._AWS_SIGNED_HEADERS, | ||||
|             'payload_hash': aws_hash(''), | ||||
|         } | ||||
|  | ||||
|         # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html | ||||
|         credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]) | ||||
|         string_to_sign = '\n'.join([ | ||||
|             'AWS4-HMAC-SHA256', datetime_now, credential_string, | ||||
|             aws_hash(canonical_string)]) | ||||
|  | ||||
|         # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html | ||||
|         def aws_hmac(key, msg): | ||||
|             return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) | ||||
|  | ||||
|         def aws_hmac_digest(key, msg): | ||||
|             return aws_hmac(key, msg).digest() | ||||
|  | ||||
|         def aws_hmac_hexdigest(key, msg): | ||||
|             return aws_hmac(key, msg).hexdigest() | ||||
|  | ||||
|         k_secret = 'AWS4' + secret_access_key | ||||
|         k_date = aws_hmac_digest(k_secret.encode('utf-8'), date) | ||||
|         k_region = aws_hmac_digest(k_date, self._AWS_REGION) | ||||
|         k_service = aws_hmac_digest(k_region, self._AWS_SERVICE) | ||||
|         k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST) | ||||
|  | ||||
|         signature = aws_hmac_hexdigest(k_signing, string_to_sign) | ||||
|  | ||||
|         auth_header = ', '.join([ | ||||
|             'AWS4-HMAC-SHA256 Credential=%s' % '/'.join( | ||||
|                 [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]), | ||||
|             'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS, | ||||
|             'Signature=%s' % signature, | ||||
|         ]) | ||||
|  | ||||
|         mcp_id = self._download_json( | ||||
|             'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={ | ||||
|                 'Accept': '*/*', | ||||
|                 'Referer': url, | ||||
|                 'Authorization': auth_header, | ||||
|                 'X-Amz-Date': datetime_now, | ||||
|                 'X-Amz-Security-Token': session_token, | ||||
|                 'X-Api-Key': self._AWS_API_KEY, | ||||
|             })['results'][0]['mcpId'] | ||||
|         mcp_id = self._aws_execute_api({ | ||||
|             'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), | ||||
|             'access_key': get('AccessKeyId'), | ||||
|             'secret_key': get('SecretAccessKey'), | ||||
|             'session_token': get('SessionToken'), | ||||
|         }, video_id)['results'][0]['mcpId'] | ||||
|  | ||||
|         return self.url_result( | ||||
|             smuggle_url( | ||||
|   | ||||
							
								
								
									
										67
									
								
								youtube_dl/extractor/sevenplus.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								youtube_dl/extractor/sevenplus.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,67 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .brightcove import BrightcoveNewIE | ||||
| from ..utils import update_url_query | ||||
|  | ||||
|  | ||||
| class SevenPlusIE(BrightcoveNewIE): | ||||
|     IE_NAME = '7plus' | ||||
|     _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001', | ||||
|         'info_dict': { | ||||
|             'id': 'BEAT-001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds', | ||||
|             'description': 'md5:37718bea20a8eedaca7f7361af566131', | ||||
|             'uploader_id': '5303576322001', | ||||
|             'upload_date': '20171031', | ||||
|             'timestamp': 1509440068, | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'bestvideo', | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         path, episode_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         media = self._download_json( | ||||
|             'https://videoservice.swm.digital/playback', episode_id, query={ | ||||
|                 'appId': '7plus', | ||||
|                 'deviceType': 'web', | ||||
|                 'platformType': 'web', | ||||
|                 'accountId': 5303576322001, | ||||
|                 'referenceId': 'ref:' + episode_id, | ||||
|                 'deliveryId': 'csai', | ||||
|                 'videoType': 'vod', | ||||
|             })['media'] | ||||
|  | ||||
|         for source in media.get('sources', {}): | ||||
|             src = source.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             source['src'] = update_url_query(src, {'rule': ''}) | ||||
|  | ||||
|         info = self._parse_brightcove_metadata(media, episode_id) | ||||
|  | ||||
|         content = self._download_json( | ||||
|             'https://component-cdn.swm.digital/content/' + path, | ||||
|             episode_id, headers={ | ||||
|                 'market-id': 4, | ||||
|             }, fatal=False) or {} | ||||
|         for item in content.get('items', {}): | ||||
|             if item.get('componentData', {}).get('componentType') == 'infoPanel': | ||||
|                 for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]: | ||||
|                     value = item.get(src_key) | ||||
|                     if value: | ||||
|                         info[dst_key] = value | ||||
|  | ||||
|         return info | ||||
| @@ -1,22 +1,53 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import math | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .aws import AWSIE | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     str_or_none, | ||||
|     urlencode_postdata, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ShahidIE(InfoExtractor): | ||||
| class ShahidBaseIE(AWSIE): | ||||
|     _AWS_PROXY_HOST = 'api2.shahid.net' | ||||
|     _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' | ||||
|  | ||||
|     def _handle_error(self, e): | ||||
|         fail_data = self._parse_json( | ||||
|             e.cause.read().decode('utf-8'), None, fatal=False) | ||||
|         if fail_data: | ||||
|             faults = fail_data.get('faults', []) | ||||
|             faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) | ||||
|             if faults_message: | ||||
|                 raise ExtractorError(faults_message, expected=True) | ||||
|  | ||||
|     def _call_api(self, path, video_id, request=None): | ||||
|         query = {} | ||||
|         if request: | ||||
|             query['request'] = json.dumps(request) | ||||
|         try: | ||||
|             return self._aws_execute_api({ | ||||
|                 'uri': '/proxy/v2/' + path, | ||||
|                 'access_key': 'AKIAI6X4TYCIXM2B7MUQ', | ||||
|                 'secret_key': '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', | ||||
|             }, video_id, query) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 self._handle_error(e) | ||||
|             raise | ||||
|  | ||||
|  | ||||
| class ShahidIE(ShahidBaseIE): | ||||
|     _NETRC_MACHINE = 'shahid' | ||||
|     _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
| @@ -41,34 +72,25 @@ class ShahidIE(InfoExtractor): | ||||
|         'only_matching': True | ||||
|     }] | ||||
|  | ||||
|     def _api2_request(self, *args, **kwargs): | ||||
|         try: | ||||
|             return self._download_json(*args, **kwargs) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 fail_data = self._parse_json( | ||||
|                     e.cause.read().decode('utf-8'), None, fatal=False) | ||||
|                 if fail_data: | ||||
|                     faults = fail_data.get('faults', []) | ||||
|                     faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) | ||||
|                     if faults_message: | ||||
|                         raise ExtractorError(faults_message, expected=True) | ||||
|             raise | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         email, password = self._get_login_info() | ||||
|         if email is None: | ||||
|             return | ||||
|  | ||||
|         user_data = self._api2_request( | ||||
|             'https://shahid.mbc.net/wd/service/users/login', | ||||
|             None, 'Logging in', data=json.dumps({ | ||||
|                 'email': email, | ||||
|                 'password': password, | ||||
|                 'basic': 'false', | ||||
|             }).encode('utf-8'), headers={ | ||||
|                 'Content-Type': 'application/json; charset=UTF-8', | ||||
|             })['user'] | ||||
|         try: | ||||
|             user_data = self._download_json( | ||||
|                 'https://shahid.mbc.net/wd/service/users/login', | ||||
|                 None, 'Logging in', data=json.dumps({ | ||||
|                     'email': email, | ||||
|                     'password': password, | ||||
|                     'basic': 'false', | ||||
|                 }).encode('utf-8'), headers={ | ||||
|                     'Content-Type': 'application/json; charset=UTF-8', | ||||
|                 })['user'] | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 self._handle_error(e) | ||||
|             raise | ||||
|  | ||||
|         self._download_webpage( | ||||
|             'https://shahid.mbc.net/populateContext', | ||||
| @@ -81,25 +103,13 @@ class ShahidIE(InfoExtractor): | ||||
|                 'sessionId': user_data['sessionId'], | ||||
|             })) | ||||
|  | ||||
|     def _get_api_data(self, response): | ||||
|         data = response.get('data', {}) | ||||
|  | ||||
|         error = data.get('error') | ||||
|         if error: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), | ||||
|                 expected=True) | ||||
|  | ||||
|         return data | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_type, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         if page_type == 'clip': | ||||
|             page_type = 'episode' | ||||
|  | ||||
|         playout = self._api2_request( | ||||
|             'https://api2.shahid.net/proxy/v2/playout/url/' + video_id, | ||||
|             video_id, 'Downloading player JSON')['playout'] | ||||
|         playout = self._call_api( | ||||
|             'playout/url/' + video_id, video_id)['playout'] | ||||
|  | ||||
|         if playout.get('drm'): | ||||
|             raise ExtractorError('This video is DRM protected.', expected=True) | ||||
| @@ -107,13 +117,27 @@ class ShahidIE(InfoExtractor): | ||||
|         formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video = self._get_api_data(self._download_json( | ||||
|         # video = self._call_api( | ||||
|         #     'product/id', video_id, { | ||||
|         #         'id': video_id, | ||||
|         #         'productType': 'ASSET', | ||||
|         #         'productSubType': page_type.upper() | ||||
|         #     })['productModel'] | ||||
|  | ||||
|         response = self._download_json( | ||||
|             'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id), | ||||
|             video_id, 'Downloading video JSON', query={ | ||||
|                 'apiKey': 'sh@hid0nlin3', | ||||
|                 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', | ||||
|             }))[page_type] | ||||
|             }) | ||||
|         data = response.get('data', {}) | ||||
|         error = data.get('error') | ||||
|         if error: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), | ||||
|                 expected=True) | ||||
|  | ||||
|         video = data[page_type] | ||||
|         title = video['title'] | ||||
|         categories = [ | ||||
|             category['name'] | ||||
| @@ -135,3 +159,57 @@ class ShahidIE(InfoExtractor): | ||||
|             'episode_id': video_id, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ShahidShowIE(ShahidBaseIE): | ||||
|     _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', | ||||
|         'info_dict': { | ||||
|             'id': '79187', | ||||
|             'title': 'رامز قرش البحر', | ||||
|             'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff', | ||||
|         }, | ||||
|         'playlist_mincount': 32, | ||||
|     }, { | ||||
|         'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861', | ||||
|         'only_matching': True | ||||
|     }] | ||||
|     _PAGE_SIZE = 30 | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         show_id = self._match_id(url) | ||||
|  | ||||
|         product = self._call_api( | ||||
|             'playableAsset', show_id, {'showId': show_id})['productModel'] | ||||
|         playlist = product['playlist'] | ||||
|         playlist_id = playlist['id'] | ||||
|         show = product.get('show', {}) | ||||
|  | ||||
|         def page_func(page_num): | ||||
|             playlist = self._call_api( | ||||
|                 'product/playlist', show_id, { | ||||
|                     'playListId': playlist_id, | ||||
|                     'pageNumber': page_num, | ||||
|                     'pageSize': 30, | ||||
|                     'sorts': [{ | ||||
|                         'order': 'DESC', | ||||
|                         'type': 'SORTDATE' | ||||
|                     }], | ||||
|                 }) | ||||
|             for product in playlist.get('productList', {}).get('products', []): | ||||
|                 product_url = product.get('productUrl', []).get('url') | ||||
|                 if not product_url: | ||||
|                     continue | ||||
|                 yield self.url_result( | ||||
|                     product_url, 'Shahid', | ||||
|                     str_or_none(product.get('id')), | ||||
|                     product.get('title')) | ||||
|  | ||||
|         entries = InAdvancePagedList( | ||||
|             page_func, | ||||
|             math.ceil(playlist['count'] / self._PAGE_SIZE), | ||||
|             self._PAGE_SIZE) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, show_id, show.get('title'), show.get('description')) | ||||
|   | ||||
| @@ -2,6 +2,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
|  | ||||
|  | ||||
| class SonyLIVIE(InfoExtractor): | ||||
| @@ -10,12 +11,12 @@ class SonyLIVIE(InfoExtractor): | ||||
|         'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight", | ||||
|         'info_dict': { | ||||
|             'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight", | ||||
|             'id': '5024612095001', | ||||
|             'id': 'ref:5024612095001', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20160707', | ||||
|             'upload_date': '20170923', | ||||
|             'description': 'md5:7f28509a148d5be9d0782b4d5106410d', | ||||
|             'uploader_id': '4338955589001', | ||||
|             'timestamp': 1467870968, | ||||
|             'uploader_id': '5182475815001', | ||||
|             'timestamp': 1506200547, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -26,9 +27,11 @@ class SonyLIVIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s' | ||||
|     # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s' | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         brightcove_id = self._match_id(url) | ||||
|         return self.url_result( | ||||
|             self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) | ||||
|             smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['IN']}), | ||||
|             'BrightcoveNew', brightcove_id) | ||||
|   | ||||
							
								
								
									
										48
									
								
								youtube_dl/extractor/stretchinternet.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								youtube_dl/extractor/stretchinternet.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class StretchInternetIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/portal\.htm\?.*?\beventId=(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=313900&streamType=video', | ||||
|         'info_dict': { | ||||
|             'id': '313900', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Augustana (S.D.) Baseball vs University of Mary', | ||||
|             'description': 'md5:7578478614aae3bdd4a90f578f787438', | ||||
|             'timestamp': 1490468400, | ||||
|             'upload_date': '20170325', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         stream = self._download_json( | ||||
|             'https://neo-client.stretchinternet.com/streamservice/v1/media/stream/v%s' | ||||
|             % video_id, video_id) | ||||
|  | ||||
|         video_url = 'https://%s' % stream['source'] | ||||
|  | ||||
|         event = self._download_json( | ||||
|             'https://neo-client.stretchinternet.com/portal-ws/getEvent.json', | ||||
|             video_id, query={ | ||||
|                 'clientID': 99997, | ||||
|                 'eventID': video_id, | ||||
|                 'token': 'asdf', | ||||
|             })['event'] | ||||
|  | ||||
|         title = event.get('title') or event['mobileTitle'] | ||||
|         description = event.get('customText') | ||||
|         timestamp = int_or_none(event.get('longtime')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'url': video_url, | ||||
|         } | ||||
| @@ -4,58 +4,109 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .turner import TurnerBaseIE | ||||
| from ..utils import extract_attributes | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     strip_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TBSIE(TurnerBaseIE): | ||||
|     # https://github.com/rg3/youtube-dl/issues/13658 | ||||
|     _WORKING = False | ||||
|  | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html', | ||||
|         'md5': '9e61d680e2285066ade7199e6408b2ee', | ||||
|         'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', | ||||
|         'info_dict': { | ||||
|             'id': '2007318', | ||||
|             'id': '8d384cde33b89f3a43ce5329de42903ed5099887', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Theatrical Trailer', | ||||
|             'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.', | ||||
|             'title': 'Monster', | ||||
|             'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.', | ||||
|             'timestamp': 1508175329, | ||||
|             'upload_date': '20171016', | ||||
|         }, | ||||
|         'skip': 'TBS videos are deleted after a while', | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html', | ||||
|         'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56', | ||||
|         'info_dict': { | ||||
|             'id': '1538823', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'You Better Run', | ||||
|             'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.', | ||||
|         }, | ||||
|         'skip': 'TBS videos are deleted after a while', | ||||
|         'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         domain, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         site = domain[:3] | ||||
|         site, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params')) | ||||
|         query = None | ||||
|         clip_id = video_params.get('clipid') | ||||
|         if clip_id: | ||||
|             query = 'id=' + clip_id | ||||
|         else: | ||||
|             query = 'titleId=' + video_params['titleid'] | ||||
|         return self._extract_cvp_info( | ||||
|             'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, { | ||||
|                 'default': { | ||||
|                     'media_src': 'http://ht.cdn.turner.com/%s/big' % site, | ||||
|                 }, | ||||
|                 'secure': { | ||||
|                     'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site, | ||||
|                     'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain, | ||||
|                 }, | ||||
|             }, { | ||||
|                 'url': url, | ||||
|                 'site_name': site.upper(), | ||||
|                 'auth_required': video_params.get('isAuthRequired') != 'false', | ||||
|             }) | ||||
|         video_data = self._parse_json(self._search_regex( | ||||
|             r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>', | ||||
|             webpage, 'drupal setting'), display_id)['turner_playlist'][0] | ||||
|  | ||||
|         media_id = video_data['mediaID'] | ||||
|         title = video_data['title'] | ||||
|  | ||||
|         streams_data = self._download_json( | ||||
|             'http://medium.ngtv.io/media/%s/tv' % media_id, | ||||
|             media_id)['media']['tv'] | ||||
|         duration = None | ||||
|         chapters = [] | ||||
|         formats = [] | ||||
|         for supported_type in ('unprotected', 'bulkaes'): | ||||
|             stream_data = streams_data.get(supported_type, {}) | ||||
|             m3u8_url = stream_data.get('secureUrl') or stream_data.get('url') | ||||
|             if not m3u8_url: | ||||
|                 continue | ||||
|             if stream_data.get('playlistProtection') == 'spe': | ||||
|                 m3u8_url = self._add_akamai_spe_token( | ||||
|                     'http://www.%s.com/service/token_spe' % site, | ||||
|                     m3u8_url, media_id, { | ||||
|                         'url': url, | ||||
|                         'site_name': site[:3].upper(), | ||||
|                         'auth_required': video_data.get('authRequired') == '1', | ||||
|                     }) | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)) | ||||
|  | ||||
|             duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration')) | ||||
|  | ||||
|             if not chapters: | ||||
|                 for chapter in stream_data.get('contentSegments', []): | ||||
|                     start_time = float_or_none(chapter.get('start')) | ||||
|                     duration = float_or_none(chapter.get('duration')) | ||||
|                     if start_time is None or duration is None: | ||||
|                         continue | ||||
|                     chapters.append({ | ||||
|                         'start_time': start_time, | ||||
|                         'end_time': start_time + duration, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for image_id, image in video_data.get('images', {}).items(): | ||||
|             image_url = image.get('url') | ||||
|             if not image_url or image.get('type') != 'video': | ||||
|                 continue | ||||
|             i = { | ||||
|                 'id': image_id, | ||||
|                 'url': image_url, | ||||
|             } | ||||
|             mobj = re.search(r'(\d+)x(\d+)', image_url) | ||||
|             if mobj: | ||||
|                 i.update({ | ||||
|                     'width': int(mobj.group(1)), | ||||
|                     'height': int(mobj.group(2)), | ||||
|                 }) | ||||
|             thumbnails.append(i) | ||||
|  | ||||
|         return { | ||||
|             'id': media_id, | ||||
|             'title': title, | ||||
|             'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')), | ||||
|             'duration': duration, | ||||
|             'timestamp': int_or_none(video_data.get('created')), | ||||
|             'season_number': int_or_none(video_data.get('season')), | ||||
|             'episode_number': int_or_none(video_data.get('episode')), | ||||
|             'cahpters': chapters, | ||||
|             'thumbnails': thumbnails, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -16,7 +16,7 @@ from ..utils import ( | ||||
| class TouTvIE(InfoExtractor): | ||||
|     _NETRC_MACHINE = 'toutv' | ||||
|     IE_NAME = 'tou.tv' | ||||
|     _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)' | ||||
|     _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)' | ||||
|     _access_token = None | ||||
|     _claims = None | ||||
|  | ||||
| @@ -37,6 +37,9 @@ class TouTvIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://ici.tou.tv/hackers', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://ici.tou.tv/l-age-adulte/S01C501', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|   | ||||
| @@ -18,9 +18,32 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class TurnerBaseIE(AdobePassIE): | ||||
|     _AKAMAI_SPE_TOKEN_CACHE = {} | ||||
|  | ||||
|     def _extract_timestamp(self, video_data): | ||||
|         return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) | ||||
|  | ||||
|     def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data): | ||||
|         secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' | ||||
|         token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path) | ||||
|         if not token: | ||||
|             query = { | ||||
|                 'path': secure_path, | ||||
|                 'videoId': content_id, | ||||
|             } | ||||
|             if ap_data.get('auth_required'): | ||||
|                 query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name']) | ||||
|             auth = self._download_xml( | ||||
|                 tokenizer_src, content_id, query=query) | ||||
|             error_msg = xpath_text(auth, 'error/msg') | ||||
|             if error_msg: | ||||
|                 raise ExtractorError(error_msg, expected=True) | ||||
|             token = xpath_text(auth, 'token') | ||||
|             if not token: | ||||
|                 return video_url | ||||
|             self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token | ||||
|         return video_url + '?hdnea=' + token | ||||
|  | ||||
|     def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}): | ||||
|         video_data = self._download_xml(data_src, video_id) | ||||
|         video_id = video_data.attrib['id'] | ||||
| @@ -33,7 +56,6 @@ class TurnerBaseIE(AdobePassIE): | ||||
|         #         rtmp_src = splited_rtmp_src[1] | ||||
|         # aifp = xpath_text(video_data, 'akamai/aifp', default='') | ||||
|  | ||||
|         tokens = {} | ||||
|         urls = [] | ||||
|         formats = [] | ||||
|         rex = re.compile( | ||||
| @@ -67,26 +89,10 @@ class TurnerBaseIE(AdobePassIE): | ||||
|                 secure_path_data = path_data.get('secure') | ||||
|                 if not secure_path_data: | ||||
|                     continue | ||||
|                 video_url = secure_path_data['media_src'] + video_url | ||||
|                 secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' | ||||
|                 token = tokens.get(secure_path) | ||||
|                 if not token: | ||||
|                     query = { | ||||
|                         'path': secure_path, | ||||
|                         'videoId': content_id, | ||||
|                     } | ||||
|                     if ap_data.get('auth_required'): | ||||
|                         query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], video_id, ap_data['site_name'], ap_data['site_name']) | ||||
|                     auth = self._download_xml( | ||||
|                         secure_path_data['tokenizer_src'], video_id, query=query) | ||||
|                     error_msg = xpath_text(auth, 'error/msg') | ||||
|                     if error_msg: | ||||
|                         raise ExtractorError(error_msg, expected=True) | ||||
|                     token = xpath_text(auth, 'token') | ||||
|                     if not token: | ||||
|                         continue | ||||
|                     tokens[secure_path] = token | ||||
|                 video_url = video_url + '?hdnea=' + token | ||||
|                 video_url = self._add_akamai_spe_token( | ||||
|                     secure_path_data['tokenizer_src'], | ||||
|                     secure_path_data['media_src'] + video_url, | ||||
|                     content_id, ap_data) | ||||
|             elif not re.match('https?://', video_url): | ||||
|                 base_path_data = path_data.get(ext, path_data.get('default', {})) | ||||
|                 media_src = base_path_data.get('media_src') | ||||
|   | ||||
							
								
								
									
										175
									
								
								youtube_dl/extractor/tvnow.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								youtube_dl/extractor/tvnow.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,175 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_iso8601, | ||||
|     parse_duration, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TVNowBaseIE(InfoExtractor):
    """Shared helpers for the TVNOW extractors: API access and video extraction."""

    # Fields requested from the API for a single video.  Dotted names appear to
    # address nested members (the response is read as info['manifest']['dashclear']
    # and info['format'][...] below).
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'manifest.dashclear',
        'format.defaultImage169Format', 'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Download and return the JSON found at ``path`` below the v3 API root."""
        return self._download_json(
            'https://api.tvnow.de/v3/' + path,
            video_id, query=query)

    def _extract_video(self, info, display_id):
        """Build the info dict for one video from its API record ``info``.

        ``info`` must contain ``id``, ``title`` and ``manifest.dashclear``;
        everything else is optional.

        Raises ExtractorError when no manifest URL is available: as an expected
        error for DRM protected, geo restricted or non-free videos, and as a
        regular error when the reason is unknown.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        mpd_url = info['manifest']['dashclear']
        if not mpd_url:
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                raise ExtractorError(
                    'Video %s is not available from your location due to geo restriction' % video_id,
                    expected=True)
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
            # None of the known reasons applies; fail with a proper extractor
            # error instead of crashing on the missing URL further down.
            raise ExtractorError(
                'Unable to find a manifest URL for video %s' % video_id)

        mpd_url = update_url_query(mpd_url, {'filter': ''})
        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
        # The smooth streaming and HLS manifest URLs are derived from the DASH
        # one by swapping the host prefix and the manifest file name.
        formats.extend(self._extract_ism_formats(
            mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
            video_id, ism_id='mss', fatal=False))
        formats.extend(self._extract_m3u8_formats(
            mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # ``format`` may be present but null, so do not rely on the .get() default.
        f = info.get('format') or {}
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
        }
|  | ||||
|  | ||||
class TVNowIE(TVNowBaseIE):
    """Extractor for single TVNOW ``/player`` and ``/preview`` pages."""

    _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'

    _TESTS = [{
        # rtl
        'url': 'https://www.tvnow.de/rtl/alarm-fuer-cobra-11/freier-fall/player?return=/rtl',
        'info_dict': {
            'id': '385314',
            'display_id': 'alarm-fuer-cobra-11/freier-fall',
            'ext': 'mp4',
            'title': 'Freier Fall',
            'description': 'md5:8c2d8f727261adf7e0dc18366124ca02',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1512677700,
            'upload_date': '20171207',
            'duration': 2862.0,
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the API record addressed by ``<show_id>/<id>`` and extract it."""
        # _VALID_URL has exactly two capturing groups: show_id and id.
        display_id = '%s/%s' % re.match(self._VALID_URL, url).groups()

        info = self._call_api(
            'movies/' + display_id, display_id, query={
                'fields': ','.join(self._VIDEO_FIELDS),
            })

        return self._extract_video(info, display_id)
|  | ||||
|  | ||||
class TVNowListIE(TVNowBaseIE):
    """Extractor for TVNOW ``.../<show>/list/<season>`` pages, returned as a playlist."""

    _VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'

    # Field names requested at each level of the show document; the season and
    # video names are prefixed with their path inside the response below.
    _SHOW_FIELDS = ('title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
        'info_dict': {
            'id': '28296',
            'title': '30 Minuten Deutschland - Aktuell',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return a playlist with one TVNow entry per video listed for the season.

        Raises ExtractorError when the show has no season whose ``seoheadline``
        equals the id taken from the URL.
        """
        base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()

        fields = []
        fields.extend(self._SHOW_FIELDS)
        fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
        fields.extend(
            'formatTabs.formatTabPages.container.movies.%s' % field
            for field in self._VIDEO_FIELDS)

        list_info = self._call_api(
            'formats/seo', season_id, query={
                'fields': ','.join(fields),
                'name': show_id + '.php'
            })

        # Supply a default so that an unknown season does not leak a bare
        # StopIteration out of the extractor.
        season = next(
            (season for season in list_info['formatTabs']['items']
             if season.get('seoheadline') == season_id), None)
        if not season:
            raise ExtractorError('Unable to find season %s' % season_id)

        title = '%s - %s' % (list_info['title'], season['headline'])

        entries = []
        for container in season['formatTabPages']['items']:
            # Every level of container -> movies -> items may be missing or null.
            for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
                seo_url = info.get('seoUrl')
                if not seo_url:
                    continue
                entries.append(self.url_result(
                    base_url + seo_url + '/player', 'TVNow', info.get('id')))

        return self.playlist_result(
            entries, compat_str(season.get('id') or season_id), title)
| @@ -43,7 +43,7 @@ class TwitterBaseIE(InfoExtractor): | ||||
|  | ||||
| class TwitterCardIE(TwitterBaseIE): | ||||
|     IE_NAME = 'twitter:card' | ||||
|     _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?P<path>cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', | ||||
| @@ -51,11 +51,10 @@ class TwitterCardIE(TwitterBaseIE): | ||||
|             'info_dict': { | ||||
|                 'id': '560070183650213889', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Twitter Card', | ||||
|                 'title': 'Twitter web player', | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|                 'duration': 30.033, | ||||
|             }, | ||||
|             'skip': 'Video gone', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768', | ||||
| @@ -63,11 +62,9 @@ class TwitterCardIE(TwitterBaseIE): | ||||
|             'info_dict': { | ||||
|                 'id': '623160978427936768', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Twitter Card', | ||||
|                 'thumbnail': r're:^https?://.*\.jpg', | ||||
|                 'duration': 80.155, | ||||
|                 'title': 'Twitter web player', | ||||
|                 'thumbnail': r're:^https?://.*(?:\bformat=|\.)jpg', | ||||
|             }, | ||||
|             'skip': 'Video gone', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', | ||||
| @@ -120,15 +117,15 @@ class TwitterCardIE(TwitterBaseIE): | ||||
|             elif media_url.endswith('.mpd'): | ||||
|                 formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash')) | ||||
|             else: | ||||
|                 vbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000) | ||||
|                 tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000) | ||||
|                 a_format = { | ||||
|                     'url': media_url, | ||||
|                     'format_id': 'http-%d' % vbr if vbr else 'http', | ||||
|                     'vbr': vbr, | ||||
|                     'format_id': 'http-%d' % tbr if tbr else 'http', | ||||
|                     'tbr': tbr, | ||||
|                 } | ||||
|                 # Reported bitRate may be zero | ||||
|                 if not a_format['vbr']: | ||||
|                     del a_format['vbr'] | ||||
|                 if not a_format['tbr']: | ||||
|                     del a_format['tbr'] | ||||
|  | ||||
|                 self._search_dimensions_in_video_url(a_format, media_url) | ||||
|  | ||||
| @@ -150,79 +147,83 @@ class TwitterCardIE(TwitterBaseIE): | ||||
|         bearer_token = self._search_regex( | ||||
|             r'BEARER_TOKEN\s*:\s*"([^"]+)"', | ||||
|             main_script, 'bearer token') | ||||
|         guest_token = self._search_regex( | ||||
|             r'document\.cookie\s*=\s*decodeURIComponent\("gt=(\d+)', | ||||
|             webpage, 'guest token') | ||||
|         # https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id | ||||
|         api_data = self._download_json( | ||||
|             'https://api.twitter.com/2/timeline/conversation/%s.json' % video_id, | ||||
|             video_id, 'Downloading mobile API data', | ||||
|             'https://api.twitter.com/1.1/statuses/show/%s.json' % video_id, | ||||
|             video_id, 'Downloading API data', | ||||
|             headers={ | ||||
|                 'Authorization': 'Bearer ' + bearer_token, | ||||
|                 'x-guest-token': guest_token, | ||||
|             }) | ||||
|         media_info = try_get(api_data, lambda o: o['globalObjects']['tweets'][video_id] | ||||
|                                                   ['extended_entities']['media'][0]['video_info']) or {} | ||||
|         media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {} | ||||
|         return self._parse_media_info(media_info, video_id) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         path, video_id = re.search(self._VALID_URL, url).groups() | ||||
|  | ||||
|         config = None | ||||
|         formats = [] | ||||
|         duration = None | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         urls = [url] | ||||
|         if path.startswith('cards/'): | ||||
|             urls.append('https://twitter.com/i/videos/' + video_id) | ||||
|  | ||||
|         iframe_url = self._html_search_regex( | ||||
|             r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', | ||||
|             webpage, 'video iframe', default=None) | ||||
|         if iframe_url: | ||||
|             return self.url_result(iframe_url) | ||||
|         for u in urls: | ||||
|             webpage = self._download_webpage(u, video_id) | ||||
|  | ||||
|         config = self._parse_json(self._html_search_regex( | ||||
|             r'data-(?:player-)?config="([^"]+)"', webpage, | ||||
|             'data player config', default='{}'), | ||||
|             video_id) | ||||
|             iframe_url = self._html_search_regex( | ||||
|                 r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', | ||||
|                 webpage, 'video iframe', default=None) | ||||
|             if iframe_url: | ||||
|                 return self.url_result(iframe_url) | ||||
|  | ||||
|         if config.get('source_type') == 'vine': | ||||
|             return self.url_result(config['player_url'], 'Vine') | ||||
|             config = self._parse_json(self._html_search_regex( | ||||
|                 r'data-(?:player-)?config="([^"]+)"', webpage, | ||||
|                 'data player config', default='{}'), | ||||
|                 video_id) | ||||
|  | ||||
|         periscope_url = PeriscopeIE._extract_url(webpage) | ||||
|         if periscope_url: | ||||
|             return self.url_result(periscope_url, PeriscopeIE.ie_key()) | ||||
|             if config.get('source_type') == 'vine': | ||||
|                 return self.url_result(config['player_url'], 'Vine') | ||||
|  | ||||
|         video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') | ||||
|             periscope_url = PeriscopeIE._extract_url(webpage) | ||||
|             if periscope_url: | ||||
|                 return self.url_result(periscope_url, PeriscopeIE.ie_key()) | ||||
|  | ||||
|         if video_url: | ||||
|             if determine_ext(video_url) == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) | ||||
|             else: | ||||
|                 f = { | ||||
|                     'url': video_url, | ||||
|                 } | ||||
|             video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') | ||||
|  | ||||
|                 self._search_dimensions_in_video_url(f, video_url) | ||||
|             if video_url: | ||||
|                 if determine_ext(video_url) == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) | ||||
|                 else: | ||||
|                     f = { | ||||
|                         'url': video_url, | ||||
|                     } | ||||
|  | ||||
|                 formats.append(f) | ||||
|                     self._search_dimensions_in_video_url(f, video_url) | ||||
|  | ||||
|         vmap_url = config.get('vmapUrl') or config.get('vmap_url') | ||||
|         if vmap_url: | ||||
|             formats.extend( | ||||
|                 self._extract_formats_from_vmap_url(vmap_url, video_id)) | ||||
|                     formats.append(f) | ||||
|  | ||||
|         media_info = None | ||||
|             vmap_url = config.get('vmapUrl') or config.get('vmap_url') | ||||
|             if vmap_url: | ||||
|                 formats.extend( | ||||
|                     self._extract_formats_from_vmap_url(vmap_url, video_id)) | ||||
|  | ||||
|         for entity in config.get('status', {}).get('entities', []): | ||||
|             if 'mediaInfo' in entity: | ||||
|                 media_info = entity['mediaInfo'] | ||||
|             media_info = None | ||||
|  | ||||
|         if media_info: | ||||
|             formats.extend(self._parse_media_info(media_info, video_id)) | ||||
|             duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) | ||||
|             for entity in config.get('status', {}).get('entities', []): | ||||
|                 if 'mediaInfo' in entity: | ||||
|                     media_info = entity['mediaInfo'] | ||||
|  | ||||
|         username = config.get('user', {}).get('screen_name') | ||||
|         if username: | ||||
|             formats.extend(self._extract_mobile_formats(username, video_id)) | ||||
|             if media_info: | ||||
|                 formats.extend(self._parse_media_info(media_info, video_id)) | ||||
|                 duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) | ||||
|  | ||||
|             username = config.get('user', {}).get('screen_name') | ||||
|             if username: | ||||
|                 formats.extend(self._extract_mobile_formats(username, video_id)) | ||||
|  | ||||
|             if formats: | ||||
|                 break | ||||
|  | ||||
|         self._remove_duplicate_formats(formats) | ||||
|         self._sort_formats(formats) | ||||
| @@ -258,9 +259,6 @@ class TwitterIE(InfoExtractor): | ||||
|             'uploader_id': 'freethenipple', | ||||
|             'duration': 12.922, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires ffmpeg | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', | ||||
|         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42', | ||||
| @@ -277,7 +275,6 @@ class TwitterIE(InfoExtractor): | ||||
|         'skip': 'Account suspended', | ||||
|     }, { | ||||
|         'url': 'https://twitter.com/starwars/status/665052190608723968', | ||||
|         'md5': '39b7199856dee6cd4432e72c74bc69d4', | ||||
|         'info_dict': { | ||||
|             'id': '665052190608723968', | ||||
|             'ext': 'mp4', | ||||
| @@ -303,20 +300,16 @@ class TwitterIE(InfoExtractor): | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://twitter.com/jaydingeer/status/700207533655363584', | ||||
|         'md5': '', | ||||
|         'info_dict': { | ||||
|             'id': '700207533655363584', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel', | ||||
|             'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', | ||||
|             'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', | ||||
|             'description': 'JG on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', | ||||
|             'thumbnail': r're:^https?://.*\.jpg', | ||||
|             'uploader': 'あかさ', | ||||
|             'uploader': 'JG', | ||||
|             'uploader_id': 'jaydingeer', | ||||
|             'duration': 30.0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires ffmpeg | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609', | ||||
|         'md5': '89a15ed345d13b86e9a5a5e051fa308a', | ||||
| @@ -342,9 +335,6 @@ class TwitterIE(InfoExtractor): | ||||
|             'uploader': 'Captain America', | ||||
|             'duration': 3.17, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires ffmpeg | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', | ||||
|         'info_dict': { | ||||
| @@ -370,9 +360,6 @@ class TwitterIE(InfoExtractor): | ||||
|             'uploader_id': 'news_al3alm', | ||||
|             'duration': 277.4, | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'best[format_id^=http-]', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://twitter.com/i/web/status/910031516746514432', | ||||
|         'info_dict': { | ||||
|   | ||||
| @@ -62,11 +62,11 @@ class UdemyIE(InfoExtractor): | ||||
|     def _extract_course_info(self, webpage, video_id): | ||||
|         course = self._parse_json( | ||||
|             unescapeHTML(self._search_regex( | ||||
|                 r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')), | ||||
|                 r'ng-init=["\'].*\bcourse=({.+?})[;"\']', | ||||
|                 webpage, 'course', default='{}')), | ||||
|             video_id, fatal=False) or {} | ||||
|         course_id = course.get('id') or self._search_regex( | ||||
|             (r'"id"\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'), | ||||
|             webpage, 'course id') | ||||
|             r'data-course-id=["\'](\d+)', webpage, 'course id') | ||||
|         return course_id, course.get('title') | ||||
|  | ||||
|     def _enroll_course(self, base_url, webpage, course_id): | ||||
| @@ -257,6 +257,11 @@ class UdemyIE(InfoExtractor): | ||||
|                 video_url = source.get('file') or source.get('src') | ||||
|                 if not video_url or not isinstance(video_url, compat_str): | ||||
|                     continue | ||||
|                 if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         video_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                         m3u8_id='hls', fatal=False)) | ||||
|                     continue | ||||
|                 format_id = source.get('label') | ||||
|                 f = { | ||||
|                     'url': video_url, | ||||
|   | ||||
							
								
								
									
										103
									
								
								youtube_dl/extractor/umg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								youtube_dl/extractor/umg.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_filesize, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
class UMGDeIE(InfoExtractor):
    """Extractor for video pages on universal-music.de."""

    IE_NAME = 'umg:de'
    IE_DESC = 'Universal Music Deutschland'
    _VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803',
        'md5': 'ebd90f48c80dcc82f77251eb1902634f',
        'info_dict': {
            'id': '457803',
            'ext': 'mp4',
            'title': 'Jedes Wort ist Gold wert',
            'timestamp': 1513591800,
            'upload_date': '20171218',
        }
    }

    def _real_extract(self, url):
        """Query the GraphQL API for the video and assemble its info dict.

        Direct file URLs reported by the API are probed before being offered as
        formats; HLS variants are added per MP4 format id, and for a fixed set
        of format ids when nothing else was found.
        """
        video_id = self._match_id(url)
        # video_id consists of digits only (see _VALID_URL), so it can be
        # interpolated into the query text as is.
        video_data = self._download_json(
            'https://api.universal-music.de/graphql',
            video_id, query={
                'query': '''{
  universalMusic(channel:16) {
    video(id:%s) {
      headline
      formats {
        formatId
        url
        type
        width
        height
        mimeType
        fileSize
      }
      duration
      createdDate
    }
  }
}''' % video_id})['data']['universalMusic']['video']

        title = video_data['headline']
        # The storage path contains the id split into one directory per digit.
        hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8'

        thumbnails = []
        formats = []

        def add_m3u8_format(format_id):
            # fatal must be the boolean False: the string 'False' is truthy and
            # would make a missing playlist abort the whole extraction.
            m3u8_formats = self._extract_m3u8_formats(
                hls_url_template % format_id, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)
            # Only keep playlists whose first variant reports a height.
            if m3u8_formats and m3u8_formats[0].get('height'):
                formats.extend(m3u8_formats)

        # ``formats`` may be present but null, so do not rely on the .get() default.
        for f in video_data.get('formats') or []:
            f_url = f.get('url')
            mime_type = f.get('mimeType')
            if not f_url or mime_type == 'application/mxf':
                continue
            fmt = {
                'url': f_url,
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'filesize': parse_filesize(f.get('fileSize')),
            }
            f_type = f.get('type')
            if f_type == 'Image':
                thumbnails.append(fmt)
            elif f_type == 'Video':
                format_id = f.get('formatId')
                if format_id:
                    fmt['format_id'] = format_id
                    if mime_type == 'video/mp4':
                        add_m3u8_format(format_id)
                urlh = self._request_webpage(f_url, video_id, fatal=False)
                if urlh:
                    # Keep the URL only when the body starts with b'F' or a NUL
                    # byte -- presumably to drop non-media responses; confirm.
                    first_byte = urlh.read(1)
                    if first_byte not in (b'F', b'\x00'):
                        continue
                    formats.append(fmt)
        if not formats:
            # Fall back to probing a fixed set of HLS format ids.
            for format_id in (867, 836, 940):
                add_m3u8_format(format_id)
        self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))

        return {
            'id': video_id,
            'title': title,
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('createdDate'), ' '),
            'thumbnails': thumbnails,
            'formats': formats,
        }
| @@ -414,7 +414,7 @@ class VKIE(VKBaseIE): | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', | ||||
|             info_page, 'view count', fatal=False)) | ||||
|             info_page, 'view count', default=None)) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id, format_url in data.items(): | ||||
|   | ||||
| @@ -2,7 +2,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .kaltura import KalturaIE | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
| @@ -21,7 +20,6 @@ class VootIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340', | ||||
|             'description': 'md5:06291fbbbc4dcbe21235c40c262507c1', | ||||
|             'uploader_id': 'batchUser', | ||||
|             'timestamp': 1472162937, | ||||
|             'upload_date': '20160825', | ||||
|             'duration': 1146, | ||||
| @@ -63,6 +61,10 @@ class VootIE(InfoExtractor): | ||||
|  | ||||
|         entry_id = media['EntryId'] | ||||
|         title = media['MediaName'] | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id, | ||||
|             video_id, 'mp4', m3u8_id='hls') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description, series, season_number, episode, episode_number = [None] * 5 | ||||
|  | ||||
| @@ -82,9 +84,8 @@ class VootIE(InfoExtractor): | ||||
|                 episode_number = int_or_none(value) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'kaltura:1982551:%s' % entry_id, | ||||
|             'ie_key': KalturaIE.ie_key(), | ||||
|             'extractor_key': 'Kaltura', | ||||
|             'id': entry_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'series': series, | ||||
| @@ -95,4 +96,5 @@ class VootIE(InfoExtractor): | ||||
|             'duration': int_or_none(media.get('Duration')), | ||||
|             'view_count': int_or_none(media.get('ViewCounter')), | ||||
|             'like_count': int_or_none(media.get('like_counter')), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -75,6 +75,10 @@ class XHamsterIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # mobile site | ||||
|         'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', | ||||
|         'only_matching': True, | ||||
| @@ -93,7 +97,8 @@ class XHamsterIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') or mobj.group('id_2') | ||||
|         display_id = mobj.group('display_id') or mobj.group('display_id_2') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url) | ||||
|         webpage = self._download_webpage(desktop_url, video_id) | ||||
|  | ||||
|         error = self._html_search_regex( | ||||
|             r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>', | ||||
| @@ -229,8 +234,8 @@ class XHamsterIE(InfoExtractor): | ||||
|             webpage, 'uploader', default='anonymous') | ||||
|  | ||||
|         thumbnail = self._search_regex( | ||||
|             [r'''thumb\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''', | ||||
|              r'''<video[^>]+poster=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''], | ||||
|             [r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''', | ||||
|              r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''], | ||||
|             webpage, 'thumbnail', fatal=False, group='thumbnail') | ||||
|  | ||||
|         duration = parse_duration(self._search_regex( | ||||
| @@ -274,15 +279,16 @@ class XHamsterIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class XHamsterEmbedIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://xhamster.com/xembed.php?video=3328539', | ||||
|         'info_dict': { | ||||
|             'id': '3328539', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Pen Masturbation', | ||||
|             'timestamp': 1406581861, | ||||
|             'upload_date': '20140728', | ||||
|             'uploader_id': 'anonymous', | ||||
|             'uploader': 'ManyakisArt', | ||||
|             'duration': 5, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|   | ||||
| @@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor): | ||||
|         # request basic data | ||||
|         basic_data_params = { | ||||
|             'vid': video_id, | ||||
|             'ccode': '0501', | ||||
|             'ccode': '0507', | ||||
|             'client_ip': '192.168.1.1', | ||||
|             'utid': cna, | ||||
|             'client_ts': time.time() / 1000, | ||||
| @@ -241,6 +241,10 @@ class YoukuShowIE(InfoExtractor): | ||||
|         # Ongoing playlist. The initial page is the last one | ||||
|         'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         #  No data-id value. | ||||
|         'url': 'http://list.youku.com/show/id_zefbfbd61237fefbfbdef.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _extract_entries(self, playlist_data_url, show_id, note, query): | ||||
| @@ -276,9 +280,9 @@ class YoukuShowIE(InfoExtractor): | ||||
|             r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id') | ||||
|         # The first reload_id has the same items as first_page | ||||
|         reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page) | ||||
|         entries.extend(initial_entries) | ||||
|         for idx, reload_id in enumerate(reload_ids): | ||||
|             if reload_id == first_page_reload_id: | ||||
|                 entries.extend(initial_entries) | ||||
|                 continue | ||||
|             _, new_entries = self._extract_entries( | ||||
|                 'http://list.youku.com/show/episode', show_id, | ||||
|   | ||||
| @@ -2270,6 +2270,19 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | ||||
|             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>', | ||||
|             page, 'title', default=None) | ||||
|  | ||||
|         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref=' | ||||
|         uploader = self._search_regex( | ||||
|             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE, | ||||
|             page, 'uploader', default=None) | ||||
|         mobj = re.search( | ||||
|             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE, | ||||
|             page) | ||||
|         if mobj: | ||||
|             uploader_id = mobj.group('uploader_id') | ||||
|             uploader_url = compat_urlparse.urljoin(url, mobj.group('path')) | ||||
|         else: | ||||
|             uploader_id = uploader_url = None | ||||
|  | ||||
|         has_videos = True | ||||
|  | ||||
|         if not playlist_title: | ||||
| @@ -2280,8 +2293,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | ||||
|             except StopIteration: | ||||
|                 has_videos = False | ||||
|  | ||||
|         return has_videos, self.playlist_result( | ||||
|         playlist = self.playlist_result( | ||||
|             self._entries(page, playlist_id), playlist_id, playlist_title) | ||||
|         playlist.update({ | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'uploader_url': uploader_url, | ||||
|         }) | ||||
|  | ||||
|         return has_videos, playlist | ||||
|  | ||||
|     def _check_download_just_video(self, url, playlist_id): | ||||
|         # Check if it's a video-specific URL | ||||
|   | ||||
| @@ -42,6 +42,7 @@ class XAttrMetadataPP(PostProcessor): | ||||
|                 'user.dublincore.format': 'format', | ||||
|             } | ||||
|  | ||||
|             num_written = 0 | ||||
|             for xattrname, infoname in xattr_mapping.items(): | ||||
|  | ||||
|                 value = info.get(infoname) | ||||
| @@ -52,6 +53,7 @@ class XAttrMetadataPP(PostProcessor): | ||||
|  | ||||
|                     byte_value = value.encode('utf-8') | ||||
|                     write_xattr(filename, xattrname, byte_value) | ||||
|                     num_written += 1 | ||||
|  | ||||
|             return [], info | ||||
|  | ||||
| @@ -62,8 +64,8 @@ class XAttrMetadataPP(PostProcessor): | ||||
|         except XAttrMetadataError as e: | ||||
|             if e.reason == 'NO_SPACE': | ||||
|                 self._downloader.report_warning( | ||||
|                     'There\'s no disk space left or disk quota exceeded. ' + | ||||
|                     'Extended attributes are not written.') | ||||
|                     'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' + | ||||
|                     (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize()) | ||||
|             elif e.reason == 'VALUE_TOO_LONG': | ||||
|                 self._downloader.report_warning( | ||||
|                     'Unable to write extended attributes due to too long values.') | ||||
|   | ||||
| @@ -159,6 +159,8 @@ DATE_FORMATS = ( | ||||
|     '%Y-%m-%dT%H:%M', | ||||
|     '%b %d %Y at %H:%M', | ||||
|     '%b %d %Y at %H:%M:%S', | ||||
|     '%B %d %Y at %H:%M', | ||||
|     '%B %d %Y at %H:%M:%S', | ||||
| ) | ||||
|  | ||||
| DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2017.12.02' | ||||
| __version__ = '2017.12.23' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 sprhawk
					sprhawk