[nytimes] Add support for podcasts
This commit is contained in:
		 John Hawkinson
					John Hawkinson
				
			
				
					committed by
					
						 Sergey M․
						Sergey M․
					
				
			
			
				
	
			
			
			 Sergey M․
						Sergey M․
					
				
			
						parent
						
							b0dfcab60a
						
					
				
				
					commit
					74324a7ac2
				
			| @@ -1,3 +1,4 @@ | |||||||
|  | # coding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import hmac | import hmac | ||||||
| @@ -8,6 +9,7 @@ from .common import InfoExtractor | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     js_to_json, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     mimetype2ext, |     mimetype2ext, | ||||||
|     determine_ext, |     determine_ext, | ||||||
| @@ -96,6 +98,43 @@ class NYTimesBaseIE(InfoExtractor): | |||||||
|             'thumbnails': thumbnails, |             'thumbnails': thumbnails, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |     def _extract_podcast_from_json(self, json, page_id, webpage): | ||||||
|  |         audio_data = self._parse_json(json, page_id, transform_source=js_to_json)['data'] | ||||||
|  |          | ||||||
|  |         description = audio_data['track'].get('description') | ||||||
|  |         if not description: | ||||||
|  |             description = self._html_search_meta(['og:description', 'twitter:description'], webpage) | ||||||
|  |  | ||||||
|  |         episode_title = audio_data['track']['title'] | ||||||
|  |         episode_number = None | ||||||
|  |         episode = audio_data['podcast']['episode'].split() | ||||||
|  |         if episode: | ||||||
|  |             episode_number = int_or_none(episode[-1]) | ||||||
|  |             video_id = episode[-1] | ||||||
|  |         else: | ||||||
|  |             video_id = page_id | ||||||
|  |  | ||||||
|  |         podcast_title = audio_data['podcast']['title'] | ||||||
|  |         title = None | ||||||
|  |         if podcast_title: | ||||||
|  |             title = "%s: %s" % (podcast_title, episode_title) | ||||||
|  |         else: | ||||||
|  |             title = episode_title | ||||||
|  |          | ||||||
|  |         info_dict = { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'creator': audio_data['track'].get('credit'), | ||||||
|  |             'series': podcast_title, | ||||||
|  |             'episode': episode_title, | ||||||
|  |             'episode_number': episode_number, | ||||||
|  |             'url': audio_data['track']['source'], | ||||||
|  |             'duration': audio_data['track'].get('duration'), | ||||||
|  |             'description': description, | ||||||
|  |         } | ||||||
|  |          | ||||||
|  |         return info_dict | ||||||
|  |  | ||||||
|  |  | ||||||
| class NYTimesIE(NYTimesBaseIE): | class NYTimesIE(NYTimesBaseIE): | ||||||
|     _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)' |     _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)' | ||||||
| @@ -138,16 +177,37 @@ class NYTimesArticleIE(NYTimesBaseIE): | |||||||
|             'upload_date': '20150414', |             'upload_date': '20150414', | ||||||
|             'uploader': 'Matthew Williams', |             'uploader': 'Matthew Williams', | ||||||
|         } |         } | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html', | ||||||
|  |         'md5': 'e0d52040cafb07662acf3c9132db3575', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '20', | ||||||
|  |             'title': "The Run-Up: \u2018He Was Like an Octopus\u2019", | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'description': 'We go behind the story of the two women who told us that Donald Trump touched them inappropriately (which he denies) and check in on Hillary Clinton’s campaign.', | ||||||
|  |         } | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html', | ||||||
|  |         'md5': '66fb5471d7ef15da98af176dc1af4cb9', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'inside-the-new-york-times-book-review-the-rise-of-hitler', | ||||||
|  |             'title': "The Rise of Hitler", | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'description': 'Adam Kirsch discusses Volker Ullrich\'s new biography of Hitler; Billy Collins talks about his latest collection of poems; and iO Tillett Wright on his new memoir, "Darling Days."', | ||||||
|  |             } | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1', |         'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         page_id = self._match_id(url) | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, page_id) | ||||||
|  |  | ||||||
|         video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id') |         video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id', None, False) | ||||||
|  |         if video_id is not None: | ||||||
|  |             return self._extract_video_from_id(video_id) | ||||||
|          |          | ||||||
|         return self._extract_video_from_id(video_id) |         data_json = self._html_search_regex(r'NYTD\.FlexTypes\.push\(({.*})\);', webpage, 'json data') | ||||||
|  |         return self._extract_podcast_from_json(data_json, page_id, webpage) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user