[yesjapan] Add new extractor (Closes #4466)
This commit is contained in:
		| @@ -510,6 +510,7 @@ from .yahoo import ( | |||||||
|     YahooIE, |     YahooIE, | ||||||
|     YahooSearchIE, |     YahooSearchIE, | ||||||
| ) | ) | ||||||
|  | from .yesjapan import YesJapanIE | ||||||
| from .ynet import YnetIE | from .ynet import YnetIE | ||||||
| from .youjizz import YouJizzIE | from .youjizz import YouJizzIE | ||||||
| from .youku import YoukuIE | from .youku import YoukuIE | ||||||
|   | |||||||
							
								
								
									
										62
									
								
								youtube_dl/extractor/yesjapan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								youtube_dl/extractor/yesjapan.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     HEADRequest, | ||||||
|  |     get_element_by_attribute, | ||||||
|  |     parse_iso8601, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class YesJapanIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html' | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html', | ||||||
|  |         'md5': 'f0be416314e5be21a12b499b330c21cf', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '726497834', | ||||||
|  |             'title': 'Japanese in 5! #20 - WA And GA Particle Usages', | ||||||
|  |             'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'timestamp': 1416391590, | ||||||
|  |             'upload_date': '20141119', | ||||||
|  |             'thumbnail': 're:^https?://.*\.jpg$', | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |         title = self._og_search_title(webpage) | ||||||
|  |         video_url = self._og_search_video_url(webpage) | ||||||
|  |         description = self._og_search_description(webpage) | ||||||
|  |         thumbnail = self._og_search_thumbnail(webpage) | ||||||
|  |  | ||||||
|  |         timestamp = None | ||||||
|  |         submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage) | ||||||
|  |         if submit_info: | ||||||
|  |             timestamp = parse_iso8601(self._search_regex( | ||||||
|  |                 r'datetime="([^"]+)"', webpage, 'upload date', fatal=False, default=None)) | ||||||
|  |  | ||||||
|  |         # attempt to resolve the final URL in order to get a proper extension | ||||||
|  |         redirect_req = HEADRequest(video_url) | ||||||
|  |         req = self._request_webpage( | ||||||
|  |             redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False) | ||||||
|  |         if req: | ||||||
|  |             video_url = req.geturl() | ||||||
|  |  | ||||||
|  |         formats = [{ | ||||||
|  |             'format_id': 'sd', | ||||||
|  |             'url': video_url, | ||||||
|  |         }] | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'formats': formats, | ||||||
|  |             'description': description, | ||||||
|  |             'timestamp': timestamp, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |         } | ||||||
		Reference in New Issue
	
	Block a user
	 Naglis Jonaitis
					Naglis Jonaitis