[yesjapan] Add new extractor (Closes #4466)
This commit is contained in:
		| @@ -510,6 +510,7 @@ from .yahoo import ( | ||||
|     YahooIE, | ||||
|     YahooSearchIE, | ||||
| ) | ||||
| from .yesjapan import YesJapanIE | ||||
| from .ynet import YnetIE | ||||
| from .youjizz import YouJizzIE | ||||
| from .youku import YoukuIE | ||||
|   | ||||
							
								
								
									
										62
									
								
								youtube_dl/extractor/yesjapan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								youtube_dl/extractor/yesjapan.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     HEADRequest, | ||||
|     get_element_by_attribute, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class YesJapanIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html', | ||||
|         'md5': 'f0be416314e5be21a12b499b330c21cf', | ||||
|         'info_dict': { | ||||
|             'id': '726497834', | ||||
|             'title': 'Japanese in 5! #20 - WA And GA Particle Usages', | ||||
|             'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....', | ||||
|             'ext': 'mp4', | ||||
|             'timestamp': 1416391590, | ||||
|             'upload_date': '20141119', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._og_search_title(webpage) | ||||
|         video_url = self._og_search_video_url(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         timestamp = None | ||||
|         submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage) | ||||
|         if submit_info: | ||||
|             timestamp = parse_iso8601(self._search_regex( | ||||
|                 r'datetime="([^"]+)"', webpage, 'upload date', fatal=False, default=None)) | ||||
|  | ||||
|         # attempt to resolve the final URL in order to get a proper extension | ||||
|         redirect_req = HEADRequest(video_url) | ||||
|         req = self._request_webpage( | ||||
|             redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False) | ||||
|         if req: | ||||
|             video_url = req.geturl() | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'sd', | ||||
|             'url': video_url, | ||||
|         }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
		Reference in New Issue
	
	Block a user
	 Naglis Jonaitis
					Naglis Jonaitis