Fix some IEs that didn't return the upload_date in the YYYYMMDD format
Create a function unified_strdate in utils.py to fix these problems
This commit is contained in:
		| @@ -15,6 +15,7 @@ from youtube_dl.utils import sanitize_filename | |||||||
| from youtube_dl.utils import unescapeHTML | from youtube_dl.utils import unescapeHTML | ||||||
| from youtube_dl.utils import orderedSet | from youtube_dl.utils import orderedSet | ||||||
| from youtube_dl.utils import DateRange | from youtube_dl.utils import DateRange | ||||||
|  | from youtube_dl.utils import unified_strdate | ||||||
|  |  | ||||||
| if sys.version_info < (3, 0): | if sys.version_info < (3, 0): | ||||||
|     _compat_str = lambda b: b.decode('unicode-escape') |     _compat_str = lambda b: b.decode('unicode-escape') | ||||||
| @@ -105,5 +106,11 @@ class TestUtil(unittest.TestCase): | |||||||
|         _firstmilenium = DateRange(end="10000101") |         _firstmilenium = DateRange(end="10000101") | ||||||
|         self.assertTrue("07110427" in _firstmilenium) |         self.assertTrue("07110427" in _firstmilenium) | ||||||
|          |          | ||||||
|  |     def test_unified_dates(self): | ||||||
|  |         self.assertEqual(unified_strdate('December 21, 2010'), '20101221') | ||||||
|  |         self.assertEqual(unified_strdate('8/7/2009'), '20090708') | ||||||
|  |         self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') | ||||||
|  |         self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor): | |||||||
|         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL) |         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL) | ||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) |             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) | ||||||
|             format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] |             upload_date = unified_strdate(upload_date) | ||||||
|             for expression in format_expressions: |  | ||||||
|                 try: |  | ||||||
|                     upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') |  | ||||||
|                 except: |  | ||||||
|                     pass |  | ||||||
|  |  | ||||||
|         # description |         # description | ||||||
|         video_description = get_element_by_id("eow-description", video_webpage) |         video_description = get_element_by_id("eow-description", video_webpage) | ||||||
| @@ -2385,7 +2380,7 @@ class ComedyCentralIE(InfoExtractor): | |||||||
|             shortMediaId = mediaId.split(':')[-1] |             shortMediaId = mediaId.split(':')[-1] | ||||||
|             showId = mediaId.split(':')[-2].replace('.com', '') |             showId = mediaId.split(':')[-2].replace('.com', '') | ||||||
|             officialTitle = itemEl.findall('./title')[0].text |             officialTitle = itemEl.findall('./title')[0].text | ||||||
|             officialDate = itemEl.findall('./pubDate')[0].text |             officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text) | ||||||
|  |  | ||||||
|             configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + |             configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + | ||||||
|                         compat_urllib_parse.urlencode({'uri': mediaId})) |                         compat_urllib_parse.urlencode({'uri': mediaId})) | ||||||
| @@ -2695,12 +2690,13 @@ class SoundcloudIE(InfoExtractor): | |||||||
|  |  | ||||||
|         streams = json.loads(stream_json) |         streams = json.loads(stream_json) | ||||||
|         mediaURL = streams['http_mp3_128_url'] |         mediaURL = streams['http_mp3_128_url'] | ||||||
|  |         upload_date = unified_strdate(info['created_at']) | ||||||
|  |  | ||||||
|         return [{ |         return [{ | ||||||
|             'id':       info['id'], |             'id':       info['id'], | ||||||
|             'url':      mediaURL, |             'url':      mediaURL, | ||||||
|             'uploader': info['user']['username'], |             'uploader': info['user']['username'], | ||||||
|             'upload_date':  info['created_at'], |             'upload_date': upload_date, | ||||||
|             'title':    info['title'], |             'title':    info['title'], | ||||||
|             'ext':      u'mp3', |             'ext':      u'mp3', | ||||||
|             'description': info['description'], |             'description': info['description'], | ||||||
| @@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor): | |||||||
|             self._downloader.report_warning(u'unable to extract video date') |             self._downloader.report_warning(u'unable to extract video date') | ||||||
|             upload_date = None |             upload_date = None | ||||||
|         else: |         else: | ||||||
|             upload_date = result.group('date').strip() |             upload_date = unified_strdate(result.group('date').strip()) | ||||||
|  |  | ||||||
|         # Get the video uploader |         # Get the video uploader | ||||||
|         result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage) |         result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage) | ||||||
| @@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor): | |||||||
|         if result is None: |         if result is None: | ||||||
|             self._downloader.report_error(u'unable to extract video title') |             self._downloader.report_error(u'unable to extract video title') | ||||||
|             return |             return | ||||||
|         upload_date = result.group('date') |         upload_date = unified_strdate(result.group('date')) | ||||||
|  |  | ||||||
|         info = {'id': video_id, |         info = {'id': video_id, | ||||||
|                 'url': video_url, |                 'url': video_url, | ||||||
|   | |||||||
| @@ -570,6 +570,21 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | |||||||
|     https_request = http_request |     https_request = http_request | ||||||
|     https_response = http_response |     https_response = http_response | ||||||
|  |  | ||||||
|  | def unified_strdate(date_str): | ||||||
|  |     """Return a string with the date in the format YYYYMMDD""" | ||||||
|  |     upload_date = None | ||||||
|  |     #Replace commas | ||||||
|  |     date_str = date_str.replace(',',' ') | ||||||
|  |     # %z (UTC offset) is only supported in python>=3.2 | ||||||
|  |     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) | ||||||
|  |     format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S'] | ||||||
|  |     for expression in format_expressions: | ||||||
|  |         try: | ||||||
|  |             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||||
|  |         except: | ||||||
|  |             pass | ||||||
|  |     return upload_date | ||||||
|  |  | ||||||
| def date_from_str(date_str): | def date_from_str(date_str): | ||||||
|     """Return a datetime object from a string in the format YYYYMMDD""" |     """Return a datetime object from a string in the format YYYYMMDD""" | ||||||
|     return datetime.datetime.strptime(date_str, "%Y%m%d").date() |     return datetime.datetime.strptime(date_str, "%Y%m%d").date() | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz