[extractor/common] Extract upload date from SMIL
This commit is contained in:
		@@ -39,6 +39,7 @@ from ..utils import (
 | 
				
			|||||||
    RegexNotFoundError,
 | 
					    RegexNotFoundError,
 | 
				
			||||||
    sanitize_filename,
 | 
					    sanitize_filename,
 | 
				
			||||||
    unescapeHTML,
 | 
					    unescapeHTML,
 | 
				
			||||||
 | 
					    unified_strdate,
 | 
				
			||||||
    url_basename,
 | 
					    url_basename,
 | 
				
			||||||
    xpath_text,
 | 
					    xpath_text,
 | 
				
			||||||
    xpath_with_ns,
 | 
					    xpath_with_ns,
 | 
				
			||||||
@@ -1044,6 +1045,7 @@ class InfoExtractor(object):
 | 
				
			|||||||
        video_id = os.path.splitext(url_basename(smil_url))[0]
 | 
					        video_id = os.path.splitext(url_basename(smil_url))[0]
 | 
				
			||||||
        title = None
 | 
					        title = None
 | 
				
			||||||
        description = None
 | 
					        description = None
 | 
				
			||||||
 | 
					        upload_date = None
 | 
				
			||||||
        for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
 | 
					        for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
 | 
				
			||||||
            name = meta.attrib.get('name')
 | 
					            name = meta.attrib.get('name')
 | 
				
			||||||
            content = meta.attrib.get('content')
 | 
					            content = meta.attrib.get('content')
 | 
				
			||||||
@@ -1053,6 +1055,8 @@ class InfoExtractor(object):
 | 
				
			|||||||
                title = content
 | 
					                title = content
 | 
				
			||||||
            elif not description and name in ('description', 'abstract'):
 | 
					            elif not description and name in ('description', 'abstract'):
 | 
				
			||||||
                description = content
 | 
					                description = content
 | 
				
			||||||
 | 
					            elif not upload_date and name == 'date':
 | 
				
			||||||
 | 
					                upload_date = unified_strdate(content)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        thumbnails = [{
 | 
					        thumbnails = [{
 | 
				
			||||||
            'id': image.get('type'),
 | 
					            'id': image.get('type'),
 | 
				
			||||||
@@ -1065,6 +1069,7 @@ class InfoExtractor(object):
 | 
				
			|||||||
            'id': video_id,
 | 
					            'id': video_id,
 | 
				
			||||||
            'title': title or video_id,
 | 
					            'title': title or video_id,
 | 
				
			||||||
            'description': description,
 | 
					            'description': description,
 | 
				
			||||||
 | 
					            'upload_date': upload_date,
 | 
				
			||||||
            'thumbnails': thumbnails,
 | 
					            'thumbnails': thumbnails,
 | 
				
			||||||
            'formats': formats,
 | 
					            'formats': formats,
 | 
				
			||||||
            'subtitles': subtitles,
 | 
					            'subtitles': subtitles,
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user