Compare commits

...

18 Commits

Author SHA1 Message Date
Sergey M․
011bd3221b release 2016.06.23.1 2016-06-23 09:42:56 +07:00
Sergey M․
b46eabecd3 [jsinterp] Relax JS function regex (Closes #9863) 2016-06-23 09:41:34 +07:00
Remita Amine
0437307a41 [nbc:nbcnews] improve extraction and add msnbc to the extractor 2016-06-23 01:36:19 +01:00
Remita Amine
22b7ac13ef [tf1] fix wat id extraction(closes #9862) 2016-06-23 00:14:34 +01:00
Sergey M․
96f88e91b7 release 2016.06.23 2016-06-23 04:29:34 +07:00
Sergey M․
3331a4644d [vk] Remove unused import 2016-06-23 04:27:10 +07:00
Sergey M․
adf1921dc1 [xnxx] Improve _VALID_URL (Closes #9858) 2016-06-23 04:26:49 +07:00
Sergey M․
97674f0419 [xnxx] Replace test 2016-06-23 04:24:00 +07:00
rr-
73843ae8ac [xnxx] fix url regex
The pattern has changed from "video123412" to "video-o8xa19".
The changes maintain backwards compatibility with old-style URLs.
2016-06-23 04:19:55 +07:00
Sergey M․
f2bb8c036a [vk] Modernize 2016-06-23 04:18:43 +07:00
Sergey M․
75ca6bcee2 [vk] Workaround buggy new.vk.com Set-Cookie headers 2016-06-23 04:17:13 +07:00
Sergey M․
089657ed1f [vimeo:album] Add paged example URL 2016-06-23 02:00:03 +07:00
Sergey M․
b5eab86c24 [vimeo:album] Impove _VALID_URL 2016-06-23 01:56:58 +07:00
Sergey M․
c8e3e0974b [vimeo:channel] Improve playlist extraction 2016-06-23 01:28:36 +07:00
Purdea Andrei
dfc8f46e1c [vimeo:channel] Add video id to url_result
This will allow us to decide much faster that we don't want an already archived video,
and will allow having to download webpages for each video that has already been downloaded,
thus significantly speeding up the archival of channels that have no new content.
2016-06-23 01:26:27 +07:00
Sergey M․
c143ddce5d [vimeo] Override original URL only when necessary 2016-06-23 00:51:36 +07:00
Jaime Marquínez Ferrándiz
169d836feb lazy-extractors: Fix after commit 6e6b9f600f
The problem was in the following code:

    class ArteTVPlus7IE(ArteTVBaseIE):

        ...

        @classmethod
        def suitable(cls, url):
            return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)

And its sublcasses like ArteTVCinemaIE.

Since in the lazy_extractors.py file ArteTVCinemaIE was not a subclass of ArteTVPlus7IE, super(ArteTVPlus7IE, cls) failed.

To fix it we have to make it a subclass. Since the order of _ALL_CLASSES is arbitrary we must sort them so that the base classes are defined first. We also must add base classes like YoutubeBaseInfoExtractor.
2016-06-22 19:20:50 +02:00
TRox1972
6ae938b295 [Vine] Extract view count 2016-06-22 23:57:35 +07:00
12 changed files with 191 additions and 144 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.22**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23.1**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.06.22
[debug] youtube-dl version 2016.06.23.1
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -14,15 +14,17 @@ if os.path.exists(lazy_extractors_filename):
os.remove(lazy_extractors_filename)
from youtube_dl.extractor import _ALL_CLASSES
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
with open('devscripts/lazy_load_template.py', 'rt') as f:
module_template = f.read()
module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)]
module_contents = [
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
ie_template = '''
class {name}(LazyLoadExtractor):
class {name}({bases}):
_VALID_URL = {valid_url!r}
_module = '{module}'
'''
@@ -34,10 +36,20 @@ make_valid_template = '''
'''
def get_base_name(base):
if base is InfoExtractor:
return 'LazyLoadExtractor'
elif base is SearchInfoExtractor:
return 'LazyLoadSearchExtractor'
else:
return base.__name__
def build_lazy_ie(ie, name):
valid_url = getattr(ie, '_VALID_URL', None)
s = ie_template.format(
name=name,
bases=', '.join(map(get_base_name, ie.__bases__)),
valid_url=valid_url,
module=ie.__module__)
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
@@ -47,12 +59,35 @@ def build_lazy_ie(ie, name):
s += make_valid_template.format(valid_url=ie._make_valid_url())
return s
# find the correct sorting and add the required base classes so that sublcasses
# can be correctly created
classes = _ALL_CLASSES[:-1]
ordered_cls = []
while classes:
for c in classes[:]:
bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
stop = False
for b in bases:
if b not in classes and b not in ordered_cls:
if b.__name__ == 'GenericIE':
exit()
classes.insert(0, b)
stop = True
if stop:
break
if all(b in ordered_cls for b in bases):
ordered_cls.append(c)
classes.remove(c)
break
ordered_cls.append(_ALL_CLASSES[-1])
names = []
for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]:
name = ie.ie_key() + 'IE'
for ie in ordered_cls:
name = ie.__name__
src = build_lazy_ie(ie, name)
module_contents.append(src)
names.append(name)
if ie in _ALL_CLASSES:
names.append(name)
module_contents.append(
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))

View File

@@ -44,8 +44,8 @@
- **appletrailers:section**
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**: Saarländischer Rundfunk
- **ARD:mediathek**
- **ARD:mediathek**: Saarländischer Rundfunk
- **arte.tv**
- **arte.tv:+7**
- **arte.tv:cinema**
@@ -385,7 +385,6 @@
- **MovieFap**
- **Moviezine**
- **MPORA**
- **MSNBC**
- **MTV**
- **mtv.de**
- **mtviggy.com**

View File

@@ -480,7 +480,6 @@ from .nbc import (
NBCNewsIE,
NBCSportsIE,
NBCSportsVPlayerIE,
MSNBCIE,
)
from .ndr import (
NDRIE,

View File

@@ -9,10 +9,6 @@ from ..utils import (
lowercase_escape,
smuggle_url,
unescapeHTML,
update_url_query,
int_or_none,
HEADRequest,
parse_iso8601,
)
@@ -192,9 +188,9 @@ class CSNNEIE(InfoExtractor):
class NBCNewsIE(ThePlatformIE):
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
(?:video/.+?/(?P<id>\d+)|
([^/]+/)*(?P<display_id>[^/?]+))
([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
'''
_TESTS = [
@@ -216,13 +212,16 @@ class NBCNewsIE(ThePlatformIE):
'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
'uploader': 'NBCU-NEWS',
'timestamp': 1401363060,
'upload_date': '20140529',
},
},
{
'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
'md5': 'fdbf39ab73a72df5896b6234ff98518a',
'info_dict': {
'id': 'Wjf9EDR3A_60',
'id': '529953347624',
'ext': 'mp4',
'title': 'FULL EPISODE: Family Business',
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
@@ -237,6 +236,9 @@ class NBCNewsIE(ThePlatformIE):
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
'timestamp': 1423104900,
'uploader': 'NBCU-NEWS',
'upload_date': '20150205',
},
},
{
@@ -245,10 +247,12 @@ class NBCNewsIE(ThePlatformIE):
'info_dict': {
'id': '529953347624',
'ext': 'mp4',
'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
'upload_date': '20150922',
'timestamp': 1442917800,
'uploader': 'NBCU-NEWS',
},
'expected_warnings': ['http-6000 is not available']
},
{
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
@@ -260,6 +264,22 @@ class NBCNewsIE(ThePlatformIE):
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
'upload_date': '20160420',
'timestamp': 1461152093,
'uploader': 'NBCU-NEWS',
},
},
{
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
'id': '314487875924',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1406937606,
'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
},
{
@@ -290,105 +310,28 @@ class NBCNewsIE(ThePlatformIE):
}
else:
# "feature" and "nightly-news" pages use theplatform.com
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
info = None
bootstrap_json = self._search_regex(
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
webpage, 'bootstrap json', default=None)
bootstrap = self._parse_json(
bootstrap_json, display_id, transform_source=unescapeHTML)
if 'results' in bootstrap:
info = bootstrap['results'][0]['video']
elif 'video' in bootstrap:
info = bootstrap['video']
else:
info = bootstrap
video_id = info['mpxId']
title = info['title']
subtitles = {}
caption_links = info.get('captionLinks')
if caption_links:
for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
sub_url = caption_links.get(sub_key)
if sub_url:
subtitles.setdefault('en', []).append({
'url': sub_url,
'ext': sub_ext,
})
formats = []
for video_asset in info['videoAssets']:
video_url = video_asset.get('publicUrl')
if not video_url:
continue
container = video_asset.get('format')
asset_type = video_asset.get('assetType') or ''
if container == 'ISM' or asset_type == 'FireTV-Once':
continue
elif asset_type == 'OnceURL':
tp_formats, tp_subtitles = self._extract_theplatform_smil(
video_url, video_id)
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
video_id = mobj.group('mpx_id')
if not video_id.isdigit():
webpage = self._download_webpage(url, video_id)
info = None
bootstrap_json = self._search_regex(
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
webpage, 'bootstrap json', default=None)
bootstrap = self._parse_json(
bootstrap_json, video_id, transform_source=unescapeHTML)
if 'results' in bootstrap:
info = bootstrap['results'][0]['video']
elif 'video' in bootstrap:
info = bootstrap['video']
else:
tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000)
format_id = 'http%s' % ('-%d' % tbr if tbr else '')
video_url = update_url_query(
video_url, {'format': 'redirect'})
# resolve the url so that we can check availability and detect the correct extension
head = self._request_webpage(
HEADRequest(video_url), video_id,
'Checking %s url' % format_id,
'%s is not available' % format_id,
fatal=False)
if head:
video_url = head.geturl()
formats.append({
'format_id': format_id,
'url': video_url,
'width': int_or_none(video_asset.get('width')),
'height': int_or_none(video_asset.get('height')),
'tbr': tbr,
'container': video_asset.get('format'),
})
self._sort_formats(formats)
info = bootstrap
video_id = info['mpxId']
return {
'_type': 'url_transparent',
'id': video_id,
'title': title,
'description': info.get('description'),
'thumbnail': info.get('thumbnail'),
'duration': int_or_none(info.get('duration')),
'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')),
'formats': formats,
'subtitles': subtitles,
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
'ie_key': 'ThePlatformFeed',
}
class MSNBCIE(InfoExtractor):
# https URLs redirect to corresponding http ones
_VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
_TEST = {
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
'id': 'n_hayes_Aimm_140801_272214',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1406937606,
'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_meta('embedURL', webpage)
return self.url_result(embed_url)

View File

@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
wat_id = self._html_search_regex(
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1',
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
webpage, 'wat id', group='id')
return self.url_result('wat:%s' % wat_id, 'Wat')

View File

@@ -146,7 +146,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
\.
)?
vimeo(?P<pro>pro)?\.com/
(?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
(?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?:.*?/)?
(?:
(?:
@@ -227,8 +227,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
{
'url': 'http://vimeo.com/channels/keypeele/75629013',
'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
'note': 'Video is freely available via original URL '
'and protected with password when accessed via http://vimeo.com/75629013',
'info_dict': {
'id': '75629013',
'ext': 'mp4',
@@ -272,7 +270,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
{
# contains original format
'url': 'https://vimeo.com/33951933',
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
'md5': '2d9f5475e0537f013d0073e812ab89e6',
'info_dict': {
'id': '33951933',
'ext': 'mp4',
@@ -284,6 +282,29 @@ class VimeoIE(VimeoBaseInfoExtractor):
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
},
},
{
# only available via https://vimeo.com/channels/tributes/6213729 and
# not via https://vimeo.com/6213729
'url': 'https://vimeo.com/channels/tributes/6213729',
'info_dict': {
'id': '6213729',
'ext': 'mp4',
'title': 'Vimeo Tribute: The Shining',
'uploader': 'Casey Donahue',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue',
'uploader_id': 'caseydonahue',
'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download JSON metadata'],
},
{
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
'only_matching': True,
},
{
'url': 'https://vimeo.com/109815029',
'note': 'Video not completely processed, "failed" seed status',
@@ -293,6 +314,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
'only_matching': True,
},
{
'url': 'https://vimeo.com/album/2632481/video/79010983',
'only_matching': True,
},
{
# source file returns 403: Forbidden
'url': 'https://vimeo.com/7809605',
@@ -369,7 +394,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
orig_url = url
if mobj.group('pro') or mobj.group('player'):
url = 'https://player.vimeo.com/video/' + video_id
else:
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
@@ -630,8 +655,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
webpage = self._login_list_password(page_url, list_id, webpage)
yield self._extract_list_title(webpage)
for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
# Try extracting href first since not all videos are available via
# short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
clips = re.findall(
r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
if clips:
for video_id, video_url in clips:
yield self.url_result(
compat_urlparse.urljoin(base_url, video_url),
VimeoIE.ie_key(), video_id=video_id)
# More relaxed fallback
else:
for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
yield self.url_result(
'https://vimeo.com/%s' % video_id,
VimeoIE.ie_key(), video_id=video_id)
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
@@ -668,7 +706,7 @@ class VimeoUserIE(VimeoChannelIE):
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
'url': 'https://vimeo.com/album/2632481',
@@ -688,6 +726,13 @@ class VimeoAlbumIE(VimeoChannelIE):
'params': {
'videopassword': 'youtube-dl',
}
}, {
'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
'only_matching': True,
}, {
# TODO: respect page number
'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
'only_matching': True,
}]
def _page_url(self, base_url, pagenum):

View File

@@ -24,6 +24,7 @@ class VineIE(InfoExtractor):
'upload_date': '20130519',
'uploader': 'Jack Dorsey',
'uploader_id': '76',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
@@ -39,6 +40,7 @@ class VineIE(InfoExtractor):
'upload_date': '20140815',
'uploader': 'Mars Ruiz',
'uploader_id': '1102363502380728320',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
@@ -54,6 +56,7 @@ class VineIE(InfoExtractor):
'upload_date': '20130430',
'uploader': 'Z3k3',
'uploader_id': '936470460173008896',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
@@ -71,6 +74,7 @@ class VineIE(InfoExtractor):
'upload_date': '20150705',
'uploader': 'Pimry_zaa',
'uploader_id': '1135760698325307392',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
@@ -109,6 +113,7 @@ class VineIE(InfoExtractor):
'upload_date': unified_strdate(data.get('created')),
'uploader': username,
'uploader_id': data.get('userIdStr'),
'view_count': int_or_none(data.get('loops', {}).get('count')),
'like_count': int_or_none(data.get('likes', {}).get('count')),
'comment_count': int_or_none(data.get('comments', {}).get('count')),
'repost_count': int_or_none(data.get('reposts', {}).get('count')),

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
import json
import sys
from .common import InfoExtractor
from ..compat import compat_str
@@ -10,7 +11,6 @@ from ..utils import (
ExtractorError,
int_or_none,
orderedSet,
sanitized_Request,
str_to_int,
unescapeHTML,
unified_strdate,
@@ -190,7 +190,7 @@ class VKIE(InfoExtractor):
if username is None:
return
login_page = self._download_webpage(
login_page, url_handle = self._download_webpage_handle(
'https://vk.com', None, 'Downloading login page')
login_form = self._hidden_inputs(login_page)
@@ -200,11 +200,26 @@ class VKIE(InfoExtractor):
'pass': password.encode('cp1251'),
})
request = sanitized_Request(
'https://login.vk.com/?act=login',
urlencode_postdata(login_form))
# https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
# and expects the first one to be set rather than second (see
# https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
# As of RFC6265 the newer one cookie should be set into cookie store
# what actually happens.
# We will workaround this VK issue by resetting the remixlhk cookie to
# the first one manually.
cookies = url_handle.headers.get('Set-Cookie')
if sys.version_info[0] >= 3:
cookies = cookies.encode('iso-8859-1')
cookies = cookies.decode('utf-8')
remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
if remixlhk:
value, domain = remixlhk.groups()
self._set_cookie(domain, 'remixlhk', value)
login_page = self._download_webpage(
request, None, note='Logging in as %s' % username)
'https://login.vk.com/?act=login', None,
note='Logging in as %s' % username,
data=urlencode_postdata(login_form))
if re.search(r'onLoginFailed', login_page):
raise ExtractorError(

View File

@@ -6,17 +6,23 @@ from ..compat import compat_urllib_parse_unquote
class XNXXIE(InfoExtractor):
_VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
_TEST = {
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
'md5': '0831677e2b4761795f68d417e0b7b445',
_VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
_TESTS = [{
'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0',
'info_dict': {
'id': '1135332',
'id': '55awb78',
'ext': 'flv',
'title': 'lida » Naked Funny Actress (5)',
'title': 'Skyrim Test Video',
'age_limit': 18,
}
}
},
}, {
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
'only_matching': True,
}, {
'url': 'http://www.xnxx.com/video-55awb78/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -232,7 +232,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
func_m = re.search(
r'''(?x)
(?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
(?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
\((?P<args>[^)]*)\)\s*
\{(?P<code>[^}]+)\}''' % (
re.escape(funcname), re.escape(funcname), re.escape(funcname)),

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.06.22'
__version__ = '2016.06.23.1'