Compare commits

...

10 Commits

Author SHA1 Message Date
Sergey M․
d2161cade5 release 2016.06.16 2016-06-16 22:40:55 +07:00
Sergey M․
27e5fa8198 [cda] Fix extraction (Closes #9803) 2016-06-16 22:33:12 +07:00
Yen Chi Hsuan
efbd1eb51a [wimp] Fix extraction and update _TESTS 2016-06-16 12:27:21 +08:00
Yen Chi Hsuan
369ff75081 [jwplatform] Improved JWPlayer support 2016-06-16 12:26:45 +08:00
Yen Chi Hsuan
47212f7bcb [utils] Don't transform numbers not starting with a zero
Fix test_Viidea and maybe others
2016-06-16 11:00:54 +08:00
Sergey M․
4c93ee8d14 [imdb] Improve _VALID_URL (Closes #9788) 2016-06-15 22:34:55 +07:00
Yen Chi Hsuan
8bc4dbb1af [wrzuta.pl] Detect error and update _TESTS 2016-06-14 11:14:59 +08:00
Sergey M․
6c3760292c [pornhub] Improve title extraction (Closes #9777) 2016-06-14 04:57:59 +07:00
Sergey M․
4cef70db6c [devscripts/release.sh] Add flag for gpg-sign commits 2016-06-14 03:16:56 +07:00
Sergey M․
ff4af6ec59 [lynda] Remove superfluous _NETRC_MACHINE 2016-06-14 02:49:33 +07:00
13 changed files with 117 additions and 43 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.14**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.16**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.06.14
[debug] youtube-dl version 2016.06.16
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -15,6 +15,7 @@
set -e
skip_tests=true
gpg_sign_commits=""
buildserver='localhost:8142'
while true
@@ -24,6 +25,10 @@ case "$1" in
skip_tests=false
shift
;;
--gpg-sign-commits|-S)
gpg_sign_commits="-S"
shift
;;
--buildserver)
buildserver="$2"
shift 2
@@ -69,7 +74,7 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py
git commit -m "release $version"
git commit $gpg_sign_commits -m "release $version"
/bin/echo -e "\n### Now tagging, signing and pushing..."
git tag -s -m "Release $version" "$version"
@@ -116,7 +121,7 @@ git clone --branch gh-pages --single-branch . build/gh-pages
"$ROOT/devscripts/gh-pages/update-copyright.py"
"$ROOT/devscripts/gh-pages/update-sites.py"
git add *.html *.html.in update
git commit -m "release $version"
git commit $gpg_sign_commits -m "release $version"
git push "$ROOT" gh-pages
git push "$ORIGIN_URL" gh-pages
)

View File

@@ -44,8 +44,8 @@
- **appletrailers:section**
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**: Saarländischer Rundfunk
- **ARD:mediathek**
- **ARD:mediathek**: Saarländischer Rundfunk
- **arte.tv**
- **arte.tv:+7**
- **arte.tv:cinema**

View File

@@ -640,6 +640,9 @@ class TestUtil(unittest.TestCase):
"1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
}''')
inp = '''{"foo":101}'''
self.assertEqual(js_to_json(inp), '''{"foo":101}''')
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})

View File

@@ -58,7 +58,8 @@ class CDAIE(InfoExtractor):
def extract_format(page, version):
unpacked = decode_packed_codes(page)
format_url = self._search_regex(
r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False)
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
'%s url' % version, fatal=False, group='url')
if not format_url:
return
f = {
@@ -75,7 +76,8 @@ class CDAIE(InfoExtractor):
info_dict['formats'].append(f)
if not info_dict['duration']:
info_dict['duration'] = parse_duration(self._search_regex(
r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False))
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
unpacked, 'duration', fatal=False, group='duration'))
extract_format(webpage, 'default')

View File

@@ -12,7 +12,7 @@ from ..utils import (
class ImdbIE(InfoExtractor):
IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers'
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)'
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
@@ -25,6 +25,12 @@ class ImdbIE(InfoExtractor):
}, {
'url': 'http://www.imdb.com/video/_/vi2524815897',
'only_matching': True,
}, {
'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
'only_matching': True,
}, {
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -12,9 +12,35 @@ from ..utils import (
class JWPlatformBaseIE(InfoExtractor):
@staticmethod
def _find_jwplayer_data(webpage):
# TODO: Merge this with JWPlayer-related codes in generic.py
mobj = re.search(
'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
webpage)
if mobj:
return mobj.group('options')
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
jwplayer_data = self._parse_json(
self._find_jwplayer_data(webpage), video_id)
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
# JWPlayer backward compatibility: flattened playlists
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
if 'playlist' not in jwplayer_data:
jwplayer_data = {'playlist': [jwplayer_data]}
video_data = jwplayer_data['playlist'][0]
# JWPlayer backward compatibility: flattened sources
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
if 'sources' not in video_data:
video_data['sources'] = [video_data]
formats = []
for source in video_data['sources']:
source_url = self._proto_relative_url(source['file'])

View File

@@ -95,7 +95,6 @@ class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
_VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
_NETRC_MACHINE = 'lynda'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
@@ -39,7 +40,25 @@ class PornHubIE(InfoExtractor):
'dislike_count': int,
'comment_count': int,
'age_limit': 18,
}
},
}, {
# non-ASCII title
'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
'info_dict': {
'id': '1331683002',
'ext': 'mp4',
'title': '重庆婷婷女王足交',
'uploader': 'cj397186295',
'duration': 1753,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
@@ -76,19 +95,25 @@ class PornHubIE(InfoExtractor):
'PornHub said: %s' % error_msg,
expected=True, video_id=video_id)
# video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
title = self._html_search_meta(
'twitter:title', webpage, default=None) or self._search_regex(
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
webpage, 'title', group='title')
flashvars = self._parse_json(
self._search_regex(
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
video_id)
if flashvars:
video_title = flashvars.get('video_title')
thumbnail = flashvars.get('image_url')
duration = int_or_none(flashvars.get('video_duration'))
else:
video_title, thumbnail, duration = [None] * 3
if not video_title:
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
title, thumbnail, duration = [None] * 3
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
@@ -137,7 +162,7 @@ class PornHubIE(InfoExtractor):
return {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,

View File

@@ -1,29 +1,33 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from .jwplatform import JWPlatformBaseIE
class WimpIE(InfoExtractor):
class WimpIE(JWPlatformBaseIE):
_VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/',
'url': 'http://www.wimp.com/maru-is-exhausted/',
'md5': 'ee21217ffd66d058e8b16be340b74883',
'info_dict': {
'id': 'maruexhausted',
'id': 'maru-is-exhausted',
'ext': 'mp4',
'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
}
}, {
'url': 'http://www.wimp.com/clowncar/',
'md5': '4e2986c793694b55b37cf92521d12bb4',
'md5': '5c31ad862a90dc5b1f023956faec13fe',
'info_dict': {
'id': 'clowncar',
'id': 'cG4CEr2aiSg',
'ext': 'webm',
'title': 'It\'s like a clown car.',
'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
'title': 'Basset hound clown car...incredible!',
'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom',
'upload_date': '20140303',
'uploader': 'Gretchen Hoey',
'uploader_id': 'gretchenandjeff1',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
@@ -41,14 +45,13 @@ class WimpIE(InfoExtractor):
'ie_key': YoutubeIE.ie_key(),
}
video_url = self._search_regex(
r'<video[^>]+>\s*<source[^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'video URL', group='url')
info_dict = self._extract_jwplayer_data(
webpage, video_id, require_title=False)
return {
info_dict.update({
'id': video_id,
'url': video_url,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
}
})
return info_dict

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
qualities,
remove_start,
@@ -27,16 +28,17 @@ class WrzutaIE(InfoExtractor):
'uploader_id': 'laboratoriumdextera',
'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
},
'skip': 'Redirected to wrzuta.pl',
}, {
'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty',
'md5': 'bc78077859bea7bcfe4295d7d7fc9025',
'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus',
'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d',
'info_dict': {
'id': '063jOPX5ue2',
'ext': 'ogg',
'title': 'Liber & Natalia Szroeder - Teraz Ty',
'duration': 203,
'uploader_id': 'jolka85',
'description': 'md5:2d2b6340f9188c8c4cd891580e481096',
'id': '01xBFabGXu6',
'ext': 'mp3',
'title': 'James Horner - Into The Na\'vi World [Bonus]',
'description': 'md5:30a70718b2cd9df3120fce4445b0263b',
'duration': 95,
'uploader_id': 'vexling',
},
}]
@@ -46,7 +48,10 @@ class WrzutaIE(InfoExtractor):
typ = mobj.group('typ')
uploader = mobj.group('uploader')
webpage = self._download_webpage(url, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id)
if urlh.geturl() == 'http://www.wrzuta.pl/':
raise ExtractorError('Video removed', expected=True)
quality = qualities(['SD', 'MQ', 'HQ', 'HD'])

View File

@@ -1970,7 +1970,7 @@ def js_to_json(code):
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
/\*.*?\*/|,(?=\s*[\]}])|
[a-zA-Z_][.a-zA-Z_0-9]*|
(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
[0-9]+(?=\s*:)
''', fix_kv, code)

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.06.14'
__version__ = '2016.06.16'