Compare commits
169 Commits
2016.09.15
...
2016.10.12
Author | SHA1 | Date | |
---|---|---|---|
![]() |
5c4bfd4da5 | ||
![]() |
7104ae799c | ||
![]() |
bcd6276520 | ||
![]() |
591e384552 | ||
![]() |
9feb1c9731 | ||
![]() |
a093cfc78b | ||
![]() |
6f20b65e72 | ||
![]() |
cea364f70c | ||
![]() |
55642487f0 | ||
![]() |
3d643f4cec | ||
![]() |
c452e69d3d | ||
![]() |
555787d717 | ||
![]() |
f165ca70eb | ||
![]() |
27b8d2ee95 | ||
![]() |
71cdcb2331 | ||
![]() |
176006a120 | ||
![]() |
65f4c1de3d | ||
![]() |
b0082629a9 | ||
![]() |
8204c73352 | ||
![]() |
2b51dac1f9 | ||
![]() |
f68901e50a | ||
![]() |
3adb9d119e | ||
![]() |
1dd58e14d8 | ||
![]() |
dd4291f729 | ||
![]() |
888f8d6ba4 | ||
![]() |
f475e88121 | ||
![]() |
3c6b3bf221 | ||
![]() |
38588ab977 | ||
![]() |
85bcdd081c | ||
![]() |
9dcd6fd3aa | ||
![]() |
98763ee354 | ||
![]() |
3d83a1ae92 | ||
![]() |
c0a7b9b348 | ||
![]() |
831a34caa2 | ||
![]() |
09b9c45e24 | ||
![]() |
33898fb19c | ||
![]() |
017eb82934 | ||
![]() |
b1d798887e | ||
![]() |
0a33bb2cb2 | ||
![]() |
185744f92f | ||
![]() |
7232e54813 | ||
![]() |
6eb5503b12 | ||
![]() |
539c881bfc | ||
![]() |
c1b2a0858c | ||
![]() |
215ff6e0f3 | ||
![]() |
dcdb292fdd | ||
![]() |
c1084ddb0c | ||
![]() |
ee5de4e38e | ||
![]() |
25291b979a | ||
![]() |
567a5996ca | ||
![]() |
6c152ce20f | ||
![]() |
26406d33c7 | ||
![]() |
703b3afa93 | ||
![]() |
99ed78c79e | ||
![]() |
fd15264172 | ||
![]() |
bd26441205 | ||
![]() |
b19e275d99 | ||
![]() |
f6ba581f89 | ||
![]() |
6d2549fb4f | ||
![]() |
4da4516973 | ||
![]() |
e1e97c2446 | ||
![]() |
53a7e3d287 | ||
![]() |
d54739a2e6 | ||
![]() |
63e0fd5bcc | ||
![]() |
9c51a24642 | ||
![]() |
9bd7bd0b80 | ||
![]() |
4a76b73c6c | ||
![]() |
e295618f9e | ||
![]() |
d7753d1948 | ||
![]() |
eaf9b22f94 | ||
![]() |
a1001f47fc | ||
![]() |
1609782258 | ||
![]() |
de6babf922 | ||
![]() |
b0582fc806 | ||
![]() |
af33dd8ee7 | ||
![]() |
70d7b323b6 | ||
![]() |
a7ee8a00f4 | ||
![]() |
c6eed6b8c0 | ||
![]() |
3aa3953d28 | ||
![]() |
efa97bdcf1 | ||
![]() |
475f8a4580 | ||
![]() |
93aa0b6318 | ||
![]() |
0ce26ef228 | ||
![]() |
0d72ff9c51 | ||
![]() |
a56e74e271 | ||
![]() |
f533490bb7 | ||
![]() |
8bfda726c2 | ||
![]() |
8f0cf20ab9 | ||
![]() |
c8f45f763c | ||
![]() |
dd2cffeeec | ||
![]() |
cdfcc4ce95 | ||
![]() |
e384552590 | ||
![]() |
1a2fbe322e | ||
![]() |
f9dd86a112 | ||
![]() |
2342733f85 | ||
![]() |
93933c9819 | ||
![]() |
d75d9e343e | ||
![]() |
72c3d02d29 | ||
![]() |
d3dbb46330 | ||
![]() |
fffb9cff94 | ||
![]() |
d3c97bad61 | ||
![]() |
2d5b4af007 | ||
![]() |
f1ee462c82 | ||
![]() |
5742c18bc1 | ||
![]() |
ddb19772d5 | ||
![]() |
a3d8b38168 | ||
![]() |
e590b7ff9e | ||
![]() |
f3625cc4ca | ||
![]() |
2d3d29976b | ||
![]() |
493353c7fd | ||
![]() |
0a078550b9 | ||
![]() |
f92bb612c6 | ||
![]() |
ddde91952f | ||
![]() |
63c583eb2c | ||
![]() |
7fd57de6fb | ||
![]() |
e71a450956 | ||
![]() |
27e99078d3 | ||
![]() |
6f126d903f | ||
![]() |
7518a61d41 | ||
![]() |
8e45e1cc4d | ||
![]() |
f0bc5a8609 | ||
![]() |
a54ffb8aa7 | ||
![]() |
8add4bfecb | ||
![]() |
0711995bca | ||
![]() |
5968d7d2fe | ||
![]() |
e6332059ac | ||
![]() |
8eec691e8a | ||
![]() |
24628cf7db | ||
![]() |
71ad00c09f | ||
![]() |
45cae3b021 | ||
![]() |
4ddcb5999d | ||
![]() |
628406db96 | ||
![]() |
e3d6bdc8fc | ||
![]() |
0a439c5c4c | ||
![]() |
1978540a51 | ||
![]() |
12f211d0cb | ||
![]() |
3a5a18705f | ||
![]() |
1ae0ae5db0 | ||
![]() |
f62a77b99a | ||
![]() |
4bfd294e2f | ||
![]() |
e33a7253b2 | ||
![]() |
c38f06818d | ||
![]() |
cb57386873 | ||
![]() |
59fd8f931d | ||
![]() |
70b4cf9b1b | ||
![]() |
cc764a6da8 | ||
![]() |
d8dbf8707d | ||
![]() |
a1da888d0c | ||
![]() |
3acff9423d | ||
![]() |
9ca93b99d1 | ||
![]() |
14ae11efab | ||
![]() |
190d2027d0 | ||
![]() |
26394d021d | ||
![]() |
30d0b549be | ||
![]() |
86f4d14f81 | ||
![]() |
21d21b0c72 | ||
![]() |
b4c1d6e800 | ||
![]() |
a0d5077c8d | ||
![]() |
584d6f3457 | ||
![]() |
e14c82bd6b | ||
![]() |
c51a7f0b2f | ||
![]() |
d05ef09d9d | ||
![]() |
30d9e20938 | ||
![]() |
fc86d4eed0 | ||
![]() |
7d273a387a | ||
![]() |
6ad0219556 | ||
![]() |
98b7506e96 | ||
![]() |
52dc8a9b3f | ||
![]() |
9d8985a165 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.12*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.12**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.09.15
|
||||
[debug] youtube-dl version 2016.10.12
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
5
.github/PULL_REQUEST_TEMPLATE.md
vendored
5
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -10,8 +10,13 @@
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
|
||||
### In order to be accepted and merged into youtube-dl each piece of code must be in the public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
|
||||
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
|
||||
- [ ] I am not the original author of this code but it is in the public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence)
|
||||
|
||||
### What is the purpose of your *pull request*?
|
||||
- [ ] Bug fix
|
||||
- [ ] Improvement
|
||||
- [ ] New extractor
|
||||
- [ ] New feature
|
||||
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,6 +29,7 @@ updates_key.pem
|
||||
*.m4a
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.3gp
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
|
2
AUTHORS
2
AUTHORS
@@ -26,7 +26,7 @@ Albert Kim
|
||||
Pierre Rudloff
|
||||
Huarong Huo
|
||||
Ismael Mejía
|
||||
Steffan 'Ruirize' James
|
||||
Steffan Donal
|
||||
Andras Elso
|
||||
Jelle van der Waa
|
||||
Marcin Cieślak
|
||||
|
@@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make (both GNU make and BSD make are supported)
|
||||
* make (only GNU make is supported)
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
|
140
ChangeLog
140
ChangeLog
@@ -1,3 +1,143 @@
|
||||
version 2016.10.12
|
||||
|
||||
Core
|
||||
+ Support HTML media elements without child nodes
|
||||
* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387)
|
||||
|
||||
Extractors
|
||||
* [dailymotion] Fix extraction (#10901)
|
||||
* [vimeo:review] Fix extraction (#10900)
|
||||
* [nhl] Correctly handle invalid formats (#10713)
|
||||
* [footyroom] Fix extraction (#10810)
|
||||
* [abc.net.au:iview] Fix for standalone (non series) videos (#10895)
|
||||
+ [hbo] Add support for episode pages (#10892)
|
||||
* [allocine] Fix extraction (#10860)
|
||||
+ [nextmedia] Recognize action news on AppleDaily
|
||||
* [lego] Improve info extraction and bypass geo restriction (#10872)
|
||||
|
||||
|
||||
version 2016.10.07
|
||||
|
||||
Extractors
|
||||
+ [iprima] Detect geo restriction
|
||||
* [facebook] Fix video extraction (#10846)
|
||||
+ [commonprotocols] Support direct MMS links (#10838)
|
||||
+ [generic] Add support for multiple vimeo embeds (#10862)
|
||||
+ [nzz] Add support for nzz.ch (#4407)
|
||||
+ [npo] Detect geo restriction
|
||||
+ [npo] Add support for 2doc.nl (#10842)
|
||||
+ [lego] Add support for lego.com (#10369)
|
||||
+ [tonline] Add support for t-online.de (#10376)
|
||||
* [techtalks] Relax URL regular expression (#10840)
|
||||
* [youtube:live] Extend URL regular expression (#10839)
|
||||
+ [theweatherchannel] Add support for weather.com (#7188)
|
||||
+ [thisoldhouse] Add support for thisoldhouse.com (#10837)
|
||||
+ [nhl] Add support for wch2016.com (#10833)
|
||||
* [pornoxo] Use JWPlatform to improve metadata extraction
|
||||
|
||||
|
||||
version 2016.10.02
|
||||
|
||||
Core
|
||||
* Fix possibly lost extended attributes during post-processing
|
||||
+ Support pyxattr as well as python-xattr for --xattrs and
|
||||
--xattr-set-filesize (#9054)
|
||||
|
||||
Extractors
|
||||
+ [jwplatform] Support DASH streams in JWPlayer
|
||||
+ [jwplatform] Support old-style JWPlayer playlists
|
||||
+ [byutv:event] Add extractor
|
||||
* [periscope:user] Fix extraction (#10820)
|
||||
* [dctp] Fix extraction (#10734)
|
||||
+ [instagram] Extract video dimensions (#10790)
|
||||
+ [tvland] Extend URL regular expression (#10812)
|
||||
+ [vgtv] Add support for tv.aftonbladet.se (#10800)
|
||||
- [aftonbladet] Remove extractor
|
||||
* [vk] Fix timestamp and view count extraction (#10760)
|
||||
+ [vk] Add support for running and finished live streams (#10799)
|
||||
+ [leeco] Recognize more Le Sports URLs (#10794)
|
||||
+ [instagram] Extract comments (#10788)
|
||||
+ [ketnet] Extract mzsource formats (#10770)
|
||||
* [limelight:media] Improve HTTP formats extraction
|
||||
|
||||
|
||||
version 2016.09.27
|
||||
|
||||
Core
|
||||
+ Add hdcore query parameter to akamai f4m formats
|
||||
+ Delegate HLS live streams downloading to ffmpeg
|
||||
+ Improved support for HTML5 subtitles
|
||||
|
||||
Extractors
|
||||
+ [vk] Add support for dailymotion embeds (#10661)
|
||||
* [promptfile] Fix extraction (#10634)
|
||||
* [kaltura] Speed up embed regular expressions (#10764)
|
||||
+ [npo] Add support for anderetijden.nl (#10754)
|
||||
+ [prosiebensat1] Add support for advopedia sites
|
||||
* [mwave] Relax URL regular expression (#10735, #10748)
|
||||
* [prosiebensat1] Fix playlist support (#10745)
|
||||
+ [prosiebensat1] Add support for sat1gold sites (#10745)
|
||||
+ [cbsnews:livevideo] Fix extraction and extract m3u8 formats
|
||||
+ [brightcove:new] Add support for live streams
|
||||
* [soundcloud] Generalize playlist entries extraction (#10733)
|
||||
+ [mtv] Add support for new URL schema (#8169, #9808)
|
||||
* [einthusan] Fix extraction (#10714)
|
||||
+ [twitter] Support Periscope embeds (#10737)
|
||||
+ [openload] Support subtitles (#10625)
|
||||
|
||||
|
||||
version 2016.09.24
|
||||
|
||||
Core
|
||||
+ Add support for watchTVeverywhere.com authentication provider based MSOs for
|
||||
Adobe Pass authentication (#10709)
|
||||
|
||||
Extractors
|
||||
+ [soundcloud:playlist] Provide video id for early playlist entries (#10733)
|
||||
+ [prosiebensat1] Add support for kabeleinsdoku (#10732)
|
||||
* [cbs] Extract info from thunder videoPlayerService (#10728)
|
||||
* [openload] Fix extraction (#10408)
|
||||
+ [ustream] Support the new HLS streams (#10698)
|
||||
+ [ooyala] Extract all HLS formats
|
||||
+ [cartoonnetwork] Add support for Adobe Pass authentication
|
||||
+ [soundcloud] Extract license metadata
|
||||
+ [fox] Add support for Adobe Pass authentication (#8584)
|
||||
+ [tbs] Add support for Adobe Pass authentication (#10642, #10222)
|
||||
+ [trutv] Add support for Adobe Pass authentication (#10519)
|
||||
+ [turner] Add support for Adobe Pass authentication
|
||||
|
||||
|
||||
version 2016.09.19
|
||||
|
||||
Extractors
|
||||
+ [crunchyroll] Check if already authenticated (#10700)
|
||||
- [twitch:stream] Remove fallback to profile extraction when stream is offline
|
||||
* [thisav] Improve title extraction (#10682)
|
||||
* [vyborymos] Improve station info extraction
|
||||
|
||||
|
||||
version 2016.09.18
|
||||
|
||||
Core
|
||||
+ Introduce manifest_url and fragments fields in formats dictionary for
|
||||
fragmented media
|
||||
+ Provide manifest_url field for DASH segments, HLS and HDS
|
||||
+ Provide fragments field for DASH segments
|
||||
* Rework DASH segments downloader to use fragments field
|
||||
+ Add helper method for Wowza Streaming Engine formats extraction
|
||||
|
||||
Extractors
|
||||
+ [vyborymos] Add extractor for vybory.mos.ru (#10692)
|
||||
+ [xfileshare] Add title regular expression for streamin.to (#10646)
|
||||
+ [globo:article] Add support for multiple videos (#10653)
|
||||
+ [thisav] Recognize HTML5 videos (#10447)
|
||||
* [jwplatform] Improve JWPlayer detection
|
||||
+ [mangomolo] Add support for Mangomolo embeds
|
||||
+ [toutv] Add support for authentication (#10669)
|
||||
* [franceinter] Fix upload date extraction
|
||||
* [tv4] Fix HLS and HDS formats extraction (#10659)
|
||||
|
||||
|
||||
version 2016.09.15
|
||||
|
||||
Core
|
||||
|
6
Makefile
6
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
|
||||
PYTHON ?= /usr/bin/env python
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
|
||||
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
|
||||
|
||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
install -d $(DESTDIR)$(BINDIR)
|
||||
@@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish
|
||||
|
||||
lazy-extractors: youtube_dl/extractor/lazy_extractors.py
|
||||
|
||||
_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py'
|
||||
_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
|
||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
|
@@ -923,7 +923,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make (both GNU make and BSD make are supported)
|
||||
* make (only GNU make is supported)
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
#
|
||||
# youtube-dl documentation build configuration file, created by
|
||||
# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
|
||||
|
@@ -34,12 +34,12 @@
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **AfreecaTV**: afreecatv.com
|
||||
- **Aftonbladet**
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AMCNetworks**
|
||||
- **anderetijden**: npo.nl and ntr.nl
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
@@ -111,6 +111,7 @@
|
||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **BYUtvEvent**
|
||||
- **Camdemy**
|
||||
- **CamdemyFolder**
|
||||
- **CamWithHer**
|
||||
@@ -127,8 +128,8 @@
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CCTV**
|
||||
- **CDA**
|
||||
@@ -288,6 +289,7 @@
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
- **HBO**
|
||||
- **HBOEpisode**
|
||||
- **HearThisAt**
|
||||
- **Heise**
|
||||
- **HellPorno**
|
||||
@@ -363,6 +365,7 @@
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
@@ -388,6 +391,8 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
@@ -422,8 +427,9 @@
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **mtg**: MTG services
|
||||
- **MTV**
|
||||
- **mtv**
|
||||
- **mtv.de**
|
||||
- **mtv:video**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
@@ -503,6 +509,7 @@
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **NZZ**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
- **Odnoklassniki**
|
||||
@@ -688,6 +695,7 @@
|
||||
- **SWRMediathek**
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
- **t-online.de**
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
@@ -717,8 +725,10 @@
|
||||
- **TheScene**
|
||||
- **TheSixtyOne**
|
||||
- **TheStar**
|
||||
- **TheWeatherChannel**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
@@ -849,6 +859,7 @@
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VyboryMos**
|
||||
- **Walla**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
@@ -862,7 +873,7 @@
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **WNL**
|
||||
- **wnl**: npo.nl and ntr.nl
|
||||
- **WorldStarHipHop**
|
||||
- **wrzuta.pl**
|
||||
- **wrzuta.pl:playlist**
|
||||
|
2
setup.py
2
setup.py
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
|
@@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase):
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
r = ydl.extract_info('http://localhost:%d/302' % self.port)
|
||||
self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
class TestHTTPS(unittest.TestCase):
|
||||
@@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase):
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
|
@@ -292,6 +292,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
|
||||
|
||||
def test_unified_timestamps(self):
|
||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||
@@ -312,6 +313,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@@ -266,8 +266,6 @@ def _real_main(argv=None):
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
})
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
if opts.embedthumbnail:
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
@@ -276,6 +274,10 @@ def _real_main(argv=None):
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
# XAttrMetadataPP should be run after post-processors that may change file
|
||||
# contents
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
@@ -283,12 +285,6 @@ def _real_main(argv=None):
|
||||
'key': 'ExecAfterDownload',
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
})
|
||||
if opts.xattr_set_filesize:
|
||||
try:
|
||||
import xattr
|
||||
xattr # Confuse flake8
|
||||
except ImportError:
|
||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||
external_downloader_args = None
|
||||
if opts.external_downloader_args:
|
||||
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
|
||||
|
@@ -346,7 +346,6 @@ class FileDownloader(object):
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
print(min_sleep_interval, max_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
||||
time.sleep(sleep_interval)
|
||||
|
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
@@ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD):
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
base_url = info_dict['url']
|
||||
segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
|
||||
initialization_url = info_dict.get('initialization_url')
|
||||
segments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(segment_urls) + (1 if initialization_url else 0),
|
||||
'total_frags': len(segments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
def process_segment(segment, tmp_filename, fatal):
|
||||
target_url, segment_name = segment
|
||||
def process_segment(segment, tmp_filename, num):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % num
|
||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = num == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
|
||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
@@ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD):
|
||||
return False
|
||||
return True
|
||||
|
||||
segments_to_download = [(initialization_url, 'Init')] if initialization_url else []
|
||||
segments_to_download.extend([
|
||||
(segment_url, 'Seg%d' % i)
|
||||
for i, segment_url in enumerate(segment_urls)])
|
||||
|
||||
for i, segment in enumerate(segments_to_download):
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
if not process_segment(segment, ctx['tmpfilename'], fatal):
|
||||
for i, segment in enumerate(segments):
|
||||
if not process_segment(segment, ctx['tmpfilename'], i):
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
@@ -31,7 +31,7 @@ class HlsFD(FragmentFD):
|
||||
FD_NAME = 'hlsnative'
|
||||
|
||||
@staticmethod
|
||||
def can_download(manifest):
|
||||
def can_download(manifest, info_dict):
|
||||
UNSUPPORTED_FEATURES = (
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
@@ -53,6 +53,7 @@ class HlsFD(FragmentFD):
|
||||
)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
|
||||
check_results.append(not info_dict.get('is_live'))
|
||||
return all(check_results)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
@@ -62,7 +63,7 @@ class HlsFD(FragmentFD):
|
||||
|
||||
s = manifest.decode('utf-8', 'ignore')
|
||||
|
||||
if not self.can_download(s):
|
||||
if not self.can_download(s, info_dict):
|
||||
self.report_warning(
|
||||
'hlsnative has detected features it does not support, '
|
||||
'extraction will be delegated to ffmpeg')
|
||||
|
@@ -13,6 +13,9 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
write_xattr,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
)
|
||||
|
||||
|
||||
@@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
import xattr
|
||||
xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
|
||||
except(OSError, IOError, ImportError) as err:
|
||||
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
|
||||
try:
|
||||
|
@@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
|
||||
'md5': '979d10b2939101f0d27a06b79edad536',
|
||||
'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'FA1505V024S00',
|
||||
'id': 'ZX9735A001S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 27 Ep 24',
|
||||
'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
|
||||
'upload_date': '20160820',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1471719600,
|
||||
'title': 'Diaries Of A Broken Mind',
|
||||
'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
|
||||
'upload_date': '20161010',
|
||||
'uploader_id': 'abc2',
|
||||
'timestamp': 1476064920,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}]
|
||||
@@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_params = self._parse_json(self._search_regex(
|
||||
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
||||
title = video_params['title']
|
||||
title = video_params.get('title') or video_params['seriesTitle']
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||
|
||||
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
|
||||
@@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': video_params.get('seriesTitle'),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
|
||||
'episode': self._html_search_meta('episode_title', webpage),
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,64 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class AftonbladetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
|
||||
'info_dict': {
|
||||
'id': '36015',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
||||
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
||||
'timestamp': 1394142732,
|
||||
'upload_date': '20140306',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# find internal video meta data
|
||||
meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
|
||||
player_config = self._parse_json(self._html_search_regex(
|
||||
r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
|
||||
internal_meta_id = player_config['aptomaVideoId']
|
||||
internal_meta_url = meta_url % internal_meta_id
|
||||
internal_meta_json = self._download_json(
|
||||
internal_meta_url, video_id, 'Downloading video meta data')
|
||||
|
||||
# find internal video formats
|
||||
format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||
internal_video_id = internal_meta_json['videoId']
|
||||
internal_formats_url = format_url % internal_video_id
|
||||
internal_formats_json = self._download_json(
|
||||
internal_formats_url, video_id, 'Downloading video formats')
|
||||
|
||||
formats = []
|
||||
for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
|
||||
p = fmt['paths'][0]
|
||||
formats.append({
|
||||
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
|
||||
'ext': 'mp4',
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
'tbr': int_or_none(fmt.get('bitrate')),
|
||||
'protocol': 'http',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': internal_meta_json['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': internal_meta_json.get('imageUrl'),
|
||||
'description': internal_meta_json.get('shortPreamble'),
|
||||
'timestamp': int_or_none(internal_meta_json.get('timePublished')),
|
||||
'duration': int_or_none(internal_meta_json.get('duration')),
|
||||
'view_count': int_or_none(internal_meta_json.get('views')),
|
||||
}
|
@@ -1,29 +1,26 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
xpath_element,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class AllocineIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
|
||||
'md5': '0c9fcf59a841f65635fa300ac43d8269',
|
||||
'info_dict': {
|
||||
'id': '19546517',
|
||||
'display_id': '18635087',
|
||||
'ext': 'mp4',
|
||||
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
|
||||
'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
|
||||
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
@@ -31,64 +28,82 @@ class AllocineIE(InfoExtractor):
|
||||
'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
|
||||
'info_dict': {
|
||||
'id': '19540403',
|
||||
'display_id': '19540403',
|
||||
'ext': 'mp4',
|
||||
'title': 'Planes 2 Bande-annonce VF',
|
||||
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
|
||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
|
||||
'md5': '101250fb127ef9ca3d73186ff22a47ce',
|
||||
'info_dict': {
|
||||
'id': '19544709',
|
||||
'display_id': '19544709',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||
'description': 'md5:601d15393ac40f249648ef000720e7e3',
|
||||
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/video-19550147/',
|
||||
'only_matching': True,
|
||||
'md5': '3566c0668c0235e2d224fd8edb389f67',
|
||||
'info_dict': {
|
||||
'id': '19550147',
|
||||
'ext': 'mp4',
|
||||
'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
|
||||
'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
typ = mobj.group('typ')
|
||||
display_id = mobj.group('id')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if typ == 'film':
|
||||
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
||||
else:
|
||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
|
||||
if player:
|
||||
player_data = json.loads(player)
|
||||
video_id = compat_str(player_data['refMedia'])
|
||||
else:
|
||||
model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
|
||||
model_data = self._parse_json(unescapeHTML(model), display_id)
|
||||
video_id = compat_str(model_data['id'])
|
||||
|
||||
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
||||
|
||||
video = xpath_element(xml, './/AcVisionVideo').attrib
|
||||
formats = []
|
||||
quality = qualities(['ld', 'md', 'hd'])
|
||||
|
||||
formats = []
|
||||
for k, v in video.items():
|
||||
if re.match(r'.+_path', k):
|
||||
format_id = k.split('_')[0]
|
||||
model = self._html_search_regex(
|
||||
r'data-model="([^"]+)"', webpage, 'data model', default=None)
|
||||
if model:
|
||||
model_data = self._parse_json(model, display_id)
|
||||
|
||||
for video_url in model_data['sources'].values():
|
||||
video_id, format_id = url_basename(video_url).split('_')[:2]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': v,
|
||||
'url': video_url,
|
||||
})
|
||||
|
||||
title = model_data['title']
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
continue
|
||||
|
||||
format_id = key[:-len('Path')]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': value,
|
||||
})
|
||||
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title'
|
||||
).strip(), ' - AlloCiné')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['videoTitle'],
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
|
@@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires TV provider accounts',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -50,25 +50,6 @@ class AWAANBaseIE(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
|
||||
formats = []
|
||||
format_url_base = 'http' + self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url_base + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url_base + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
class AWAANVideoIE(AWAANBaseIE):
|
||||
IE_NAME = 'awaan:video'
|
||||
@@ -85,6 +66,7 @@ class AWAANVideoIE(AWAANBaseIE):
|
||||
'duration': 2041,
|
||||
'timestamp': 1227504126,
|
||||
'upload_date': '20081124',
|
||||
'uploader_id': '71',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
|
||||
@@ -99,16 +81,18 @@ class AWAANVideoIE(AWAANBaseIE):
|
||||
video_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(video_data, video_id, False)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
|
||||
compat_urllib_parse_urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
}), video_id)
|
||||
info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native')
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'MangomoloVideo',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -138,16 +122,18 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||
channel_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(channel_data, channel_id, True)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' +
|
||||
compat_urllib_parse_urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
}), channel_id)
|
||||
info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8')
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'MangomoloLive',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -621,15 +621,21 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'url': text_track['src'],
|
||||
})
|
||||
|
||||
is_live = False
|
||||
duration = float_or_none(json_data.get('duration'), 1000)
|
||||
if duration and duration < 0:
|
||||
is_live = True
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||
'duration': duration,
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': account_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,15 +7,15 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||
'info_dict': {
|
||||
'id': 'studio-c-season-5-episode-5',
|
||||
'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
|
||||
'display_id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
|
||||
'title': 'Season 5 Episode 5',
|
||||
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 1486.486,
|
||||
},
|
||||
@@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
episode_code = self._search_regex(
|
||||
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
||||
|
||||
ep = self._parse_json(
|
||||
episode_code, display_id, transform_source=lambda s:
|
||||
re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
|
||||
|
||||
if ep['providerType'] != 'Ooyala':
|
||||
raise ExtractorError('Unsupported provider %s' % ep['provider'])
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': ep['title'],
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
|
||||
|
||||
class BYUtvEventIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
|
||||
'info_dict': {
|
||||
'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Toledo vs. BYU (9/30/16)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
episode_code = self._search_regex(
|
||||
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
||||
episode_json = re.sub(
|
||||
r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
|
||||
ep = json.loads(episode_json)
|
||||
|
||||
if ep['providerType'] == 'Ooyala':
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'title': ep['title'],
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('Unsupported provider %s' % ep['provider'])
|
||||
ooyala_id = self._search_regex(
|
||||
r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala id', group='id')
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
|
||||
'title').strip()
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ooyala_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -33,4 +33,10 @@ class CartoonNetworkIE(TurnerBaseIE):
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
|
||||
'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
}, {
|
||||
'url': url,
|
||||
'site_name': 'CartoonNetwork',
|
||||
'auth_required': self._search_regex(
|
||||
r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
|
||||
webpage, 'auth required', default='false') == 'true',
|
||||
})
|
||||
|
@@ -4,7 +4,9 @@ from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
ExtractorError,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -47,27 +49,49 @@ class CBSIE(CBSBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, guid):
|
||||
path = 'dJ5BDC/media/guid/2198311517/' + guid
|
||||
smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||
formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid)
|
||||
for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
|
||||
try:
|
||||
tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0])
|
||||
formats.extend(tp_formats)
|
||||
except ExtractorError:
|
||||
def _extract_video_info(self, content_id):
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||
tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
|
||||
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
||||
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types:
|
||||
continue
|
||||
asset_types.append(asset_type)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': asset_type,
|
||||
}
|
||||
if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
|
||||
query['formats'] = 'MPEG4,M3U'
|
||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||
query['formats'] = 'MPEG4,FLV'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
metadata = self._download_theplatform_metadata(path, guid)
|
||||
info = self._parse_theplatform_metadata(metadata)
|
||||
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
info.update({
|
||||
'id': guid,
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series': metadata.get('cbs$SeriesTitle'),
|
||||
'season_number': int_or_none(metadata.get('cbs$SeasonNumber')),
|
||||
'episode': metadata.get('cbs$EpisodeTitle'),
|
||||
'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CBSNewsIE(CBSIE):
|
||||
IE_NAME = 'cbsnews'
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
@@ -68,15 +69,16 @@ class CBSNewsIE(CBSIE):
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_NAME = 'cbsnews:livevideo'
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
|
||||
|
||||
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
@@ -84,25 +86,22 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_info = self._download_json(
|
||||
'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={
|
||||
'device': 'desktop',
|
||||
'dvr_slug': display_id,
|
||||
})
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
|
||||
|
||||
hdcore_sign = 'hdcore=3.3.1'
|
||||
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
|
||||
if f4m_formats:
|
||||
for entry in f4m_formats:
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
self._sort_formats(f4m_formats)
|
||||
formats = self._extract_akamai_formats(video_info['url'], display_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'title': video_info['headline'],
|
||||
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
|
||||
'duration': parse_duration(video_info.get('segmentDur')),
|
||||
'formats': f4m_formats,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,9 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
|
||||
config_json = self._search_regex(
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
|
||||
'configuration')
|
||||
config = json.loads(config_json)
|
||||
'configuration'), video_id)
|
||||
|
||||
video_info = config['videoInfo']
|
||||
sources = config['sources']
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -21,6 +21,7 @@ from ..compat import (
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -87,6 +88,9 @@ class InfoExtractor(object):
|
||||
|
||||
Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* manifest_url
|
||||
The URL of the manifest file in case of
|
||||
fragmented media (DASH, hls, hds)
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
@@ -115,6 +119,11 @@ class InfoExtractor(object):
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||
"m3u8", "m3u8_native" or "http_dash_segments".
|
||||
* fragments A list of fragments of the fragmented media,
|
||||
with the following entries:
|
||||
* "url" (mandatory) - fragment's URL
|
||||
* "duration" (optional, int or float)
|
||||
* "filesize" (optional, int)
|
||||
* preference Order number of this format. If this field is
|
||||
present and not None, the formats get sorted
|
||||
by this field, regardless of all other values.
|
||||
@@ -1142,6 +1151,7 @@ class InfoExtractor(object):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': manifest_url,
|
||||
'manifest_url': manifest_url,
|
||||
'ext': 'flv' if bootstrap_info is not None else None,
|
||||
'tbr': tbr,
|
||||
'width': width,
|
||||
@@ -1247,9 +1257,11 @@ class InfoExtractor(object):
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
manifest_url = format_url(line.strip())
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(line.strip()),
|
||||
'url': manifest_url,
|
||||
'manifest_url': manifest_url,
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
@@ -1521,9 +1533,10 @@ class InfoExtractor(object):
|
||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
@@ -1544,42 +1557,52 @@ class InfoExtractor(object):
|
||||
|
||||
def extract_multisegment_info(element, ms_parent_info):
|
||||
ms_info = ms_parent_info.copy()
|
||||
|
||||
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
|
||||
# common attributes and elements. We will only extract relevant
|
||||
# for us.
|
||||
def extract_common(source):
|
||||
segment_timeline = source.find(_add_ns('SegmentTimeline'))
|
||||
if segment_timeline is not None:
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
start_number = source.get('startNumber')
|
||||
if start_number:
|
||||
ms_info['start_number'] = int(start_number)
|
||||
timescale = source.get('timescale')
|
||||
if timescale:
|
||||
ms_info['timescale'] = int(timescale)
|
||||
segment_duration = source.get('duration')
|
||||
if segment_duration:
|
||||
ms_info['segment_duration'] = int(segment_duration)
|
||||
|
||||
def extract_Initialization(source):
|
||||
initialization = source.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
|
||||
segment_list = element.find(_add_ns('SegmentList'))
|
||||
if segment_list is not None:
|
||||
extract_common(segment_list)
|
||||
extract_Initialization(segment_list)
|
||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||
if segment_urls_e:
|
||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
||||
initialization = segment_list.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
else:
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
start_number = segment_template.get('startNumber')
|
||||
if start_number:
|
||||
ms_info['start_number'] = int(start_number)
|
||||
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
|
||||
if segment_timeline is not None:
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
else:
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
ms_info['timescale'] = int(timescale)
|
||||
segment_duration = segment_template.get('duration')
|
||||
if segment_duration:
|
||||
ms_info['segment_duration'] = int(segment_duration)
|
||||
extract_common(segment_template)
|
||||
media_template = segment_template.get('media')
|
||||
if media_template:
|
||||
ms_info['media_template'] = media_template
|
||||
@@ -1587,11 +1610,14 @@ class InfoExtractor(object):
|
||||
if initialization:
|
||||
ms_info['initialization_url'] = initialization
|
||||
else:
|
||||
initialization = segment_template.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
extract_Initialization(segment_template)
|
||||
return ms_info
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats = []
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
@@ -1634,6 +1660,7 @@ class InfoExtractor(object):
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
'manifest_url': mpd_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
@@ -1648,9 +1675,7 @@ class InfoExtractor(object):
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
@@ -1659,46 +1684,79 @@ class InfoExtractor(object):
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template:
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
'url': media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
representation_ms_info['segment_urls'] = []
|
||||
# $Number*$ or $Time$ in media template with S list available
|
||||
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||
representation_ms_info['fragments'] = []
|
||||
segment_time = 0
|
||||
segment_d = None
|
||||
segment_number = representation_ms_info['start_number']
|
||||
|
||||
def add_segment_url():
|
||||
representation_ms_info['segment_urls'].append(
|
||||
media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
)
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||
})
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
segment_d = s['d']
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += s['d']
|
||||
segment_time += segment_d
|
||||
add_segment_url()
|
||||
segment_time += s['d']
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
segment_number += 1
|
||||
segment_time += segment_d
|
||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||
# No media template
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
s_num = 0
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
s = representation_ms_info['s'][s_num]
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
fragments.append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||
})
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# NB: MPD manifest may contain direct URLs to unfragmented media.
|
||||
# No fragments key is present in this case.
|
||||
if 'fragments' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
'fragments': [],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||
f.update({
|
||||
'initialization_url': initialization_url,
|
||||
})
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({'url': initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
for fragment in f['fragments']:
|
||||
fragment['url'] = combine_url(base_url, fragment['url'])
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
@@ -1744,7 +1802,11 @@ class InfoExtractor(object):
|
||||
return is_plain_url, formats
|
||||
|
||||
entries = []
|
||||
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||
media_tags = [(media_tag, media_type, '')
|
||||
for media_tag, media_type
|
||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
||||
media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
|
||||
for media_tag, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
@@ -1771,7 +1833,7 @@ class InfoExtractor(object):
|
||||
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind == 'subtitles':
|
||||
if not kind or kind in ('subtitles', 'captions'):
|
||||
src = track_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
@@ -1779,22 +1841,70 @@ class InfoExtractor(object):
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
if media_info['formats']:
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id):
|
||||
formats = []
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(f4m_url, {'hdcore': '3.7.0'}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
if 'hdcore=' not in f4m_url:
|
||||
f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
f4m_url, video_id, f4m_id='hds', fatal=False)
|
||||
for entry in f4m_formats:
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
formats.extend(f4m_formats)
|
||||
m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
return formats
|
||||
|
||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=None):
    """
    Build the list of formats for a Wowza streaming URL.

    url is a Wowza manifest/playlist/stream URL (http(s), rtmp or rtsp);
    the manifest file name is stripped and the remaining base is used to
    derive the HLS, HDS, DASH, SMIL, RTMP and RTSP variants.
    m3u8_entry_protocol is passed through to _extract_m3u8_formats.
    skip_protocols is an optional collection naming the variants to leave
    out ('m3u8', 'f4m', 'dash', 'smil', 'rtmp', 'rtsp').
    Returns a list of format dicts.
    """
    # A None default replaces the former shared mutable [] default; an
    # explicit None from a caller is treated as "skip nothing" as well.
    skip_protocols = skip_protocols or ()
    # Drop the trailing manifest file name so only the stream base remains
    url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
    # Scheme-less base ('://host/path', query string excluded)
    url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
    http_base_url = 'http' + url_base
    formats = []
    if 'm3u8' not in skip_protocols:
        formats.extend(self._extract_m3u8_formats(
            http_base_url + '/playlist.m3u8', video_id, 'mp4',
            m3u8_entry_protocol, m3u8_id='hls', fatal=False))
    if 'f4m' not in skip_protocols:
        formats.extend(self._extract_f4m_formats(
            http_base_url + '/manifest.f4m',
            video_id, f4m_id='hds', fatal=False))
    if re.search(r'(?:/smil:|\.smil)', url_base):
        # SMIL-based stream: DASH and the SMIL manifest are available too
        if 'dash' not in skip_protocols:
            formats.extend(self._extract_mpd_formats(
                http_base_url + '/manifest.mpd',
                video_id, mpd_id='dash', fatal=False))
        if 'smil' not in skip_protocols:
            rtmp_formats = self._extract_smil_formats(
                http_base_url + '/jwplayer.smil',
                video_id, fatal=False)
            for rtmp_format in rtmp_formats:
                # Derive an RTSP twin of every RTMP format: the play path
                # is folded into the URL and the RTMP-only keys removed
                rtsp_format = rtmp_format.copy()
                rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
                del rtsp_format['play_path']
                del rtsp_format['ext']
                rtsp_format.update({
                    'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
                    'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
                    'protocol': 'rtsp',
                })
                formats.extend([rtmp_format, rtsp_format])
    else:
        # Plain stream: offer the bare rtmp/rtsp URLs directly
        for protocol in ('rtmp', 'rtsp'):
            if protocol not in skip_protocols:
                formats.append({
                    'url': protocol + url_base,
                    'format_id': protocol,
                    'protocol': protocol,
                })
    return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@@ -1915,6 +2025,12 @@ class InfoExtractor(object):
|
||||
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||
return headers
|
||||
|
||||
def _generic_id(self, url):
    """ Derive a video id from the last path component of url, extension removed """
    last_component = url.rstrip('/').split('/')[-1]
    stem, _ = os.path.splitext(last_component)
    return compat_urllib_parse_unquote(stem)
|
||||
|
||||
def _generic_title(self, url):
    """ Derive a title from the basename of url, extension removed """
    basename = url_basename(url)
    stem = os.path.splitext(basename)[0]
    return compat_urllib_parse_unquote(stem)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -1,13 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class RtmpIE(InfoExtractor):
|
||||
@@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||
title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||
video_id = self._generic_id(url)
|
||||
title = self._generic_title(url)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor):
|
||||
'format_id': compat_urlparse.urlparse(url).scheme,
|
||||
}],
|
||||
}
|
||||
|
||||
|
||||
class MmsIE(InfoExtractor):
    # Extractor for direct mms:// links
    IE_DESC = False  # Do not list
    _VALID_URL = r'(?i)mms://.+'

    _TEST = {
        # Direct MMS link
        'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
        'info_dict': {
            'id': 'MilesReid(0709)',
            'ext': 'wmv',
            'title': 'MilesReid(0709)',
        },
        'params': {
            'skip_download': True,  # rtsp downloads, requiring mplayer or mpv
        },
    }

    def _real_extract(self, url):
        # Id and title both come from the URL itself; the URL is returned
        # unchanged as the media location.
        return {
            'id': self._generic_id(url),
            'title': self._generic_title(url),
            'url': url,
        }
|
||||
|
@@ -1,8 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Samouraï',
|
||||
'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
final_url = self._search_regex(
|
||||
r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
||||
r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._search_regex(
|
||||
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
||||
r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
||||
webpage, 'thumbnail url')
|
||||
|
||||
return {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -46,6 +46,13 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
def is_logged(webpage):
    # The page counts as logged in when it carries the redirect title marker
    redirect_marker = '<title>Redirecting'
    return redirect_marker in webpage
|
||||
|
||||
# Already logged in
|
||||
if is_logged(login_page):
|
||||
return
|
||||
|
||||
login_form_str = self._search_regex(
|
||||
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
|
||||
login_page, 'login form', group='form')
|
||||
@@ -69,7 +76,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
# Successful login
|
||||
if '<title>Redirecting' in response:
|
||||
if is_logged(response):
|
||||
return
|
||||
|
||||
error = self._html_search_regex(
|
||||
|
@@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
|
||||
'uploader': 'HotWaves1012',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# geo-restricted, player v5
|
||||
{
|
||||
@@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
player_v5 = self._search_regex(
|
||||
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
|
||||
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
||||
r'buildPlayer\(({.+?})\);'],
|
||||
r'buildPlayer\(({.+?})\);',
|
||||
r'var\s+config\s*=\s*({.+?});'],
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@@ -1,61 +1,54 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_TEST = {
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'md5': '174dd4a8a6225cf5655952f969cfbe24',
|
||||
'info_dict': {
|
||||
'id': '1324',
|
||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade'
|
||||
'ext': 'mp4',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||
'description': 'Kurzfilm',
|
||||
'upload_date': '20110407',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
|
||||
version_json = self._download_json(
|
||||
base_url + 'version.json',
|
||||
video_id, note='Determining file version')
|
||||
version = version_json['version_name']
|
||||
info_json = self._download_json(
|
||||
'{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
|
||||
video_id, note='Fetching object ID')
|
||||
object_id = compat_str(info_json['object_id'])
|
||||
meta_json = self._download_json(
|
||||
'{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
|
||||
video_id, note='Downloading metadata')
|
||||
uuid = meta_json['uuid']
|
||||
title = meta_json['title']
|
||||
wide = meta_json['is_wide']
|
||||
if wide:
|
||||
ratio = '16x9'
|
||||
else:
|
||||
ratio = '4x3'
|
||||
play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
object_id = self._html_search_meta('DC.identifier', webpage)
|
||||
|
||||
servers_json = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/',
|
||||
'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
|
||||
video_id, note='Downloading server list')
|
||||
url = servers_json[0]['endpoint']
|
||||
server = servers_json[0]['server']
|
||||
m3u8_path = self._search_regex(
|
||||
r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_meta('DC.description', webpage)
|
||||
upload_date = unified_strdate(
|
||||
self._html_search_meta('DC.date.created', webpage))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': object_id,
|
||||
'title': title,
|
||||
'format': 'rtmp',
|
||||
'url': url,
|
||||
'play_path': play_path,
|
||||
'rtmp_real_time': True,
|
||||
'ext': 'flv',
|
||||
'display_id': video_id
|
||||
'formats': formats,
|
||||
'display_id': video_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
@@ -14,7 +14,7 @@ class EinthusanIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||
'md5': 'af244f4458cd667205e513d75da5b8b1',
|
||||
'md5': 'd71379996ff5b7f217eca034c34e3461',
|
||||
'info_dict': {
|
||||
'id': '2447',
|
||||
'ext': 'mp4',
|
||||
@@ -25,13 +25,13 @@ class EinthusanIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||
'md5': 'ef63c7a803e22315880ed182c10d1c5c',
|
||||
'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
|
||||
'info_dict': {
|
||||
'id': '1671',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soodhu Kavvuum',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
|
||||
'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
|
||||
}
|
||||
},
|
||||
]
|
||||
@@ -50,9 +50,11 @@ class EinthusanIE(InfoExtractor):
|
||||
video_id = self._search_regex(
|
||||
r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
|
||||
|
||||
video_url = self._download_webpage(
|
||||
m3u8_url = self._download_webpage(
|
||||
'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
|
||||
% video_id, video_id)
|
||||
% video_id, video_id, headers={'Referer': url})
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._html_search_regex(
|
||||
@@ -64,7 +66,7 @@ class EinthusanIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -31,7 +31,6 @@ from .aenetworks import (
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
@@ -117,7 +116,10 @@ from .brightcove import (
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .byutv import (
|
||||
BYUtvIE,
|
||||
BYUtvEventIE,
|
||||
)
|
||||
from .c56 import C56IE
|
||||
from .camdemy import (
|
||||
CamdemyIE,
|
||||
@@ -184,7 +186,10 @@ from .comedycentral import (
|
||||
)
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
from .commonprotocols import (
|
||||
MmsIE,
|
||||
RtmpIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
@@ -343,7 +348,10 @@ from .goshgay import GoshgayIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
from .hbo import HBOIE
|
||||
from .hbo import (
|
||||
HBOIE,
|
||||
HBOEpisodeIE,
|
||||
)
|
||||
from .hearthisat import HearThisAtIE
|
||||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
@@ -435,6 +443,7 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lego import LEGOIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
@@ -472,6 +481,10 @@ from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .meta import METAIE
|
||||
@@ -512,6 +525,7 @@ from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVVideoIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
@@ -607,13 +621,14 @@ from .nowtv import (
|
||||
)
|
||||
from .noz import NozIE
|
||||
from .npo import (
|
||||
AndereTijdenIE,
|
||||
NPOIE,
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
SchoolTVIE,
|
||||
VPROIE,
|
||||
WNLIE
|
||||
WNLIE,
|
||||
)
|
||||
from .npr import NprIE
|
||||
from .nrk import (
|
||||
@@ -629,6 +644,7 @@ from .nytimes import (
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nzz import NZZIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
@@ -882,8 +898,10 @@ from .theplatform import (
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcDeIE
|
||||
@@ -898,6 +916,7 @@ from .tnaflix import (
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .tonline import TOnlineIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@@ -1065,6 +1084,7 @@ from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
|
@@ -258,7 +258,7 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -2,25 +2,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .streamable import StreamableIE
|
||||
|
||||
|
||||
class FootyRoomIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://footyroom\.com/matches/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
|
||||
'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review',
|
||||
'info_dict': {
|
||||
'id': 'schalke-04-0-2-real-madrid-2015-02',
|
||||
'title': 'Schalke 04 0 – 2 Real Madrid',
|
||||
'id': '79922154',
|
||||
'title': 'VIDEO Hull City 0 - 2 Chelsea',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'skip': 'Video for this match is not available',
|
||||
'playlist_count': 2,
|
||||
'add_ie': [StreamableIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
|
||||
'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
|
||||
'info_dict': {
|
||||
'id': 'georgia-0-2-germany-2015-03',
|
||||
'title': 'Georgia 0 – 2 Germany',
|
||||
'id': '75817984',
|
||||
'title': 'VIDEO Georgia 0 - 2 Germany',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'add_ie': ['Playwire']
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
|
||||
playlist = self._parse_json(self._search_regex(
|
||||
r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'),
|
||||
playlist_id)
|
||||
|
||||
playlist_title = self._og_search_title(webpage)
|
||||
@@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor):
|
||||
payload = video.get('payload')
|
||||
if not payload:
|
||||
continue
|
||||
playwire_url = self._search_regex(
|
||||
playwire_url = self._html_search_regex(
|
||||
r'data-config="([^"]+)"', payload,
|
||||
'playwire url', default=None)
|
||||
if playwire_url:
|
||||
entries.append(self.url_result(self._proto_relative_url(
|
||||
playwire_url, 'http:'), 'Playwire'))
|
||||
|
||||
streamable_url = StreamableIE._extract_url(payload)
|
||||
if streamable_url:
|
||||
entries.append(self.url_result(
|
||||
streamable_url, StreamableIE.ie_key()))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
@@ -11,9 +11,13 @@ class Formula1IE(InfoExtractor):
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
'info_dict': {
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
|
@@ -1,14 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FOXIE(InfoExtractor):
|
||||
class FOXIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||
@@ -30,14 +30,26 @@ class FOXIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url']
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), video_id)
|
||||
fox_pdk_player = settings['fox_pdk_player']
|
||||
release_url = fox_pdk_player['release_url']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'switch': 'http'
|
||||
}
|
||||
if fox_pdk_player.get('access') == 'locked':
|
||||
ap_p = settings['foxAdobePassProvider']
|
||||
rating = ap_p.get('videoRating')
|
||||
if rating == 'n/a':
|
||||
rating = None
|
||||
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(
|
||||
release_url, {'switch': 'http'}), {'force_smil_url': True}),
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import month_by_name
|
||||
|
||||
|
||||
@@ -10,14 +9,14 @@ class FranceInterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013',
|
||||
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
||||
'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
||||
'md5': '9e54d7bdb6fdc02a841007f8a975c094',
|
||||
'info_dict': {
|
||||
'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013',
|
||||
'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
||||
'ext': 'mp3',
|
||||
'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter',
|
||||
'description': 'md5:7f2ce449894d1e585932273080fb410d',
|
||||
'upload_date': '20131218',
|
||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||
'upload_date': '20160907',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -39,7 +38,8 @@ class FranceInterIE(InfoExtractor):
|
||||
if upload_date_str:
|
||||
upload_date_list = upload_date_str.split()
|
||||
upload_date_list.reverse()
|
||||
upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr'))
|
||||
upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
|
||||
upload_date_list[2] = '%02d' % int(upload_date_list[2])
|
||||
upload_date = ''.join(upload_date_list)
|
||||
else:
|
||||
upload_date = None
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@@ -27,7 +27,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_basename,
|
||||
xpath_text,
|
||||
)
|
||||
from .brightcove import (
|
||||
@@ -1549,7 +1548,7 @@ class GenericIE(InfoExtractor):
|
||||
force_videoid = smuggled_data['force_videoid']
|
||||
video_id = force_videoid
|
||||
else:
|
||||
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
self.to_screen('%s: Requesting header' % video_id)
|
||||
|
||||
@@ -1578,7 +1577,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'title': self._generic_title(url),
|
||||
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
|
||||
}
|
||||
|
||||
@@ -1657,7 +1656,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0])
|
||||
doc, video_id,
|
||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||
@@ -1752,9 +1753,9 @@ class GenericIE(InfoExtractor):
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
|
||||
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||
if vimeo_url is not None:
|
||||
return self.url_result(vimeo_url)
|
||||
vimeo_urls = VimeoIE._extract_urls(url, webpage)
|
||||
if vimeo_urls:
|
||||
return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
@@ -2254,6 +2255,35 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
|
||||
|
||||
# Look for Mangomolo embeds
|
||||
mobj = re.search(
|
||||
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
|
||||
(?:
|
||||
video\?.*?\bid=(?P<video_id>\d+)|
|
||||
index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
||||
).+?)\1''', webpage)
|
||||
if mobj is not None:
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
video_id = mobj.group('video_id')
|
||||
if video_id:
|
||||
info.update({
|
||||
'ie_key': 'MangomoloVideo',
|
||||
'id': video_id,
|
||||
})
|
||||
else:
|
||||
info.update({
|
||||
'ie_key': 'MangomoloLive',
|
||||
'id': mobj.group('channel_id'),
|
||||
})
|
||||
return info
|
||||
|
||||
# Look for Instagram embeds
|
||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||
if instagram_embed_url is not None:
|
||||
@@ -2301,12 +2331,23 @@ class GenericIE(InfoExtractor):
|
||||
info_dict.update(json_ld)
|
||||
return info_dict
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
for entry in entries:
|
||||
entry.update({
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
})
|
||||
self._sort_formats(entry['formats'])
|
||||
return self.playlist_result(entries)
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||
|
||||
def filter_video(urls):
|
||||
return list(filter(check_video, urls))
|
||||
@@ -2356,9 +2397,6 @@ class GenericIE(InfoExtractor):
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||
if not found:
|
||||
# HTML5 video
|
||||
found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||
if not found:
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
found = re.search(
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -14,6 +15,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
@@ -63,6 +65,9 @@ class GloboIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'globo:3607726',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
class MD5(object):
|
||||
@@ -396,7 +401,7 @@ class GloboIE(InfoExtractor):
|
||||
|
||||
|
||||
class GloboArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id=["\'](\d{7,})',
|
||||
@@ -408,15 +413,20 @@ class GloboArticleIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
|
||||
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
|
||||
'info_dict': {
|
||||
'id': '3652183',
|
||||
'ext': 'mp4',
|
||||
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
|
||||
'duration': 110.711,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': '196',
|
||||
}
|
||||
'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
|
||||
'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
|
||||
'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
|
||||
'info_dict': {
|
||||
'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
|
||||
'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
|
||||
'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
|
||||
'only_matching': True,
|
||||
@@ -435,5 +445,12 @@ class GloboArticleIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
|
||||
return self.url_result('globo:%s' % video_id, 'Globo')
|
||||
video_ids = []
|
||||
for video_regex in self._VIDEOID_REGEXES:
|
||||
video_ids.extend(re.findall(video_regex, webpage))
|
||||
entries = [
|
||||
self.url_result('globo:%s' % video_id, GloboIE.ie_key())
|
||||
for video_id in orderedSet(video_ids)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
return self.playlist_result(entries, display_id, title, description)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -12,17 +12,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class HBOIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
|
||||
'md5': '1c33253f0c7782142c993c0ba62a8753',
|
||||
'info_dict': {
|
||||
'id': '1437839',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ep. 64 Clip: Encryption',
|
||||
}
|
||||
}
|
||||
class HBOBaseIE(InfoExtractor):
|
||||
_FORMATS_INFO = {
|
||||
'1920': {
|
||||
'width': 1280,
|
||||
@@ -50,8 +40,7 @@ class HBOIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
def _extract_from_id(self, video_id):
|
||||
video_data = self._download_xml(
|
||||
'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
|
||||
title = xpath_text(video_data, 'title', 'title', True)
|
||||
@@ -116,7 +105,60 @@ class HBOIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
|
||||
'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
|
||||
class HBOIE(HBOBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
|
||||
'md5': '1c33253f0c7782142c993c0ba62a8753',
|
||||
'info_dict': {
|
||||
'id': '1437839',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ep. 64 Clip: Encryption',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'duration': 1072,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_id(video_id)
|
||||
|
||||
|
||||
class HBOEpisodeIE(HBOBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
|
||||
'md5': '689132b253cc0ab7434237fc3a293210',
|
||||
'info_dict': {
|
||||
'id': '1439518',
|
||||
'display_id': 'ep-52-inside-the-episode',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ep. 52: Inside the Episode',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'duration': 240,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
|
||||
webpage, 'video ID', group='video_id')
|
||||
|
||||
info_dict = self._extract_from_id(video_id)
|
||||
info_dict['display_id'] = display_id
|
||||
|
||||
return info_dict
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': 'Naomi Leonor Phan-Quang',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
},
|
||||
}, {
|
||||
# missing description
|
||||
@@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': 'Britney Spears',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count) = [None] * 8
|
||||
uploader_id, like_count, comment_count, height, width) = [None] * 10
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor):
|
||||
shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = media.get('caption')
|
||||
thumbnail = media.get('display_src')
|
||||
timestamp = int_or_none(media.get('date'))
|
||||
@@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor):
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
like_count = int_or_none(media.get('likes', {}).get('count'))
|
||||
comment_count = int_or_none(media.get('comments', {}).get('count'))
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
|
||||
if not video_url:
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}]
|
||||
|
||||
if not uploader_id:
|
||||
uploader_id = self._search_regex(
|
||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
|
||||
@@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by %s' % uploader_id,
|
||||
'description': description,
|
||||
@@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'comments': comments,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor):
|
||||
for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
|
||||
extract_formats(src)
|
||||
|
||||
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
|
||||
self.raise_geo_restricted()
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# coding=utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
@@ -19,24 +20,32 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
# TODO: Merge this with JWPlayer-related codes in generic.py
|
||||
|
||||
mobj = re.search(
|
||||
'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('options')
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._find_jwplayer_data(webpage), video_id)
|
||||
self._find_jwplayer_data(webpage), video_id,
|
||||
transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
entries = []
|
||||
|
||||
# JWPlayer backward compatibility: single playlist item
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
|
||||
if not isinstance(jwplayer_data['playlist'], list):
|
||||
jwplayer_data['playlist'] = [jwplayer_data['playlist']]
|
||||
|
||||
for video_data in jwplayer_data['playlist']:
|
||||
# JWPlayer backward compatibility: flattened sources
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||
@@ -55,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, this_video_id, mpd_id=mpd_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
formats.append({
|
||||
|
@@ -105,20 +105,20 @@ class KalturaIE(InfoExtractor):
|
||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||
\{.*?
|
||||
(?P<q1>['\"])wid(?P=q1)\s*:\s*
|
||||
(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
|
||||
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||
(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),
|
||||
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4),
|
||||
""", webpage) or
|
||||
re.search(
|
||||
r'''(?xs)
|
||||
(?P<q1>["\'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?P=q1).*?
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
|
||||
)\s*:\s*
|
||||
(?P<q3>["\'])(?P<id>.+?)(?P=q3)
|
||||
(?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage))
|
||||
if mobj:
|
||||
embed_info = mobj.groupdict()
|
||||
|
@@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# mzsource, geo restricted to Belgium
|
||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -36,9 +40,25 @@ class KetnetIE(InfoExtractor):
|
||||
|
||||
title = config['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
config['source']['hls'], video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
formats = []
|
||||
for source_key in ('', 'mz'):
|
||||
source = config.get('%ssource' % source_key)
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
for format_id, format_url in source.items():
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
elif format_id == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
@@ -29,7 +29,7 @@ from ..utils import (
|
||||
|
||||
class LeIE(InfoExtractor):
|
||||
IE_DESC = '乐视网'
|
||||
_VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
|
||||
|
||||
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
|
||||
|
||||
@@ -73,6 +73,12 @@ class LeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://sports.le.com/video/25737697.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.lesports.com/match/1023203003.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://sports.le.com/match/1023203003.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
|
||||
|
128
youtube_dl/extractor/lego.py
Normal file
128
youtube_dl/extractor/lego.py
Normal file
@@ -0,0 +1,128 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
parse_duration,
|
||||
get_element_by_class,
|
||||
)
|
||||
|
||||
|
||||
class LEGOIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
|
||||
'md5': 'f34468f176cfd76488767fc162c405fa',
|
||||
'info_dict': {
|
||||
'id': '55492d823b1b4d5e985787fa8c2973b1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
|
||||
'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
|
||||
},
|
||||
}, {
|
||||
# geo-restricted but the contentUrl contain a valid url
|
||||
'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
|
||||
'md5': '4c3fec48a12e40c6e5995abc3d36cc2e',
|
||||
'info_dict': {
|
||||
'id': '13bdc2299ab24d9685701a915b3d71e7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aflevering 20 - Helden van het koninkrijk',
|
||||
'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
|
||||
},
|
||||
}, {
|
||||
# special characters in title
|
||||
'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d',
|
||||
'info_dict': {
|
||||
'id': '9685ee9d12e84ff38e84b4e3d0db533d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Force Surprise – LEGO® Star Wars™ Microfighters',
|
||||
'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_BITRATES = [256, 512, 1024, 1536, 2560]
|
||||
|
||||
def _real_extract(self, url):
|
||||
locale, video_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = get_element_by_class('video-header', webpage).strip()
|
||||
progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
|
||||
streaming_base = 'http://legoprod-f.akamaihd.net/'
|
||||
content_url = self._html_search_meta('contentUrl', webpage)
|
||||
path = self._search_regex(
|
||||
r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)',
|
||||
content_url, 'video path', default=None)
|
||||
if not path:
|
||||
player_url = self._proto_relative_url(self._search_regex(
|
||||
r'<iframe[^>]+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)',
|
||||
webpage, 'player url', default=None))
|
||||
if not player_url:
|
||||
base_url = self._proto_relative_url(self._search_regex(
|
||||
r'data-baseurl="([^"]+)"', webpage, 'base url',
|
||||
default='http://www.lego.com/%s/mediaplayer/video/' % locale))
|
||||
player_url = base_url + video_id
|
||||
player_webpage = self._download_webpage(player_url, video_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r"video='([^']+)'", player_webpage, 'video data')), video_id)
|
||||
progressive_base = self._search_regex(
|
||||
r'data-video-progressive-url="([^"]+)"',
|
||||
player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/')
|
||||
streaming_base = self._search_regex(
|
||||
r'data-video-streaming-url="([^"]+)"',
|
||||
player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/')
|
||||
item_id = video_data['ItemId']
|
||||
|
||||
net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]])
|
||||
base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])])
|
||||
path = '/'.join([net_storage_path, base_path])
|
||||
streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES))
|
||||
|
||||
formats = self._extract_akamai_formats(
|
||||
'%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
formats))
|
||||
if len(m3u8_formats) == len(self._BITRATES):
|
||||
self._sort_formats(m3u8_formats)
|
||||
for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats):
|
||||
progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate)
|
||||
mp4_f = m3u8_format.copy()
|
||||
mp4_f.update({
|
||||
'url': progressive_base_url + 'mp4',
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'mp4'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
web_f = {
|
||||
'url': progressive_base_url + 'webm',
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'webm'),
|
||||
'width': m3u8_format['width'],
|
||||
'height': m3u8_format['height'],
|
||||
'tbr': m3u8_format.get('tbr'),
|
||||
'ext': 'webm',
|
||||
}
|
||||
formats.extend([web_f, mp4_f])
|
||||
else:
|
||||
for bitrate in self._BITRATES:
|
||||
for ext in ('web', 'mp4'):
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (ext, bitrate),
|
||||
'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext),
|
||||
'tbr': bitrate,
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'thumbnail': self._html_search_meta('thumbnail', webpage),
|
||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
||||
'formats': formats,
|
||||
}
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -59,7 +59,7 @@ class LimelightBaseIE(InfoExtractor):
|
||||
format_id = 'rtmp'
|
||||
if stream.get('videoBitRate'):
|
||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||
http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:])
|
||||
http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
|
||||
urls.append(http_url)
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
54
youtube_dl/extractor/mangomolo.py
Normal file
54
youtube_dl/extractor/mangomolo.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MangomoloBaseIE(InfoExtractor):
|
||||
def _get_real_id(self, page_id):
|
||||
return page_id
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._get_real_id(self._match_id(url))
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
|
||||
|
||||
format_url = self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats = self._extract_wowza_formats(
|
||||
format_url, page_id, m3u8_entry_protocol, ['smil'])
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': page_id,
|
||||
'title': self._live_title(page_id) if self._IS_LIVE else page_id,
|
||||
'uploader_id': hidden_inputs.get('userid'),
|
||||
'duration': int_or_none(hidden_inputs.get('duration')),
|
||||
'is_live': self._IS_LIVE,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class MangomoloVideoIE(MangomoloBaseIE):
|
||||
IE_NAME = 'mangomolo:video'
|
||||
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)'
|
||||
_IS_LIVE = False
|
||||
|
||||
|
||||
class MangomoloLiveIE(MangomoloBaseIE):
|
||||
IE_NAME = 'mangomolo:live'
|
||||
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
|
||||
_IS_LIVE = True
|
||||
|
||||
def _get_real_id(self, page_id):
|
||||
return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode()
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -270,6 +270,29 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
|
||||
|
||||
class MTVIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)'
|
||||
_FEED_URL = 'http://www.mtv.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer',
|
||||
'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
|
||||
'info_dict': {
|
||||
'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
|
||||
'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
|
||||
'timestamp': 1468846800,
|
||||
'upload_date': '20160718',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class MTVVideoIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv:video'
|
||||
_VALID_URL = r'''(?x)^https?://
|
||||
(?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
|
||||
m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -9,9 +9,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class MwaveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
|
||||
_URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
@@ -23,7 +23,10 @@ class MwaveIE(InfoExtractor):
|
||||
'duration': 206,
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://mwave.interest.me/en/mnettv/videodetail.m?searchVideoDetailVO.clip_id=176199',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -60,8 +63,8 @@ class MwaveIE(InfoExtractor):
|
||||
|
||||
|
||||
class MwaveMeetGreetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?meetgreet/view/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://mwave.interest.me/meetgreet/view/256',
|
||||
'info_dict': {
|
||||
'id': '173294',
|
||||
@@ -72,7 +75,10 @@ class MwaveMeetGreetIE(InfoExtractor):
|
||||
'duration': 3634,
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://mwave.interest.me/en/meetgreet/view/256',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE):
|
||||
|
||||
class AppleDailyIE(NextMediaIE):
|
||||
IE_DESC = '臺灣蘋果日報'
|
||||
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
||||
@@ -154,6 +154,9 @@ class AppleDailyIE(NextMediaIE):
|
||||
'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
|
||||
'upload_date': '20140417',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
||||
|
@@ -245,7 +245,11 @@ class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
|
||||
|
||||
class NHLIE(InfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
|
||||
_SITES_MAP = {
|
||||
'nhl': 'nhl',
|
||||
'wch2016': 'wch',
|
||||
}
|
||||
_TESTS = [{
|
||||
# type=video
|
||||
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
|
||||
@@ -270,13 +274,32 @@ class NHLIE(InfoExtractor):
|
||||
'upload_date': '20160204',
|
||||
'timestamp': 1454544904,
|
||||
},
|
||||
}, {
|
||||
# Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713)
|
||||
'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
|
||||
'md5': '50b2bb47f405121484dda3ccbea25459',
|
||||
'info_dict': {
|
||||
'id': '44315003',
|
||||
'ext': 'mp4',
|
||||
'title': 'Poile, Laviolette on Subban trade',
|
||||
'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)',
|
||||
'timestamp': 1467242866,
|
||||
'upload_date': '20160629',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
tmp_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
tmp_id, site = mobj.group('id'), mobj.group('site')
|
||||
video_data = self._download_json(
|
||||
'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id,
|
||||
tmp_id)
|
||||
'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
|
||||
% (self._SITES_MAP[site], tmp_id), tmp_id)
|
||||
if video_data.get('type') == 'article':
|
||||
video_data = video_data['media']
|
||||
|
||||
@@ -290,9 +313,11 @@ class NHLIE(InfoExtractor):
|
||||
continue
|
||||
ext = determine_ext(playback_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
playback_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=playback.get('name', 'hls'), fatal=False))
|
||||
m3u8_id=playback.get('name', 'hls'), fatal=False)
|
||||
self._check_formats(m3u8_formats, video_id)
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
height = int_or_none(playback.get('height'))
|
||||
formats.append({
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import (
|
||||
|
@@ -3,12 +3,15 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
qualities,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -180,9 +183,16 @@ class NPOIE(NPOBaseIE):
|
||||
continue
|
||||
streams = format_info.get('streams')
|
||||
if streams:
|
||||
video_info = self._download_json(
|
||||
streams[0] + '&type=json',
|
||||
video_id, 'Downloading %s stream JSON' % format_id)
|
||||
try:
|
||||
video_info = self._download_json(
|
||||
streams[0] + '&type=json',
|
||||
video_id, 'Downloading %s stream JSON' % format_id)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring')
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
else:
|
||||
video_info = format_info
|
||||
video_url = video_info.get('url')
|
||||
@@ -438,9 +448,30 @@ class SchoolTVIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class VPROIE(NPOIE):
|
||||
class NPOPlaylistBaseIE(NPOIE):
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
|
||||
for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
|
||||
]
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
||||
|
||||
class VPROIE(NPOPlaylistBaseIE):
|
||||
IE_NAME = 'vpro'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
_PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)',
|
||||
r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
|
||||
_PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -453,12 +484,13 @@ class VPROIE(NPOIE):
|
||||
'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
|
||||
'upload_date': '20130225',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
|
||||
'info_dict': {
|
||||
'id': 'sergio-herman',
|
||||
'title': 'Sergio Herman: Fucking perfect',
|
||||
'title': 'sergio herman: fucking perfect',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
},
|
||||
@@ -467,54 +499,61 @@ class VPROIE(NPOIE):
|
||||
'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
|
||||
'info_dict': {
|
||||
'id': 'education-education',
|
||||
'title': '2Doc',
|
||||
'title': 'education education',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
|
||||
'info_dict': {
|
||||
'id': 'de-tegenprestatie',
|
||||
'title': 'De Tegenprestatie',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html',
|
||||
'info_dict': {
|
||||
'id': 'VARA_101375237',
|
||||
'ext': 'm4v',
|
||||
'title': 'MH17: Het verdriet van Nederland',
|
||||
'description': 'md5:09e1a37c1fdb144621e22479691a9f18',
|
||||
'upload_date': '20150716',
|
||||
},
|
||||
'params': {
|
||||
# Skip because of m3u8 download
|
||||
'skip_download': True
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
|
||||
for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
|
||||
]
|
||||
|
||||
playlist_title = self._search_regex(
|
||||
r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
|
||||
webpage, 'playlist title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
||||
|
||||
class WNLIE(InfoExtractor):
|
||||
class WNLIE(NPOPlaylistBaseIE):
|
||||
IE_NAME = 'wnl'
|
||||
_VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
|
||||
_PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>'
|
||||
_PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
|
||||
'info_dict': {
|
||||
'id': 'vandaag-de-dag-6-mei',
|
||||
'title': 'Vandaag de Dag 6 mei',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
class AndereTijdenIE(NPOPlaylistBaseIE):
|
||||
IE_NAME = 'anderetijden'
|
||||
_VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||
_PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
|
||||
_PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']'
|
||||
|
||||
entries = [
|
||||
self.url_result('npo:%s' % video_id, 'NPO')
|
||||
for video_id, part in re.findall(
|
||||
r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage)
|
||||
]
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
|
||||
webpage, 'playlist title')
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
_TESTS = [{
|
||||
'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
|
||||
'info_dict': {
|
||||
'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
|
||||
'title': 'Duitse soldaten over de Slag bij Arnhem',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
36
youtube_dl/extractor/nzz.py
Normal file
36
youtube_dl/extractor/nzz.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
|
||||
class NZZIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
|
||||
'info_dict': {
|
||||
'id': '9153',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
entries = []
|
||||
for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
|
||||
player_params = extract_attributes(player_element)
|
||||
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
||||
self.report_warning('Unsupported player type')
|
||||
continue
|
||||
entry_id = player_params['data-id']
|
||||
entries.append(self.url_result(
|
||||
'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
|
||||
|
||||
return self.playlist_result(entries, page_id)
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user