Compare commits
312 Commits
2015.06.04
...
2015.07.07
Author | SHA1 | Date | |
---|---|---|---|
![]() |
4dd09c9add | ||
![]() |
267dc07e6b | ||
![]() |
d7b4d5dd50 | ||
![]() |
7f220b2fac | ||
![]() |
275c0423aa | ||
![]() |
d3ee4bbc5a | ||
![]() |
85a064861f | ||
![]() |
d0b436bff2 | ||
![]() |
92b2f18072 | ||
![]() |
dfc4eca21f | ||
![]() |
fc7ae675e2 | ||
![]() |
804ad79985 | ||
![]() |
da839880e9 | ||
![]() |
e9d33454b5 | ||
![]() |
d80891efc4 | ||
![]() |
59a83d3e5b | ||
![]() |
13af92fdc4 | ||
![]() |
0c20ee7d4b | ||
![]() |
89d42c2c75 | ||
![]() |
04611765a4 | ||
![]() |
9dfc4fa1a1 | ||
![]() |
43232d5c14 | ||
![]() |
f7c272d4fa | ||
![]() |
ede21449c8 | ||
![]() |
d7c9a3e976 | ||
![]() |
35eb649e9d | ||
![]() |
e56a4c9e9b | ||
![]() |
95506e37af | ||
![]() |
e41840c522 | ||
![]() |
2a46a27e6c | ||
![]() |
0bcdc27653 | ||
![]() |
ddf0f74de7 | ||
![]() |
91b21b2334 | ||
![]() |
66e568de3b | ||
![]() |
f5ca97e393 | ||
![]() |
8d06a62485 | ||
![]() |
93f9420993 | ||
![]() |
5b61070c70 | ||
![]() |
dbe1a93526 | ||
![]() |
86511ea417 | ||
![]() |
33f1f81b8b | ||
![]() |
9d0b581fea | ||
![]() |
c05724cb18 | ||
![]() |
f0714c9f86 | ||
![]() |
cf386750c9 | ||
![]() |
54f428f645 | ||
![]() |
dc2bd20e55 | ||
![]() |
c608ee491f | ||
![]() |
738b926322 | ||
![]() |
bea41c7f3f | ||
![]() |
1bbe660dfa | ||
![]() |
c4bd188da4 | ||
![]() |
5414623791 | ||
![]() |
c93d53f5e3 | ||
![]() |
507683780e | ||
![]() |
e8b9ee5e08 | ||
![]() |
d16154d163 | ||
![]() |
c342041fba | ||
![]() |
bf42a9906d | ||
![]() |
9603e8a7d9 | ||
![]() |
c7c040b825 | ||
![]() |
ac0474f89d | ||
![]() |
bb512e57dc | ||
![]() |
db652ea186 | ||
![]() |
5a9cc19972 | ||
![]() |
1a5fd4eebc | ||
![]() |
8a1b49ff19 | ||
![]() |
b971abe897 | ||
![]() |
43b925ce74 | ||
![]() |
62b742ece3 | ||
![]() |
d16ef949ca | ||
![]() |
23e7cba87f | ||
![]() |
a8e6f30d8e | ||
![]() |
9c49410898 | ||
![]() |
802d74aa6b | ||
![]() |
71f9e49e67 | ||
![]() |
82ea1051b5 | ||
![]() |
6c4d20cd6f | ||
![]() |
04c27802c0 | ||
![]() |
c3b7202f4f | ||
![]() |
81103ef35d | ||
![]() |
0eb5c1c62a | ||
![]() |
a9de951744 | ||
![]() |
a42a1bb09d | ||
![]() |
9fbfc9bd4d | ||
![]() |
242a998bdc | ||
![]() |
9d1bf70234 | ||
![]() |
b8c1cc1a51 | ||
![]() |
eedd20ef96 | ||
![]() |
7c197ad96d | ||
![]() |
654fd03c73 | ||
![]() |
cee16e0fa3 | ||
![]() |
73c471e9ef | ||
![]() |
533b99fbf9 | ||
![]() |
f39eb98bab | ||
![]() |
da77d856a1 | ||
![]() |
b2575b38e7 | ||
![]() |
0a3cf9ad3d | ||
![]() |
00334d0de0 | ||
![]() |
226b886ca8 | ||
![]() |
bc93bdb5bb | ||
![]() |
af214c3a79 | ||
![]() |
4eb10f6621 | ||
![]() |
7d7d469025 | ||
![]() |
fd40bdc0be | ||
![]() |
7e0480ae0e | ||
![]() |
d80265ccd6 | ||
![]() |
1b5a1ae257 | ||
![]() |
d8d24a922a | ||
![]() |
03339b7b5b | ||
![]() |
2988835af5 | ||
![]() |
62cca96b72 | ||
![]() |
b4dea075a3 | ||
![]() |
533f67d3fa | ||
![]() |
906e2f0eac | ||
![]() |
b8091db6b9 | ||
![]() |
381c067755 | ||
![]() |
2182ab5187 | ||
![]() |
a1593a4a0e | ||
![]() |
12e9e8445d | ||
![]() |
8084be78c5 | ||
![]() |
1ac1c4c26e | ||
![]() |
d4f58034f7 | ||
![]() |
f843300fe5 | ||
![]() |
03b9c94437 | ||
![]() |
a219d175c6 | ||
![]() |
3c3b4176bd | ||
![]() |
022383139b | ||
![]() |
1a1251e877 | ||
![]() |
18b5e1e534 | ||
![]() |
2fece970b8 | ||
![]() |
e20d0c1e69 | ||
![]() |
255f5694aa | ||
![]() |
25701d5a2c | ||
![]() |
a5158f38a3 | ||
![]() |
c76799c555 | ||
![]() |
2bb5b6d0a1 | ||
![]() |
0bbba43ed0 | ||
![]() |
98ca102441 | ||
![]() |
3f3308cd75 | ||
![]() |
6f96e308d0 | ||
![]() |
756f574e4e | ||
![]() |
78294e6a9c | ||
![]() |
4e33577173 | ||
![]() |
607841af64 | ||
![]() |
396726244a | ||
![]() |
c5895d5dbd | ||
![]() |
b407e173e4 | ||
![]() |
6a745c2c0f | ||
![]() |
c9ac7fa909 | ||
![]() |
964afd0689 | ||
![]() |
16d6973f8a | ||
![]() |
edcd2d665b | ||
![]() |
385c3e5e91 | ||
![]() |
c8e337450b | ||
![]() |
10464af5d1 | ||
![]() |
cbcd1a5474 | ||
![]() |
c9bebed294 | ||
![]() |
360075e28a | ||
![]() |
accf79b107 | ||
![]() |
4d58b24c15 | ||
![]() |
c33a8639a7 | ||
![]() |
25fa8d66e6 | ||
![]() |
974a6146fe | ||
![]() |
0392ac98d2 | ||
![]() |
5e3915cbe3 | ||
![]() |
29b809de68 | ||
![]() |
0d0d5d3717 | ||
![]() |
a69e8bfdd9 | ||
![]() |
062a3fdf36 | ||
![]() |
028a33d7f2 | ||
![]() |
447053668f | ||
![]() |
f3aecb27a4 | ||
![]() |
7f0172b3e5 | ||
![]() |
79fe954d79 | ||
![]() |
0501bfa159 | ||
![]() |
a155b7e76c | ||
![]() |
5228b756af | ||
![]() |
04e7596680 | ||
![]() |
f1e66cb2eb | ||
![]() |
4fd35ee072 | ||
![]() |
ee69799262 | ||
![]() |
636a9637f4 | ||
![]() |
9383e66f94 | ||
![]() |
99e6833c85 | ||
![]() |
c203be3fb4 | ||
![]() |
02175a7986 | ||
![]() |
8117df4cd9 | ||
![]() |
7c7dd9dc7f | ||
![]() |
054932f403 | ||
![]() |
aed473ccf9 | ||
![]() |
8268e94cd4 | ||
![]() |
4af98ecdfb | ||
![]() |
4b9f9010b0 | ||
![]() |
2a0fcf6113 | ||
![]() |
67d95f177c | ||
![]() |
44773ad125 | ||
![]() |
5774ef35c4 | ||
![]() |
b95cfa9170 | ||
![]() |
afa1ded425 | ||
![]() |
00ac23e6e0 | ||
![]() |
7d0c934a3e | ||
![]() |
8f75761f24 | ||
![]() |
9fd24e3a22 | ||
![]() |
755a9d3d1a | ||
![]() |
ac499cb61c | ||
![]() |
180940e02d | ||
![]() |
976b03c56b | ||
![]() |
450d89ddc1 | ||
![]() |
463b2e5542 | ||
![]() |
70a2002399 | ||
![]() |
a617b10075 | ||
![]() |
0029071adb | ||
![]() |
ad49fe7c8f | ||
![]() |
49bc802f81 | ||
![]() |
af9cdee9cb | ||
![]() |
b4e1576aee | ||
![]() |
78e2b74bb9 | ||
![]() |
65d161c480 | ||
![]() |
9fcbd5db2a | ||
![]() |
4f3bf679f5 | ||
![]() |
0072afca8e | ||
![]() |
61aa5ba36e | ||
![]() |
9f4323252a | ||
![]() |
8b6c896c4b | ||
![]() |
185dbc4974 | ||
![]() |
3d535e0471 | ||
![]() |
d22dec74ff | ||
![]() |
9872d3110c | ||
![]() |
b859971873 | ||
![]() |
e5095f1198 | ||
![]() |
499a077761 | ||
![]() |
5da7177729 | ||
![]() |
3507766bd0 | ||
![]() |
f37bdbe537 | ||
![]() |
2da09ff8b0 | ||
![]() |
5ccddb7ecf | ||
![]() |
954c1d0529 | ||
![]() |
494f20cbdc | ||
![]() |
29902c8ec0 | ||
![]() |
9f15bdabc8 | ||
![]() |
fff3455f58 | ||
![]() |
87446dc618 | ||
![]() |
99ac0390f5 | ||
![]() |
ff0f0b9172 | ||
![]() |
97b570a94c | ||
![]() |
a9d56c6843 | ||
![]() |
f98470df69 | ||
![]() |
eb8be1fe76 | ||
![]() |
7ebd5376fe | ||
![]() |
70219b0f43 | ||
![]() |
bd5bc0cd5a | ||
![]() |
6e054aacca | ||
![]() |
9d581f3d52 | ||
![]() |
9bf99891d0 | ||
![]() |
d9cf48e81e | ||
![]() |
e1b9322b09 | ||
![]() |
627b964825 | ||
![]() |
a55e36f48d | ||
![]() |
01e21b89ee | ||
![]() |
788be3313d | ||
![]() |
e1ec93304d | ||
![]() |
edb99d4c18 | ||
![]() |
68477c3dab | ||
![]() |
65ba8b23f4 | ||
![]() |
621ed9f5f4 | ||
![]() |
b26733ba7f | ||
![]() |
9836cfb8d6 | ||
![]() |
665b6c1236 | ||
![]() |
9414338a48 | ||
![]() |
de390ea077 | ||
![]() |
717b0239fd | ||
![]() |
d00735a0c5 | ||
![]() |
c23d5ce926 | ||
![]() |
b5a3c7f109 | ||
![]() |
9c5f685ef1 | ||
![]() |
08bb8ef201 | ||
![]() |
865ab62f43 | ||
![]() |
9948113590 | ||
![]() |
c4ee87022b | ||
![]() |
ffba4edb06 | ||
![]() |
958d0b659b | ||
![]() |
aacda28b28 | ||
![]() |
29e7e0781b | ||
![]() |
7012620e2b | ||
![]() |
f1da861018 | ||
![]() |
05aa9c82d9 | ||
![]() |
a9e58ecd3f | ||
![]() |
223544552f | ||
![]() |
3d8e9573a4 | ||
![]() |
54eb81a087 | ||
![]() |
c33c547d66 | ||
![]() |
dfe7dd9bdb | ||
![]() |
63ccf6474d | ||
![]() |
e8ac61e840 | ||
![]() |
f00a650705 | ||
![]() |
4bde5ce992 | ||
![]() |
d31573fa37 | ||
![]() |
8b8cde2140 | ||
![]() |
55e5841f14 | ||
![]() |
9bac8c57e3 | ||
![]() |
670861bd20 | ||
![]() |
08f7db20c1 | ||
![]() |
605ec701b7 | ||
![]() |
1498940b10 | ||
![]() |
ca45246627 | ||
![]() |
f9355dc989 | ||
![]() |
680f9744c4 | ||
![]() |
2c935c0c72 | ||
![]() |
7198063d96 | ||
![]() |
137597b0ea | ||
![]() |
f670ef1c8e |
3
AUTHORS
3
AUTHORS
@@ -127,3 +127,6 @@ Julian Richen
|
||||
Ping O.
|
||||
Mister Hat
|
||||
Peter Ding
|
||||
jackyzy823
|
||||
George Brighton
|
||||
Remita Amine
|
||||
|
13
README.md
13
README.md
@@ -52,8 +52,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
|
||||
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
|
||||
--dump-user-agent Display the current browser identification
|
||||
--list-extractors List all supported extractors and the URLs they would handle
|
||||
--list-extractors List all supported extractors
|
||||
--extractor-descriptions Output descriptions of all supported extractors
|
||||
--force-generic-extractor Force extraction to use the generic extractor
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
|
||||
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
|
||||
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
|
||||
@@ -107,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external downloader
|
||||
|
||||
## Filesystem Options:
|
||||
@@ -189,8 +190,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--all-formats Download all available video formats
|
||||
--prefer-free-formats Prefer free video formats unless a specific one is requested
|
||||
-F, --list-formats List all available formats
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifests and related data on YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
|
||||
merge is required
|
||||
|
||||
## Subtitle Options:
|
||||
@@ -223,7 +224,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
|
||||
%(title)s" matches a title like "Coldplay - Paradise"
|
||||
--xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
|
||||
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
|
||||
fix file if we can, warn otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
|
||||
@@ -379,7 +380,7 @@ In February 2015, the new YouTube player contained a character sequence in a str
|
||||
|
||||
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
||||
|
||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--network-address` options](#network-options) to select another IP address.
|
||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
||||
|
||||
### SyntaxError: Non-ASCII character ###
|
||||
|
||||
|
@@ -17,6 +17,7 @@
|
||||
- **AcademicEarth:Course**
|
||||
- **AddAnime**
|
||||
- **AdobeTV**
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **Aftenposten**
|
||||
- **Aftonbladet**
|
||||
@@ -110,6 +111,7 @@
|
||||
- **dailymotion**
|
||||
- **dailymotion:playlist**
|
||||
- **dailymotion:user**
|
||||
- **DailymotionCloud**
|
||||
- **daum.net**
|
||||
- **DBTV**
|
||||
- **DctpTv**
|
||||
@@ -120,6 +122,8 @@
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **DouyuTV**
|
||||
- **dramafever**
|
||||
- **dramafever:series**
|
||||
- **DRBonanza**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
@@ -153,6 +157,7 @@
|
||||
- **fernsehkritik.tv**
|
||||
- **fernsehkritik.tv:postecke**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
@@ -217,6 +222,7 @@
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima**
|
||||
- **iqiyi**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **Izlesene**
|
||||
@@ -277,6 +283,7 @@
|
||||
- **Motherless**
|
||||
- **Motorsport**: motorsport.com
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **movshare**: MovShare
|
||||
- **MPORA**
|
||||
@@ -340,6 +347,7 @@
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
- **OpenFilm**
|
||||
@@ -353,6 +361,7 @@
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **Phoenix**
|
||||
- **Photobucket**
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
@@ -375,6 +384,7 @@
|
||||
- **Pyvideo**
|
||||
- **qqmusic**
|
||||
- **qqmusic:album**
|
||||
- **qqmusic:playlist**
|
||||
- **qqmusic:singer**
|
||||
- **qqmusic:toplist**
|
||||
- **QuickVid**
|
||||
@@ -407,6 +417,7 @@
|
||||
- **rutube:movie**: Rutube movies
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Ruutu**
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **Sandia**: Sandia National Laboratories
|
||||
@@ -431,6 +442,8 @@
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
- **smotri:community**: Smotri.com community videos
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **SnagFilms**
|
||||
- **SnagFilmsEmbed**
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **soompi**
|
||||
@@ -493,6 +506,7 @@
|
||||
- **TheOnion**
|
||||
- **ThePlatform**
|
||||
- **TheSixtyOne**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
- **THVideoPlaylist**
|
||||
@@ -519,6 +533,8 @@
|
||||
- **TV2**
|
||||
- **TV2Article**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TVC**
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvp.pl**
|
||||
- **tvp.pl:Series**
|
||||
@@ -531,6 +547,7 @@
|
||||
- **twitch:stream**
|
||||
- **twitch:video**
|
||||
- **twitch:vod**
|
||||
- **TwitterCard**
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
@@ -605,6 +622,7 @@
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **Xstream**
|
||||
@@ -621,7 +639,7 @@
|
||||
- **YesJapan**
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
- **Youku**
|
||||
- **youku**
|
||||
- **YouPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
|
@@ -119,7 +119,7 @@ class YoutubeDL(object):
|
||||
|
||||
username: Username for authentication purposes.
|
||||
password: Password for authentication purposes.
|
||||
videopassword: Password for acces a video.
|
||||
videopassword: Password for accessing a video.
|
||||
usenetrc: Use netrc for authentication instead.
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
@@ -139,6 +139,7 @@ class YoutubeDL(object):
|
||||
outtmpl: Template for output names.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
ignoreerrors: Do not stop on download errors.
|
||||
force_generic_extractor: Force downloader to use the generic extractor
|
||||
nooverwrites: Prevent overwriting files.
|
||||
playliststart: Playlist item to start at.
|
||||
playlistend: Playlist item to end at.
|
||||
@@ -626,13 +627,16 @@ class YoutubeDL(object):
|
||||
info_dict.setdefault(key, value)
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||
process=True):
|
||||
process=True, force_generic_extractor=False):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
extra_info is a dict containing the extra values to add to each result
|
||||
'''
|
||||
|
||||
if not ie_key and force_generic_extractor:
|
||||
ie_key = 'Generic'
|
||||
|
||||
if ie_key:
|
||||
ies = [self.get_info_extractor(ie_key)]
|
||||
else:
|
||||
@@ -1004,7 +1008,7 @@ class YoutubeDL(object):
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
t.get('id'), t.get('url')))
|
||||
for i, t in enumerate(thumbnails):
|
||||
if 'width' in t and 'height' in t:
|
||||
if t.get('width') and t.get('height'):
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
if t.get('id') is None:
|
||||
t['id'] = '%d' % i
|
||||
@@ -1016,13 +1020,13 @@ class YoutubeDL(object):
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
||||
# Working around negative timestamps in Windows
|
||||
# (see http://bugs.python.org/issue1646728)
|
||||
if info_dict['timestamp'] < 0 and os.name == 'nt':
|
||||
info_dict['timestamp'] = 0
|
||||
upload_date = datetime.datetime.utcfromtimestamp(
|
||||
info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
try:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
|
||||
if self.params.get('listsubtitles', False):
|
||||
if 'automatic_captions' in info_dict:
|
||||
@@ -1033,12 +1037,6 @@ class YoutubeDL(object):
|
||||
info_dict['id'], info_dict.get('subtitles'),
|
||||
info_dict.get('automatic_captions'))
|
||||
|
||||
# This extractors handle format selection themselves
|
||||
if info_dict['extractor'] in ['Youku']:
|
||||
if download:
|
||||
self.process_info(info_dict)
|
||||
return info_dict
|
||||
|
||||
# We now pick which formats have to be downloaded
|
||||
if info_dict.get('formats') is None:
|
||||
# There's only one format available
|
||||
@@ -1499,7 +1497,8 @@ class YoutubeDL(object):
|
||||
for url in url_list:
|
||||
try:
|
||||
# It also downloads the videos
|
||||
res = self.extract_info(url)
|
||||
res = self.extract_info(
|
||||
url, force_generic_extractor=self.params.get('force_generic_extractor', False))
|
||||
except UnavailableVideoError:
|
||||
self.report_error('unable to download video')
|
||||
except MaxDownloadsReached:
|
||||
|
@@ -293,6 +293,7 @@ def _real_main(argv=None):
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
'ratelimit': opts.ratelimit,
|
||||
'nooverwrites': opts.nooverwrites,
|
||||
'retries': opts_retries,
|
||||
|
@@ -109,6 +109,14 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class HttpieFD(ExternalFD):
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['%s:%s' % (key, val)]
|
||||
return cmd
|
||||
|
||||
_BY_NAME = dict(
|
||||
(klass.get_basename(), klass)
|
||||
for name, klass in globals().items()
|
||||
@@ -123,5 +131,6 @@ def list_external_downloaders():
|
||||
def get_external_downloader(external_downloader):
|
||||
""" Given the name of the executable, see whether we support the given
|
||||
downloader . """
|
||||
bn = os.path.basename(external_downloader)
|
||||
# Drop .exe extension on Windows
|
||||
bn = os.path.splitext(os.path.basename(external_downloader))[0]
|
||||
return _BY_NAME[bn]
|
||||
|
@@ -4,7 +4,10 @@ from .abc import ABCIE
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .adobetv import AdobeTVIE
|
||||
from .adobetv import (
|
||||
AdobeTVIE,
|
||||
AdobeTVVideoIE,
|
||||
)
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aftenposten import AftenpostenIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
@@ -103,6 +106,7 @@ from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionUserIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
@@ -112,6 +116,10 @@ from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import DouyuTVIE
|
||||
from .dramafever import (
|
||||
DramaFeverIE,
|
||||
DramaFeverSeriesIE,
|
||||
)
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
@@ -136,7 +144,6 @@ from .ellentv import (
|
||||
)
|
||||
from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .empflix import EMPFlixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
@@ -152,6 +159,7 @@ from .fc2 import FC2IE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .fktv import (
|
||||
FKTVIE,
|
||||
FKTVPosteckeIE,
|
||||
@@ -229,6 +237,7 @@ from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@@ -378,6 +387,7 @@ from .nytimes import (
|
||||
from .nuvid import NuvidIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
OoyalaIE,
|
||||
OoyalaExternalIE,
|
||||
@@ -395,6 +405,7 @@ from .pbs import PBSIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
@@ -421,6 +432,7 @@ from .qqmusic import (
|
||||
QQMusicSingerIE,
|
||||
QQMusicAlbumIE,
|
||||
QQMusicToplistIE,
|
||||
QQMusicPlaylistIE,
|
||||
)
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
@@ -453,6 +465,7 @@ from .rutube import (
|
||||
RutubePersonIE,
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .ruutu import RuutuIE
|
||||
from .sandia import SandiaIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
@@ -480,6 +493,10 @@ from .smotri import (
|
||||
SmotriUserIE,
|
||||
SmotriBroadcastIE,
|
||||
)
|
||||
from .snagfilms import (
|
||||
SnagFilmsIE,
|
||||
SnagFilmsEmbedIE,
|
||||
)
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .soompi import (
|
||||
@@ -553,6 +570,7 @@ from .tf1 import TF1IE
|
||||
from .theonion import TheOnionIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
@@ -560,7 +578,11 @@ from .tmz import (
|
||||
TMZIE,
|
||||
TMZArticleIE,
|
||||
)
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .tnaflix import (
|
||||
TNAFlixIE,
|
||||
EMPFlixIE,
|
||||
MovieFapIE,
|
||||
)
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
THVideoPlaylistIE
|
||||
@@ -582,6 +604,10 @@ from .tv2 import (
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv4 import TV4IE
|
||||
from .tvc import (
|
||||
TVCIE,
|
||||
TVCArticleIE,
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvplay import TVPlayIE
|
||||
@@ -600,6 +626,7 @@ from .twitch import (
|
||||
TwitchBookmarksIE,
|
||||
TwitchStreamIE,
|
||||
)
|
||||
from .twitter import TwitterCardIE
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
@@ -685,7 +712,10 @@ from .wrzuta import WrzutaIE
|
||||
from .wsj import WSJIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
)
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
|
@@ -5,6 +5,8 @@ from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
float_or_none,
|
||||
ISO639Utils,
|
||||
)
|
||||
|
||||
|
||||
@@ -69,3 +71,61 @@ class AdobeTVIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class AdobeTVVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
|
||||
'url': 'https://video.tv.adobe.com/v/2456/',
|
||||
'md5': '43662b577c018ad707a63766462b1e87',
|
||||
'info_dict': {
|
||||
'id': '2456',
|
||||
'ext': 'mp4',
|
||||
'title': 'New experience with Acrobat DC',
|
||||
'description': 'New experience with Acrobat DC',
|
||||
'duration': 248.667,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_params = self._parse_json(self._search_regex(
|
||||
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
|
||||
video_id)
|
||||
|
||||
formats = [{
|
||||
'url': source['src'],
|
||||
'width': source.get('width'),
|
||||
'height': source.get('height'),
|
||||
'tbr': source.get('bitrate'),
|
||||
} for source in player_params['sources']]
|
||||
|
||||
# For both metadata and downloaded files the duration varies among
|
||||
# formats. I just pick the max one
|
||||
duration = max(filter(None, [
|
||||
float_or_none(source.get('duration'), scale=1000)
|
||||
for source in player_params['sources']]))
|
||||
|
||||
subtitles = {}
|
||||
for translation in player_params.get('translations', []):
|
||||
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
|
||||
if lang_id not in subtitles:
|
||||
subtitles[lang_id] = []
|
||||
subtitles[lang_id].append({
|
||||
'url': translation['vttPath'],
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': player_params['title'],
|
||||
'description': self._og_search_description(webpage),
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@@ -129,6 +129,20 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
|
||||
'info_dict': {
|
||||
'id': 'b05zmgw1',
|
||||
'ext': 'flv',
|
||||
'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
|
||||
'title': 'Royal Academy Summer Exhibition',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@@ -237,26 +251,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
||||
srt = ''
|
||||
|
||||
def _extract_text(p):
|
||||
if p.text is not None:
|
||||
stripped_text = p.text.strip()
|
||||
if stripped_text:
|
||||
return stripped_text
|
||||
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
|
||||
for pos, p in enumerate(ps):
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
|
||||
subtitles[lang] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
{
|
||||
'data': srt,
|
||||
'ext': 'srt',
|
||||
},
|
||||
]
|
||||
return subtitles
|
||||
|
||||
@@ -267,7 +266,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
|
||||
@@ -362,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._search_regex(
|
||||
r'<p class="medium-description">([^<]+)</p>',
|
||||
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
else:
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
@@ -105,7 +105,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
'filesize': int_or_none(
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
if hq_durl:
|
||||
if hq_durl is not None:
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
|
@@ -13,6 +13,7 @@ from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -119,7 +120,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
|
||||
try:
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||
except xml.etree.ElementTree.ParseError:
|
||||
except compat_xml_parse_error:
|
||||
return
|
||||
|
||||
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
||||
@@ -156,6 +157,28 @@ class BrightcoveIE(InfoExtractor):
|
||||
linkBase = find_param('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
params['linkBaseURL'] = linkBase
|
||||
return cls._make_brightcove_url(params)
|
||||
|
||||
@classmethod
def _build_brighcove_url_from_js(cls, object_js):
    """Build a Brightcove URL from a ``customBC.createVideo(...)`` JS call.

    ``object_js`` is the text of the JavaScript call. The player ID, the
    player key and the video ID are pulled out of its argument list and
    handed to ``cls._make_brightcove_url``.

    Returns the value produced by ``_make_brightcove_url``, or None when
    the call does not have the expected argument layout.
    """
    # The layout of JS is as follows:
    # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
    #   // build Brightcove <object /> XML
    # }
    #
    # NOTE: the dot is escaped here. The previous spelling
    # ``customBC.\createVideo`` left the dot as a wildcard and used the
    # escape ``\c``, which newer versions of the ``re`` module reject.
    m = re.search(
        r'''(?x)customBC\.createVideo\(
            .*?                                                  # skipping width and height
            ["\'](?P<playerID>\d+)["\']\s*,\s*                   # playerID
            ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s*  # playerKey begins with AQ and is 50 characters
                                                                 # in length, however it's appended to itself
                                                                 # in places, so truncate
            ["\'](?P<videoID>\d+)["\']                           # @videoPlayer
        ''', object_js)
    if m:
        # The named groups (playerID, playerKey, videoID) become the URL
        # parameters.
        return cls._make_brightcove_url(m.groupdict())
    return None
|
||||
|
||||
@classmethod
def _make_brightcove_url(cls, params):
    """Return the federated URL template filled with url-encoded ``params``.

    ``params`` is anything ``compat_urllib_parse.urlencode`` accepts; the
    resulting query string is substituted into
    ``cls._FEDERATED_URL_TEMPLATE``.
    """
    query_string = compat_urllib_parse.urlencode(params)
    federated_url = cls._FEDERATED_URL_TEMPLATE % query_string
    return federated_url
|
||||
|
||||
@@ -172,7 +195,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
"""Return a list of all Brightcove URLs from the webpage """
|
||||
|
||||
url_m = re.search(
|
||||
r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
|
||||
r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
|
||||
webpage)
|
||||
if url_m:
|
||||
url = unescapeHTML(url_m.group(1))
|
||||
@@ -188,7 +211,12 @@ class BrightcoveIE(InfoExtractor):
|
||||
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
||||
).+?>\s*</object>''',
|
||||
webpage)
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
if matches:
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
|
||||
return list(filter(None, [
|
||||
cls._build_brighcove_url_from_js(custom_bc)
|
||||
for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
@@ -4,12 +4,13 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class CBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P<id>[^/]+)/.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
'id': '4JUVEwq3wUT7',
|
||||
'display_id': 'connect-chat-feat-garth-brooks',
|
||||
'ext': 'flv',
|
||||
'title': 'Connect Chat feat. Garth Brooks',
|
||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||
@@ -24,6 +25,7 @@ class CBSIE(InfoExtractor):
|
||||
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
|
||||
'info_dict': {
|
||||
'id': 'WWF_5KqY3PK1',
|
||||
'display_id': 'st-vincent',
|
||||
'ext': 'flv',
|
||||
'title': 'Live on Letterman - St. Vincent',
|
||||
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
|
||||
@@ -34,12 +36,23 @@ class CBSIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
real_id = self._search_regex(
|
||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||
[r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
|
||||
webpage, 'real video ID')
|
||||
return self.url_result('theplatform:%s' % real_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': 'theplatform:%s' % real_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class CNETIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||
'info_dict': {
|
||||
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
||||
@@ -25,7 +25,20 @@ class CNETIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'requires rtmpdump',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
|
||||
'info_dict': {
|
||||
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
|
||||
'ext': 'flv',
|
||||
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
|
||||
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
|
||||
'uploader': 'Ashley Esqueda',
|
||||
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -42,7 +55,7 @@ class CNETIE(InfoExtractor):
|
||||
raise ExtractorError('Cannot find video data')
|
||||
|
||||
mpx_account = data['config']['players']['default']['mpx_account']
|
||||
vid = vdata['files']['rtmp']
|
||||
vid = vdata['files'].get('rtmp', vdata['files']['hds'])
|
||||
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
|
||||
|
||||
video_id = vdata['id']
|
||||
|
@@ -22,6 +22,7 @@ from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
age_restricted,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
@@ -33,7 +34,6 @@ from ..utils import (
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
)
|
||||
_NO_DEFAULT = object()
|
||||
|
||||
|
||||
class InfoExtractor(object):
|
||||
@@ -523,7 +523,7 @@ class InfoExtractor(object):
|
||||
video_info['description'] = playlist_description
|
||||
return video_info
|
||||
|
||||
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
"""
|
||||
Perform a regex search on the given string, using a single or a list of
|
||||
patterns returning the first matching group.
|
||||
@@ -549,7 +549,7 @@ class InfoExtractor(object):
|
||||
return next(g for g in mobj.groups() if g is not None)
|
||||
else:
|
||||
return mobj.group(group)
|
||||
elif default is not _NO_DEFAULT:
|
||||
elif default is not NO_DEFAULT:
|
||||
return default
|
||||
elif fatal:
|
||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||
@@ -557,7 +557,7 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||
return None
|
||||
|
||||
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
"""
|
||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||
"""
|
||||
@@ -846,7 +846,8 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None):
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True):
|
||||
|
||||
formats = [{
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||
@@ -865,8 +866,11 @@ class InfoExtractor(object):
|
||||
|
||||
m3u8_doc = self._download_webpage(
|
||||
m3u8_url, video_id,
|
||||
note='Downloading m3u8 information',
|
||||
errnote='Failed to download m3u8 information')
|
||||
note=note or 'Downloading m3u8 information',
|
||||
errnote=errnote or 'Failed to download m3u8 information',
|
||||
fatal=fatal)
|
||||
if m3u8_doc is False:
|
||||
return m3u8_doc
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
|
@@ -27,7 +27,7 @@ from ..aes import (
|
||||
|
||||
|
||||
class CrunchyrollIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
@@ -45,6 +45,22 @@ class CrunchyrollIE(InfoExtractor):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
|
||||
'info_dict': {
|
||||
'id': '589804',
|
||||
'ext': 'flv',
|
||||
'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
|
||||
'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Danny Choo Network',
|
||||
'upload_date': '20120213',
|
||||
},
|
||||
'params': {
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
@@ -251,16 +267,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
|
||||
# urlencode doesn't work!
|
||||
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
|
||||
streamdata_req = compat_urllib_request.Request(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
|
||||
% (stream_id, stream_format, stream_quality),
|
||||
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
|
||||
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
|
||||
streamdata = self._download_xml(
|
||||
streamdata_req, video_id,
|
||||
note='Downloading media info for %s' % video_format)
|
||||
video_url = streamdata.find('./host').text
|
||||
video_play_path = streamdata.find('./file').text
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
video_url = stream_info.find('./host').text
|
||||
video_play_path = stream_info.find('./file').text
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
|
@@ -251,3 +251,53 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
'title': full_user,
|
||||
'entries': self._extract_entries(user),
|
||||
}
|
||||
|
||||
|
||||
class DailymotionCloudIE(DailymotionBaseInfoExtractor):
    """Extractor for ``api.dmcloud.net`` embed player pages."""

    # Common prefix of both the extractor URL and the embed-detection URL.
    _VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/'
    # <prefix><one path segment>/<id>; the id stops at '/' or '?'.
    _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
    # Looser form used when scanning HTML: runs up to the closing quote.
    _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX

    _TESTS = [{
        # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
        # Tested at FranceTvInfo_2
        'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
        'only_matching': True,
    }, {
        # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
        'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
        'only_matching': True,
    }]

    @classmethod
    def _extract_dmcloud_url(cls, webpage):
        """Return the first dmcloud embed URL found in ``webpage``, or None.

        An ``<iframe src=...>`` embed is tried first, then an ``<input>``
        element with id ``dmcloudUrlEmissionSelect`` carrying the URL in
        its ``value`` attribute.
        """
        # Order matters: the iframe form takes precedence over the input form.
        embed_patterns = (
            r'<iframe[^>]+src=[\'"](%s)[\'"]',
            r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]',
        )
        for pattern in embed_patterns:
            mobj = re.search(pattern % cls._VALID_EMBED_URL, webpage)
            if mobj:
                return mobj.group(1)
        return None

    def _real_extract(self, url):
        """Download the embed page and return the info dict for its video.

        The title is taken from the page's ``<title>`` element; the media
        URL and thumbnail come from the JSON object assigned to the
        ``info`` JavaScript variable.
        """
        video_id = self._match_id(url)

        # _build_request is provided by the base class (not visible here).
        request = self._build_request(url)
        webpage = self._download_webpage(request, video_id)

        title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')

        video_info = self._parse_json(self._search_regex(
            r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)

        # TODO: parse ios_url, which is in fact a manifest
        video_url = video_info['mp4_url']

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': video_info.get('thumbnail_url'),
        }
|
||||
|
@@ -2,19 +2,19 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class DiscoveryIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||
'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
|
||||
'info_dict': {
|
||||
'id': 'mission-impossible-outtakes',
|
||||
'ext': 'flv',
|
||||
'id': '20769',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mission Impossible Outtakes',
|
||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||
@@ -24,22 +24,36 @@ class DiscoveryIE(InfoExtractor):
|
||||
'timestamp': 1303099200,
|
||||
'upload_date': '20110418',
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
|
||||
'info_dict': {
|
||||
'id': 'mythbusters-the-simpsons',
|
||||
'title': 'MythBusters: The Simpsons',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._download_json(url + '?flat=1', video_id)
|
||||
|
||||
info = self._parse_json(self._search_regex(
|
||||
r'(?s)<script type="application/ld\+json">(.*?)</script>',
|
||||
webpage, 'video info'), video_id)
|
||||
video_title = info.get('playlist_title') or info.get('video_title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['name'],
|
||||
'url': info['contentURL'],
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'timestamp': parse_iso8601(info.get('uploadDate')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
}
|
||||
entries = [{
|
||||
'id': compat_str(video_info['id']),
|
||||
'formats': self._extract_m3u8_formats(
|
||||
video_info['src'], video_id, ext='mp4',
|
||||
note='Download m3u8 information for video %d' % (idx + 1)),
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'duration': parse_duration(video_info.get('video_length')),
|
||||
'webpage_url': video_info.get('href'),
|
||||
'thumbnail': video_info.get('thumbnailURL'),
|
||||
'alt_title': video_info.get('secondary_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('publishedDate')),
|
||||
} for idx, video_info in enumerate(info['playlist'])]
|
||||
|
||||
return self.playlist_result(entries, video_id, video_title)
|
||||
|
197
youtube_dl/extractor/dramafever.py
Normal file
197
youtube_dl/extractor/dramafever.py
Normal file
@@ -0,0 +1,197 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class DramaFeverBaseIE(InfoExtractor):
    """Base class for the DramaFever extractors; logs in on initialization."""

    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
    _NETRC_MACHINE = 'dramafever'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Post the login form when credentials are available.

        Does nothing when ``_get_login_info`` yields no username. Raises
        ExtractorError when the response page shows neither logout marker;
        the site's own error text is included when it can be found.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        post_data = compat_urllib_parse.urlencode({
            'username': username,
            'password': password,
        }).encode('utf-8')
        login_request = compat_urllib_request.Request(self._LOGIN_URL, post_data)
        login_page = self._download_webpage(
            login_request, None, 'Logging in as %s' % username)

        # Either marker in the response is taken as proof of a successful login.
        logout_markers = ['href="/accounts/logout/"', '>Log out<']
        if any(marker in login_page for marker in logout_markers):
            return

        message = self._html_search_regex(
            r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
            login_page, 'error message', default=None)
        if message:
            raise ExtractorError('Unable to login: %s' % message, expected=True)
        raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class DramaFeverIE(DramaFeverBaseIE):
    """Extractor for a single DramaFever episode (``/drama/<n>/<n>/`` URLs)."""

    IE_NAME = 'dramafever'
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
    _TEST = {
        'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512.1',
            'ext': 'flv',
            'title': 'Cooking with Shin 4512.1',
            'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1404336058,
            'upload_date': '20140702',
            'duration': 343,
        }
    }

    def _real_extract(self, url):
        """Fetch the episode feed and build the info dict from it.

        The two numeric URL components are joined with '.' to form the
        video id / feed guid. An HTTP error while downloading the feed is
        reported as a country restriction; other ExtractorErrors propagate
        unchanged.
        """
        video_id = self._match_id(url).replace('/', '.')

        try:
            feed = self._download_json(
                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
                video_id, 'Downloading episode JSON')['channel']['item']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                raise ExtractorError(
                    'Currently unavailable in your country.', expected=True)
            raise

        media_group = feed.get('media-group', {})
        # 'media-group' is already treated as optional above, so tolerate a
        # missing or empty 'media-content' too instead of failing with
        # KeyError / IndexError further down.
        media_contents = media_group.get('media-content') or []

        formats = []
        for media_content in media_contents:
            src = media_content.get('@attributes', {}).get('url')
            if not src:
                continue
            # Manifests are expanded into their variants; anything else is
            # used as a direct media URL.
            ext = determine_ext(src)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    src, video_id, f4m_id='hds'))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', m3u8_id='hls'))
            else:
                formats.append({
                    'url': src,
                })
        self._sort_formats(formats)

        title = media_group.get('media-title')
        description = media_group.get('media-description')
        # The duration is read from the first media-content entry only.
        duration = None
        if media_contents:
            duration = int_or_none(
                media_contents[0].get('@attributes', {}).get('duration'))
        thumbnail = self._proto_relative_url(
            media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
        timestamp = parse_iso8601(feed.get('pubDate'), ' ')

        subtitles = {}
        for media_subtitle in media_group.get('media-subTitle', []):
            lang = media_subtitle.get('@attributes', {}).get('lang')
            href = media_subtitle.get('@attributes', {}).get('href')
            if not lang or not href:
                continue
            subtitles[lang] = [{
                'ext': 'ttml',
                'url': href,
            }]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
    """Extractor for a DramaFever series page; yields a playlist of episodes."""

    IE_NAME = 'dramafever:series'
    # The negative lookahead keeps '/drama/<n>/<n>/...' (episode) URLs out.
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
    _TESTS = [{
        'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512',
            'title': 'Cooking with Shin',
            'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
        },
        'playlist_count': 4,
    }, {
        'url': 'http://www.dramafever.com/drama/124/IRIS/',
        'info_dict': {
            'id': '124',
            'title': 'IRIS',
            'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
        },
        'playlist_count': 20,
    }]

    # Fallback value for the 'cs' API parameter, used when it cannot be
    # read from main.js.
    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
    _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)

    def _get_consumer_secret(self, video_id):
        """Return the consumer secret found in main.js, else the built-in one.

        The download is non-fatal: when it fails, or when the ``var cs``
        assignment is not found, ``_CONSUMER_SECRET`` is returned.
        """
        mainjs = self._download_webpage(
            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
            video_id, 'Downloading main.js', fatal=False)
        if not mainjs:
            return self._CONSUMER_SECRET
        return self._search_regex(
            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
            'consumer secret', default=self._CONSUMER_SECRET)

    def _real_extract(self, url):
        """Return a playlist with one url_result entry per series episode."""
        series_id = self._match_id(url)

        consumer_secret = self._get_consumer_secret(series_id)

        series = self._download_json(
            'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
            % (consumer_secret, series_id),
            series_id, 'Downloading series JSON')['series'][series_id]

        title = clean_html(series['name'])
        description = clean_html(series.get('description') or series.get('description_short'))

        entries = []
        for page_num in itertools.count(1):
            episodes = self._download_json(
                'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
                % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
                series_id, 'Downloading episodes JSON page #%d' % page_num)
            for episode in episodes.get('value', []):
                episode_url = episode.get('episode_url')
                if not episode_url:
                    # Entries without a URL cannot be delegated; skip them.
                    continue
                entries.append(self.url_result(
                    compat_urlparse.urljoin(url, episode_url),
                    'DramaFever', episode.get('guid')))
            # '>=' rather than '==': a reported page count of 0 (or any
            # value below the current page) must still end the loop.
            if page_num >= episodes['num_pages']:
                break

        return self.playlist_result(entries, series_id, title, description)
|
@@ -15,7 +15,6 @@ class DRBonanzaIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
||||
'md5': 'fe330252ddea607635cf2eb2c99a0af3',
|
||||
'info_dict': {
|
||||
'id': '65517',
|
||||
'ext': 'mp4',
|
||||
@@ -26,6 +25,9 @@ class DRBonanzaIE(InfoExtractor):
|
||||
'upload_date': '20110120',
|
||||
'duration': 3664,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmp
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
||||
'md5': '6dfe039417e76795fb783c52da3de11d',
|
||||
@@ -93,6 +95,11 @@ class DRBonanzaIE(InfoExtractor):
|
||||
'format_id': file['Type'].replace('Video', ''),
|
||||
'preference': preferencemap.get(file['Type'], -10),
|
||||
})
|
||||
if format['url'].startswith('rtmp'):
|
||||
rtmp_url = format['url']
|
||||
format['rtmp_live'] = True # --resume does not work
|
||||
if '/bonanza/' in rtmp_url:
|
||||
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
||||
formats.append(format)
|
||||
elif file['Type'] == "Thumb":
|
||||
thumbnail = file['Location']
|
||||
@@ -111,9 +118,6 @@ class DRBonanzaIE(InfoExtractor):
|
||||
description = '%s\n%s\n%s\n' % (
|
||||
info['Description'], info['Actors'], info['Colophon'])
|
||||
|
||||
for f in formats:
|
||||
f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
|
||||
f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
|
||||
self._sort_formats(formats)
|
||||
|
||||
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
||||
|
@@ -36,25 +36,24 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
|
||||
[r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'],
|
||||
webpage, 'title')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'like count', fatal=False))
|
||||
dislike_count = str_to_int(self._html_search_regex(
|
||||
r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'like count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'<span class="comments_count">([\d,\.]+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
def extract_count(id_, name):
|
||||
return str_to_int(self._html_search_regex(
|
||||
r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
|
||||
webpage, '%s count' % name, fatal=False))
|
||||
|
||||
like_count = extract_count('rate_likes', 'like')
|
||||
dislike_count = extract_count('rate_dislikes', 'dislike')
|
||||
comment_count = extract_count('comments_count', 'comment')
|
||||
|
||||
cats_str = self._search_regex(
|
||||
r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||
|
||||
return {
|
||||
|
@@ -1,31 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .tnaflix import TNAFlixIE
|
||||
|
||||
|
||||
class EMPFlixIE(TNAFlixIE):
    """empflix.com variant of TNAFlixIE.

    Only the URL pattern and three regexes are overridden here; how they
    are consumed is defined by TNAFlixIE (not visible in this file).
    """

    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'

    _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
    _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'

    _TESTS = [
        {
            'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
            'md5': 'b1bc15b6412d33902d6e5952035fcabc',
            'info_dict': {
                'id': '33051',
                'display_id': 'Amateur-Finger-Fuck',
                'ext': 'mp4',
                'title': 'Amateur Finger Fuck',
                'description': 'Amateur solo finger fucking.',
                # Raw string: '\.' is not a valid escape in a normal literal.
                'thumbnail': r're:https?://.*\.jpg$',
                'age_limit': 18,
            }
        },
        {
            'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
            'only_matching': True,
        }
    ]
|
@@ -6,9 +6,9 @@ from .common import InfoExtractor
|
||||
|
||||
class FazIE(InfoExtractor):
|
||||
IE_NAME = 'faz.net'
|
||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
||||
'info_dict': {
|
||||
'id': '12610585',
|
||||
@@ -16,7 +16,22 @@ class FazIE(InfoExtractor):
|
||||
'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
|
||||
'description': 'md5:1453fbf9a0d041d985a47306192ea253',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/foobarblafasel-13659345.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
88
youtube_dl/extractor/fivetv.py
Normal file
88
youtube_dl/extractor/fivetv.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FiveTVIE(InfoExtractor):
    """Extractor for 5-tv.ru pages.

    A URL either ends in a numeric id (``id`` group) or consists of a
    single path component (``path`` group); whichever matched is used as
    the video id.
    """

    _VALID_URL = r'''(?x)
                    http://
                        (?:www\.)?5-tv\.ru/
                        (?:
                            (?:[^/]+/)+(?P<id>\d+)|
                            (?P<path>[^/?#]+)(?:[/?#])?
                        )
                    '''

    _TESTS = [{
        'url': 'http://5-tv.ru/news/96814/',
        'md5': 'bbff554ad415ecf5416a2f48c22d9283',
        'info_dict': {
            'id': '96814',
            'ext': 'mp4',
            'title': 'Россияне выбрали имя для общенациональной платежной системы',
            'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 180,
        },
    }, {
        'url': 'http://5-tv.ru/video/1021729/',
        'info_dict': {
            'id': '1021729',
            'ext': 'mp4',
            'title': '3D принтер',
            'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 180,
        },
    }, {
        'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
        'info_dict': {
            'id': 'glavnoe',
            'ext': 'mp4',
            'title': 'Итоги недели с 8 по 14 июня 2015 года',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
        'only_matching': True,
    }, {
        'url': 'http://5-tv.ru/films/1507502/',
        'only_matching': True,
    }, {
        'url': 'http://5-tv.ru/programs/broadcast/508713/',
        'only_matching': True,
    }, {
        'url': 'http://5-tv.ru/angel/',
        'only_matching': True,
    }, {
        'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page and return the info dict for its video.

        The media URL is the ``href`` of the anchor with class
        ``videoplayer``. The title comes from the Open Graph title, with
        the ``<title>`` element as fallback. Description, thumbnail and
        duration are optional Open Graph values.
        """
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two alternatives in _VALID_URL matched.
        video_id = mobj.group('id') or mobj.group('path')

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
            webpage, 'video url')

        title = self._og_search_title(webpage, default=None) or self._search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        duration = int_or_none(self._og_search_property(
            'video:duration', webpage, 'duration', default=None))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
        }
|
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
)
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@@ -60,7 +61,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
continue
|
||||
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
||||
'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url_parsed.path,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
|
||||
@@ -131,12 +132,26 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
},
|
||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': '556e03339473995ee145930c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les entreprises familiales : le secret de la réussite',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a page URL to either a Dailymotion Cloud result or a video.

    The ``title`` group of ``_VALID_URL`` is used as the identifier while
    downloading the page. When the page yields a Dailymotion Cloud URL the
    extraction is delegated to the 'DailymotionCloud' extractor; otherwise
    the ``id-video=<id>@<catalogue>`` marker found in the page is split and
    passed on to ``_extract_video``.
    """
    page_title = re.match(self._VALID_URL, url).group('title')
    webpage = self._download_webpage(url, page_title)

    dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
    if not dmcloud_url:
        # No Dailymotion Cloud embed: use the "<video id>@<catalogue>" marker
        id_and_catalogue = self._search_regex(
            r'id-video=([^@]+@[^"]+)', webpage, 'video id')
        video_id, catalogue = id_and_catalogue.split('@')
        return self._extract_video(video_id, catalogue)

    return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
|
@@ -34,6 +34,7 @@ from .brightcove import BrightcoveIE
|
||||
from .nbc import NBCSportsVPlayerIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .tvc import TVCIE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .smotri import SmotriIE
|
||||
from .condenast import CondeNastIE
|
||||
@@ -41,6 +42,12 @@ from .udn import UDNEmbedIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .bliptv import BlipTVIE
|
||||
from .svt import SVTIE
|
||||
from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .snagfilms import SnagFilmsEmbedIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -291,6 +298,15 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# TVC embed
|
||||
{
|
||||
'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
|
||||
'info_dict': {
|
||||
'id': '55304',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дошкольное воспитание',
|
||||
},
|
||||
},
|
||||
# SportBox embed
|
||||
{
|
||||
'url': 'http://www.vestifinance.ru/articles/25753',
|
||||
@@ -322,6 +338,15 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# XHamster embed
|
||||
{
|
||||
'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
|
||||
'info_dict': {
|
||||
'id': 'showthread',
|
||||
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
# Embedded TED video
|
||||
{
|
||||
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
||||
@@ -644,6 +669,18 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'John Carlson Postgame 2/25/15',
|
||||
},
|
||||
},
|
||||
# Kaltura embed (different embed code)
|
||||
{
|
||||
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
|
||||
'info_dict': {
|
||||
'id': '1_a52wc67y',
|
||||
'ext': 'flv',
|
||||
'upload_date': '20150127',
|
||||
'uploader_id': 'PremierMedia',
|
||||
'timestamp': int,
|
||||
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
||||
},
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@@ -789,6 +826,62 @@ class GenericIE(InfoExtractor):
|
||||
# rtmpe downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Brightcove URL in single quotes
|
||||
{
|
||||
'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
|
||||
'md5': '4ae374f1f8b91c889c4b9203c8c752af',
|
||||
'info_dict': {
|
||||
'id': '4255764656001',
|
||||
'ext': 'mp4',
|
||||
'title': 'SN Presents: Russell Martin, World Citizen',
|
||||
'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
|
||||
'uploader': 'Rogers Sportsnet',
|
||||
},
|
||||
},
|
||||
# Dailymotion Cloud video
|
||||
{
|
||||
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
|
||||
'md5': '49444254273501a64675a7e68c502681',
|
||||
'info_dict': {
|
||||
'id': '5585de919473990de4bee11b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le débat',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
}
|
||||
},
|
||||
# OnionStudios embed
|
||||
{
|
||||
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
|
||||
'info_dict': {
|
||||
'id': '2855',
|
||||
'ext': 'mp4',
|
||||
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
'uploader': 'ClickHole',
|
||||
'uploader_id': 'clickhole',
|
||||
}
|
||||
},
|
||||
# SnagFilms embed
|
||||
{
|
||||
'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
|
||||
'info_dict': {
|
||||
'id': '74849a00-85a9-11e1-9660-123139220831',
|
||||
'ext': 'mp4',
|
||||
'title': '#whilewewatch',
|
||||
}
|
||||
},
|
||||
# AdobeTVVideo embed
|
||||
{
|
||||
'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
|
||||
'md5': '43662b577c018ad707a63766462b1e87',
|
||||
'info_dict': {
|
||||
'id': '2456',
|
||||
'ext': 'mp4',
|
||||
'title': 'New experience with Acrobat DC',
|
||||
'description': 'New experience with Acrobat DC',
|
||||
'duration': 248.667,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -956,7 +1049,9 @@ class GenericIE(InfoExtractor):
|
||||
}
|
||||
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||
force = self._downloader.params.get('force_generic_extractor', False)
|
||||
self._downloader.report_warning(
|
||||
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
||||
|
||||
if not full_response:
|
||||
request = compat_urllib_request.Request(url)
|
||||
@@ -1061,23 +1156,14 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||
webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl)
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1))
|
||||
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||
if vimeo_url is not None:
|
||||
return self.url_result(vimeo_url)
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
@@ -1289,11 +1375,32 @@ class GenericIE(InfoExtractor):
|
||||
if rutv_url:
|
||||
return self.url_result(rutv_url, 'RUTV')
|
||||
|
||||
# Look for embedded TVC player
|
||||
tvc_url = TVCIE._extract_url(webpage)
|
||||
if tvc_url:
|
||||
return self.url_result(tvc_url, 'TVC')
|
||||
|
||||
# Look for embedded SportBox player
|
||||
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
||||
if sportbox_urls:
|
||||
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
|
||||
|
||||
# Look for embedded PornHub player
|
||||
pornhub_url = PornHubIE._extract_url(webpage)
|
||||
if pornhub_url:
|
||||
return self.url_result(pornhub_url, 'PornHub')
|
||||
|
||||
# Look for embedded XHamster player
|
||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||
if xhamster_urls:
|
||||
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Tvigle')
|
||||
|
||||
# Look for embedded TED player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
|
||||
@@ -1397,8 +1504,8 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = re.search(
|
||||
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||
|
||||
@@ -1455,6 +1562,30 @@ class GenericIE(InfoExtractor):
|
||||
if senate_isvp_url:
|
||||
return self.url_result(senate_isvp_url, 'SenateISVP')
|
||||
|
||||
# Look for Dailymotion Cloud videos
|
||||
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||
if dmcloud_url:
|
||||
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
|
||||
# Look for OnionStudios embeds
|
||||
onionstudios_url = OnionStudiosIE._extract_url(webpage)
|
||||
if onionstudios_url:
|
||||
return self.url_result(onionstudios_url)
|
||||
|
||||
# Look for SnagFilms embeds
|
||||
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
|
||||
if snagfilms_url:
|
||||
return self.url_result(snagfilms_url)
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))),
|
||||
'AdobeTVVideo')
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
@@ -6,12 +6,13 @@ from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
qualities,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||
'info_dict': {
|
||||
'id': 'DeadlyDecisiveGermanpinscher',
|
||||
@@ -27,14 +28,33 @@ class GfycatIE(InfoExtractor):
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
|
||||
'info_dict': {
|
||||
'id': 'JauntyTimelyAmazontreeboa',
|
||||
'ext': 'mp4',
|
||||
'title': 'JauntyTimelyAmazontreeboa',
|
||||
'timestamp': 1411720126,
|
||||
'upload_date': '20140926',
|
||||
'uploader': 'anonymous',
|
||||
'duration': 3.52,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
gfy = self._download_json(
|
||||
'http://gfycat.com/cajax/get/%s' % video_id,
|
||||
video_id, 'Downloading video info')['gfyItem']
|
||||
video_id, 'Downloading video info')
|
||||
if 'error' in gfy:
|
||||
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
||||
gfy = gfy['gfyItem']
|
||||
|
||||
title = gfy.get('title') or gfy['gfyName']
|
||||
description = gfy.get('description')
|
||||
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -19,20 +17,19 @@ class HentaiStigmaIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
|
||||
r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
|
||||
webpage, 'title')
|
||||
wrap_url = self._html_search_regex(
|
||||
r'<iframe src="([^"]+mp4)"', webpage, 'wrapper url')
|
||||
r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
|
||||
wrap_webpage = self._download_webpage(wrap_url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url')
|
||||
r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,8 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class HowcastIE(InfoExtractor):
|
||||
@@ -13,29 +12,31 @@ class HowcastIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '390161',
|
||||
'ext': 'mp4',
|
||||
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
|
||||
'title': 'How to Tie a Square Knot Properly',
|
||||
}
|
||||
'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
|
||||
'timestamp': 1276081287,
|
||||
'upload_date': '20100609',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
||||
webpage, 'video URL')
|
||||
|
||||
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
||||
webpage, 'description', fatal=False)
|
||||
embed_code = self._search_regex(
|
||||
r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b',
|
||||
webpage, 'ooyala embed code')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % embed_code,
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': video_description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'timestamp': parse_iso8601(self._html_search_meta(
|
||||
'article:published_time', webpage, 'timestamp')),
|
||||
}
|
||||
|
@@ -46,7 +46,7 @@ class ImdbIE(InfoExtractor):
|
||||
format_info = info['videoPlayerObject']['video']
|
||||
formats.append({
|
||||
'format_id': f_id,
|
||||
'url': format_info['url'],
|
||||
'url': format_info['videoInfoList'][0]['videoUrl'],
|
||||
})
|
||||
|
||||
return {
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class InaIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||
|
@@ -5,13 +5,14 @@ import base64
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class InfoQIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
|
||||
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
|
||||
'info_dict': {
|
||||
@@ -20,7 +21,10 @@ class InfoQIE(InfoExtractor):
|
||||
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
|
||||
'title': 'A Few of My Favorite [Python] Things',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -42,7 +46,7 @@ class InfoQIE(InfoExtractor):
|
||||
video_id, extension = video_filename.split('.')
|
||||
|
||||
http_base = self._search_regex(
|
||||
r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
|
||||
r'EXPRESSINSTALL_SWF\s*=\s*[^"]*"((?:https?:)?//[^/"]+/)', webpage,
|
||||
'HTTP base URL')
|
||||
|
||||
formats = [{
|
||||
@@ -52,7 +56,7 @@ class InfoQIE(InfoExtractor):
|
||||
'play_path': playpath,
|
||||
}, {
|
||||
'format_id': 'http',
|
||||
'url': http_base + real_id,
|
||||
'url': compat_urlparse.urljoin(url, http_base) + real_id,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
limit_length,
|
||||
)
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
@@ -100,11 +103,13 @@ class InstagramUserIE(InfoExtractor):
|
||||
thumbnails_el = it.get('images', {})
|
||||
thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
|
||||
|
||||
title = it.get('caption', {}).get('text', it['id'])
|
||||
# In some cases caption is null, which corresponds to None
|
||||
# in Python. As a result, it.get('caption', {}) gives None
|
||||
title = (it.get('caption') or {}).get('text', it['id'])
|
||||
|
||||
entries.append({
|
||||
'id': it['id'],
|
||||
'title': title,
|
||||
'title': limit_length(title, 80),
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'webpage_url': it.get('link'),
|
||||
|
296
youtube_dl/extractor/iqiyi.py
Normal file
296
youtube_dl/extractor/iqiyi.py
Normal file
@@ -0,0 +1,296 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import os.path
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
import zlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class IqiyiIE(InfoExtractor):
    """Information extractor for iqiyi.com video pages (``v_*.html``).

    Extraction proceeds in the following steps (see ``_real_extract``):

    1. read the tvid, video id and flash player URL from the web page;
    2. obtain the ``enc_key`` embedded in the flash player (cached per
       player file name);
    3. query the ``vms`` API for the raw video description;
    4. resolve the URL of every segment of every known format.

    Videos made of several segments are returned as a ``multi_video``
    result with one entry per segment.
    """

    IE_NAME = 'iqiyi'

    _VALID_URL = r'http://(?:www\.)?iqiyi\.com/v_.+?\.html'

    _TESTS = [{
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        'md5': '2cb594dc2781e6c941a110d8f358118b',
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
            'ext': 'f4v',
        }
    }, {
        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
        'info_dict': {
            'id': 'e3f585b550a280af23c98b6cb2be19fb',
            'title': '名侦探柯南第752集',
        },
        'playlist': [{
            'md5': '7e49376fecaffa115d951634917fe105',
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'md5': '41b75ba13bb7ac0e411131f92bc4f6ca',
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'md5': '0cee1dd0a3d46a83e71e2badeae2aab0',
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'md5': '4f8ad72373b0c491b582e7c196b0b1f9',
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'md5': 'd89ad028bcfad282918e8098e811711d',
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'md5': '9cb1e5c95da25dff0660c32ae50903b7',
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'md5': '155116e0ff1867bbc9b98df294faabc9',
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'md5': '53f5db77622ae14fa493ed2a278a082b',
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }],
    }]

    # (bid, format_id) pairs. The bid is the identifier found in the API
    # response; its integer value is also used as the format preference.
    _FORMATS_MAP = [
        ('1', 'h6'),
        ('2', 'h5'),
        ('3', 'h4'),
        ('4', 'h3'),
        ('5', 'h2'),
        ('10', 'h1'),
    ]

    def construct_video_urls(self, data, video_id, _uuid):
        """Resolve the download URL of every segment of every known format.

        ``data`` is the ``data`` member of the ``vms`` API response. The
        formats are read from ``data['vp']['tkl'][0]['vs']`` and the base
        URL from ``data['vp']['du']``. Two HTTP requests are made per
        segment: one for the time-based path key, one for the final URL.

        Returns a dict mapping format_id to a list of
        ``(video_url, filesize)`` tuples, one per segment. Formats whose
        bid has no entry in ``_FORMATS_MAP`` are skipped.
        """
        def do_xor(x, y):
            # The XOR key cycles with period 3 over the position y
            return x ^ (103, 121, 72)[y % 3]

        def get_encode_code(l):
            # Decode a '-'-separated list of hexadecimal character codes.
            # Each code is XORed with a key selected by its distance from
            # the end of the list, and the decoded characters are emitted
            # in reverse order.
            codes = l.split('-')
            last = len(codes) - 1
            decoded = [
                chr(do_xor(int(code, 16), last - pos))
                for pos, code in enumerate(codes)]
            return ''.join(reversed(decoded))

        def get_path_key(x, format_id, segment_index):
            # md5 of (server time in 600 second units + fixed salt + x)
            mg = ')(*&^flash@#$%a'
            tm = self._download_json(
                'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
                note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
            )['t']
            t = str(int(math.floor(int(tm) / (600.0))))
            return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()

        video_urls_dict = {}
        for format_item in data['vp']['tkl'][0]['vs']:
            if not 0 < int(format_item['bid']) <= 10:
                continue
            format_id = self.get_format(format_item['bid'])
            if format_id is None:
                # Not every bid in the accepted range has an entry in
                # _FORMATS_MAP. Skip such formats up front: a None
                # format_id cannot be mapped back to a preference later
                # on, and its segments would be requested for nothing.
                continue

            video_urls = []

            video_urls_info = format_item['fs']
            if not format_item['fs'][0]['l'].startswith('/'):
                # The first path is encoded; when it decodes to an mp4
                # file the segments listed under 'flvs' are used instead.
                t = get_encode_code(format_item['fs'][0]['l'])
                if t.endswith('mp4'):
                    video_urls_info = format_item['flvs']

            for segment_index, segment in enumerate(video_urls_info):
                vl = segment['l']
                if not vl.startswith('/'):
                    vl = get_encode_code(vl)
                # The key is derived from the file name without extension
                key = get_path_key(
                    vl.split('/')[-1].split('.')[0], format_id, segment_index)
                filesize = segment['b']
                # Insert the key before the last component of the base URL
                base_url = data['vp']['du'].split('/')
                base_url.insert(-1, key)
                base_url = '/'.join(base_url)
                param = {
                    'su': _uuid,
                    'qyid': uuid.uuid4().hex,
                    'client': '',
                    'z': '',
                    'bt': '',
                    'ct': '',
                    'tn': str(int(time.time()))
                }
                api_video_url = base_url + vl + '?' + \
                    compat_urllib_parse.urlencode(param)
                js = self._download_json(
                    api_video_url, video_id,
                    note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
                video_url = js['l']
                video_urls.append(
                    (video_url, filesize))

            video_urls_dict[format_id] = video_urls
        return video_urls_dict

    def get_format(self, bid):
        """Return the format_id mapped to ``bid``, or None if it is unknown.

        ``bid`` is compared through its ``str()`` form.
        """
        wanted = str(bid)
        for known_bid, format_id in self._FORMATS_MAP:
            if known_bid == wanted:
                return format_id
        return None

    def get_bid(self, format_id):
        """Return the bid (a string) of ``format_id``, or None if unknown."""
        for bid, known_format_id in self._FORMATS_MAP:
            if known_format_id == format_id:
                return bid
        return None

    def get_raw_data(self, tvid, video_id, enc_key, _uuid):
        """Query the ``vms`` API and return its decoded JSON response.

        The ``enc`` parameter is the md5 of ``enc_key + tm + tvid`` and
        ``authkey`` the md5 of ``tm + tvid``, where ``tm`` is the current
        Unix time in whole seconds.
        """
        tm = str(int(time.time()))
        param = {
            'key': 'fvip',
            'src': hashlib.md5(b'youtube-dl').hexdigest(),
            'tvId': tvid,
            'vid': video_id,
            'vinfo': 1,
            'tm': tm,
            'enc': hashlib.md5(
                (enc_key + tm + tvid).encode('utf8')).hexdigest(),
            'qyid': _uuid,
            'tn': random.random(),
            'um': 0,
            'authkey': hashlib.md5(
                (tm + tvid).encode('utf8')).hexdigest()
        }

        api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
            compat_urllib_parse.urlencode(param)
        raw_data = self._download_json(api_url, video_id)
        return raw_data

    def get_enc_key(self, swf_url, video_id):
        """Return the ``enc_key`` embedded in the flash player at ``swf_url``.

        The key is looked up in the downloader cache under the player's
        file name (without extension) first; on a miss the player is
        downloaded, decompressed and searched, and the key is stored in
        the cache.
        """
        filename, _ = os.path.splitext(url_basename(swf_url))
        enc_key_json = self._downloader.cache.load('iqiyi-enc-key', filename)
        if enc_key_json is not None:
            return enc_key_json[0]

        req = self._request_webpage(
            swf_url, video_id, note='download swf content')
        cn = req.read()
        # The first 8 bytes are not part of the zlib stream (presumably the
        # header of a compressed SWF file -- TODO confirm).
        cn = zlib.decompress(cn[8:])
        pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\\$&vv')
        enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')

        self._downloader.cache.store('iqiyi-enc-key', filename, [enc_key])

        return enc_key

    def _real_extract(self, url):
        """Extract the video at ``url``.

        Returns a single video info dict when the video has one segment,
        and a ``multi_video`` result with one entry per segment otherwise.

        Raises ExtractorError when the API reports an error code, when the
        response carries no ``tkl`` data, or when no segment URL could be
        resolved for any known format.
        """
        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')
        tvid = self._search_regex(
            r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
        video_id = self._search_regex(
            r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
        swf_url = self._search_regex(
            r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
        _uuid = uuid.uuid4().hex

        enc_key = self.get_enc_key(swf_url, video_id)

        raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)

        if raw_data['code'] != 'A000000':
            raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])

        if not raw_data['data']['vp']['tkl']:
            raise ExtractorError('No support iQiyi VIP video')

        data = raw_data['data']

        title = data['vi']['vn']

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(
            data, video_id, _uuid)

        # construct info: the i-th segment of every format goes into the
        # i-th entry, so each entry offers one segment in all formats
        entries = []
        for format_id, video_urls in video_urls_dict.items():
            preference = int(self.get_bid(format_id))
            for i, (video_url, filesize) in enumerate(video_urls):
                if len(entries) < i + 1:
                    entries.append({'formats': []})
                entries[i]['formats'].append({
                    'url': video_url,
                    'filesize': filesize,
                    'format_id': format_id,
                    'preference': preference,
                })

        if not entries:
            # Without this guard entries[0] below fails with an IndexError
            raise ExtractorError('Unable to extract any video URL')

        for i, entry in enumerate(entries):
            self._sort_formats(entry['formats'])
            entry.update({
                'id': '%s_part%d' % (video_id, i + 1),
                'title': title,
            })

        if len(entries) > 1:
            info = {
                '_type': 'multi_video',
                'id': video_id,
                'title': title,
                'entries': entries,
            }
        else:
            # A single segment is returned as a plain video carrying the
            # video id itself instead of the '_part1' id
            info = entries[0]
            info['id'] = video_id
            info['title'] = title

        return info
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
@@ -30,7 +31,7 @@ class IzleseneIE(InfoExtractor):
|
||||
'description': 'md5:253753e2655dde93f59f74b572454f6d',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': 1404302298,
|
||||
'timestamp': int,
|
||||
'upload_date': '20140702',
|
||||
'duration': 95.395,
|
||||
'age_limit': 0,
|
||||
@@ -46,7 +47,7 @@ class IzleseneIE(InfoExtractor):
|
||||
'description': 'Tarkan Dortmund 2006 Konseri',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'parlayankiz',
|
||||
'timestamp': 1163322193,
|
||||
'timestamp': int,
|
||||
'upload_date': '20061112',
|
||||
'duration': 253.666,
|
||||
'age_limit': 0,
|
||||
@@ -67,9 +68,9 @@ class IzleseneIE(InfoExtractor):
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r"adduserUsername\s*=\s*'([^']+)';",
|
||||
webpage, 'uploader', fatal=False, default='')
|
||||
webpage, 'uploader', fatal=False)
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'uploadDate', webpage, 'upload date', fatal=False))
|
||||
'uploadDate', webpage, 'upload date'))
|
||||
|
||||
duration = float_or_none(self._html_search_regex(
|
||||
r'"videoduration"\s*:\s*"([^"]+)"',
|
||||
@@ -86,8 +87,7 @@ class IzleseneIE(InfoExtractor):
|
||||
|
||||
# Might be empty for some videos.
|
||||
streams = self._html_search_regex(
|
||||
r'"qualitylevel"\s*:\s*"([^"]+)"',
|
||||
webpage, 'streams', fatal=False, default='')
|
||||
r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='')
|
||||
|
||||
formats = []
|
||||
if streams:
|
||||
@@ -95,15 +95,15 @@ class IzleseneIE(InfoExtractor):
|
||||
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
|
||||
formats.append({
|
||||
'format_id': '%sp' % quality if quality else 'sd',
|
||||
'url': url,
|
||||
'url': compat_urllib_parse_unquote(url),
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
stream_url = self._search_regex(
|
||||
r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
|
||||
r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL')
|
||||
formats.append({
|
||||
'format_id': 'sd',
|
||||
'url': stream_url,
|
||||
'url': compat_urllib_parse_unquote(stream_url),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
|
@@ -28,6 +28,14 @@ class KickStarterIE(InfoExtractor):
|
||||
'uploader': 'Pebble Technology',
|
||||
'title': 'Pebble iOS Notifications',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
|
||||
'info_dict': {
|
||||
'id': '1420158244',
|
||||
'ext': 'mp4',
|
||||
'title': 'Power Drive 2000',
|
||||
},
|
||||
'expected_warnings': ['OpenGraph description'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -48,10 +56,15 @@ class KickStarterIE(InfoExtractor):
|
||||
'title': title,
|
||||
}
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
if thumbnail is None:
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<img[^>]+class="[^"]+\s*poster\s*[^"]+"[^>]+src="([^"]+)"',
|
||||
webpage, 'thumbnail image', fatal=False)
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -39,7 +40,6 @@ class LifeNewsIE(InfoExtractor):
|
||||
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
||||
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
||||
'upload_date': '20150402',
|
||||
'uploader': 'embed.life.ru',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/news/153461',
|
||||
@@ -50,7 +50,6 @@ class LifeNewsIE(InfoExtractor):
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
'uploader': 'embed.life.ru',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/video/13035',
|
||||
@@ -72,20 +71,20 @@ class LifeNewsIE(InfoExtractor):
|
||||
if not videos and not iframe_link:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage),
|
||||
' - Первый по срочным новостям — LIFE | NEWS')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
||||
comment_count = self._html_search_regex(
|
||||
r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
|
||||
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
|
||||
webpage, 'comment count', fatal=False)
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
|
||||
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
|
@@ -40,6 +40,17 @@ class LiveLeakIE(InfoExtractor):
|
||||
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
# Covers https://github.com/rg3/youtube-dl/pull/5983
|
||||
'url': 'http://www.liveleak.com/view?i=801_1409392012',
|
||||
'md5': '0b3bec2d888c20728ca2ad3642f0ef15',
|
||||
'info_dict': {
|
||||
'id': '801_1409392012',
|
||||
'ext': 'mp4',
|
||||
'description': "Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.",
|
||||
'uploader': 'bony333',
|
||||
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -85,7 +96,10 @@ class LiveLeakIE(InfoExtractor):
|
||||
'url': s['file'],
|
||||
} for i, s in enumerate(sources)]
|
||||
for i, s in enumerate(sources):
|
||||
orig_url = s['file'].replace('.h264_base.mp4', '')
|
||||
# Removing '.h264_*.mp4' gives the raw video, which is essentially
|
||||
# the same video without the LiveLeak logo at the top (see
|
||||
# https://github.com/rg3/youtube-dl/pull/4768)
|
||||
orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file'])
|
||||
if s['file'] != orig_url:
|
||||
formats.append({
|
||||
'format_id': 'original-%s' % i,
|
||||
|
@@ -30,13 +30,13 @@ class LyndaBaseIE(InfoExtractor):
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'username': username,
|
||||
'password': password,
|
||||
'username': username.encode('utf-8'),
|
||||
'password': password.encode('utf-8'),
|
||||
'remember': 'false',
|
||||
'stayPut': 'false'
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
@@ -65,7 +65,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
'stayPut': 'false',
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None,
|
||||
'Confirming log in and log out from another device')
|
||||
|
@@ -31,7 +31,7 @@ class NewstubeIE(InfoExtractor):
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
video_guid = self._html_search_regex(
|
||||
r'<meta property="og:video" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
r'<meta property="og:video:url" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
page, 'video GUID')
|
||||
|
||||
player = self._download_xml(
|
||||
|
@@ -19,7 +19,7 @@ class NFLIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
|
||||
(?:.+?/)*
|
||||
(?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
|
||||
(?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||
@@ -58,6 +58,10 @@ class NFLIE(InfoExtractor):
|
||||
'upload_date': '20150202',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -182,7 +182,6 @@ class NiconicoIE(InfoExtractor):
|
||||
extension = xpath_text(video_info, './/movie_type')
|
||||
if not extension:
|
||||
extension = determine_ext(video_real_url)
|
||||
video_format = extension.upper()
|
||||
|
||||
thumbnail = (
|
||||
xpath_text(video_info, './/thumbnail_url') or
|
||||
@@ -241,7 +240,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'url': video_real_url,
|
||||
'title': title,
|
||||
'ext': extension,
|
||||
'format': video_format,
|
||||
'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
|
@@ -166,6 +166,10 @@ class NocoIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
|
||||
|
||||
if timestamp is not None and timestamp < 0:
|
||||
timestamp = None
|
||||
|
||||
uploader = show.get('partner_name')
|
||||
uploader_id = show.get('partner_key')
|
||||
duration = float_or_none(show.get('duration_ms'), 1000)
|
||||
@@ -191,7 +195,7 @@ class NocoIE(InfoExtractor):
|
||||
if episode_number:
|
||||
title += ' #' + compat_str(episode_number)
|
||||
if episode:
|
||||
title += ' - ' + episode
|
||||
title += ' - ' + compat_str(episode)
|
||||
|
||||
description = show.get('show_resume') or show.get('family_resume')
|
||||
|
||||
|
@@ -16,8 +16,24 @@ class NPOBaseIE(InfoExtractor):
|
||||
token_page = self._download_webpage(
|
||||
'http://ida.omroep.nl/npoplayer/i.js',
|
||||
video_id, note='Downloading token')
|
||||
return self._search_regex(
|
||||
token = self._search_regex(
|
||||
r'npoplayer\.token = "(.+?)"', token_page, 'token')
|
||||
# Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js
|
||||
token_l = list(token)
|
||||
first = second = None
|
||||
for i in range(5, len(token_l) - 4):
|
||||
if token_l[i].isdigit():
|
||||
if first is None:
|
||||
first = i
|
||||
elif second is None:
|
||||
second = i
|
||||
if first is None or second is None:
|
||||
first = 12
|
||||
second = 13
|
||||
|
||||
token_l[first], token_l[second] = token_l[second], token_l[first]
|
||||
|
||||
return ''.join(token_l)
|
||||
|
||||
|
||||
class NPOIE(NPOBaseIE):
|
||||
@@ -92,7 +108,7 @@ class NPOIE(NPOBaseIE):
|
||||
|
||||
def _get_info(self, video_id):
|
||||
metadata = self._download_json(
|
||||
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
|
||||
'http://e.omroep.nl/metadata/%s' % video_id,
|
||||
video_id,
|
||||
# We have to remove the javascript callback
|
||||
transform_source=strip_jsonp,
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NRKIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||
_VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -76,7 +76,7 @@ class NRKIE(InfoExtractor):
|
||||
|
||||
|
||||
class NRKPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||
@@ -116,11 +116,11 @@ class NRKPlaylistIE(InfoExtractor):
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
_VALID_URL = r'(?P<baseurl>https?://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314',
|
||||
@@ -132,7 +132,7 @@ class NRKTVIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://tv.nrk.no/program/mdfp15000514',
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
||||
'info_dict': {
|
||||
'id': 'mdfp15000514',
|
||||
@@ -145,7 +145,7 @@ class NRKTVIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
# single playlist video
|
||||
'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
@@ -157,7 +157,7 @@ class NRKTVIE(InfoExtractor):
|
||||
'skip': 'Only works from Norway',
|
||||
},
|
||||
{
|
||||
'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '9480285eff92d64f06e02a5367970a7a',
|
||||
|
74
youtube_dl/extractor/onionstudios.py
Normal file
74
youtube_dl/extractor/onionstudios.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class OnionStudiosIE(InfoExtractor):
    """Information extractor for videos hosted on onionstudios.com.

    Both regular video page URLs and ``/embed?id=...`` player URLs are
    accepted; the numeric id is taken from the URL and all data is then
    read from the corresponding embed page.
    """

    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'

    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': 'd4851405d31adfadf71cd7a487b765bb',
        'info_dict': {
            'id': '2937',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'The A.V. Club',
            'uploader_id': 'TheAVClub',
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of the first onionstudios embed iframe in *webpage*.

        Returns None (implicitly) when no such iframe is present.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the embed page for *url* and build the info dict."""
        video_id = self._match_id(url)

        # Always work from the embed page, whichever URL flavour was given.
        webpage = self._download_webpage(
            'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)

        formats = []
        for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
            if determine_ext(src) != 'm3u8':  # m3u8 always results in 403
                formats.append({
                    'url': src,
                })
        self._sort_formats(formats)

        # The title is mandatory: no default and fatal, so a miss raises.
        title = self._search_regex(
            r'share_title\s*=\s*"([^"]+)"', webpage, 'title')
        description = self._search_regex(
            r'share_description\s*=\s*"([^"]+)"', webpage,
            'description', default=None)
        # Optional fields fall back to None (not False) so that a missing
        # value has the same representation as for 'description' above.
        thumbnail = self._search_regex(
            r'poster="([^"]+)"', webpage, 'thumbnail', default=None)

        uploader_id = self._search_regex(
            r'twitter_handle\s*=\s*"([^"]+)"',
            webpage, 'uploader id', fatal=False)
        uploader = self._search_regex(
            r'window\.channelName\s*=\s*"Embedded:([^"]+)"',
            webpage, 'uploader', default=None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -35,6 +36,9 @@ class PBSIE(InfoExtractor):
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'duration': 3190,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
|
||||
@@ -46,6 +50,9 @@ class PBSIE(InfoExtractor):
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'duration': 5050,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
|
||||
@@ -68,7 +75,10 @@ class PBSIE(InfoExtractor):
|
||||
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
|
||||
@@ -82,7 +92,10 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 3172,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140122',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
|
||||
@@ -90,6 +103,21 @@ class PBSIE(InfoExtractor):
|
||||
'id': 'united-states-of-secrets',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
|
||||
'info_dict': {
|
||||
'id': '2280706814',
|
||||
'display_id': 'player',
|
||||
'ext': 'mp4',
|
||||
'title': 'Death and the Civil War',
|
||||
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
|
||||
'duration': 6705,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -123,7 +151,7 @@ class PBSIE(InfoExtractor):
|
||||
return media_id, presumptive_id, upload_date
|
||||
|
||||
url = self._search_regex(
|
||||
r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||
r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
|
||||
webpage, 'player URL')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
|
96
youtube_dl/extractor/pinkbike.py
Normal file
96
youtube_dl/extractor/pinkbike.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_end,
|
||||
remove_start,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class PinkbikeIE(InfoExtractor):
    """Information extractor for pinkbike.com video pages.

    Also matches the ``es.pinkbike.org`` flash player URL; in both cases the
    numeric id is used to fetch the canonical ``/video/<id>`` page.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.pinkbike.com/video/402811/',
        'md5': '4814b8ca7651034cd87e3361d5c2155a',
        'info_dict': {
            'id': '402811',
            'ext': 'mp4',
            'title': 'Brandon Semenuk - RAW 100',
            'description': 'Official release: www.redbull.ca/rupertwalker',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 100,
            'upload_date': '20150406',
            'uploader': 'revelco',
            'location': 'Victoria, British Columbia, Canada',
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page for *url* and build the info dict."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://www.pinkbike.com/video/%s' % video_id, video_id)

        formats = []
        # The first group captures the (possibly backslash-escaped) quote so
        # the same quoting style is required around both attribute values.
        for _, format_id, src in re.findall(
                r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
            # A quality label such as "720p" doubles as the frame height.
            height = int_or_none(self._search_regex(
                r'^(\d+)[pP]$', format_id, 'height', default=None))
            formats.append({
                'url': src,
                'format_id': format_id,
                'height': height,
            })
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
        description = self._html_search_regex(
            r'(?s)id="media-description"[^>]*>(.+?)<',
            webpage, 'description', default=None)
        if not description:
            # Fall back to the OpenGraph description, minus the title prefix.
            og_description = self._og_search_description(webpage)
            # NOTE(review): the OpenGraph lookup may yield None when the tag
            # is absent; only strip the prefix from an actual string so a
            # missing description cannot break the whole extraction.
            if og_description is not None:
                og_description = remove_start(og_description, title + '. ')
            description = og_description
        thumbnail = self._og_search_thumbnail(webpage)
        duration = int_or_none(self._html_search_meta(
            'video:duration', webpage, 'duration'))

        uploader = self._search_regex(
            r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
        upload_date = unified_strdate(self._search_regex(
            r'class="fullTime"[^>]+title="([^"]+)"',
            webpage, 'upload date', fatal=False))

        location = self._html_search_regex(
            r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
            webpage, 'location', fatal=False)

        def extract_count(webpage, label):
            """Return the number shown in the stat box captioned *label*."""
            return str_to_int(self._search_regex(
                r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label,
                webpage, label, fatal=False))

        view_count = extract_count(webpage, 'Views')
        comment_count = extract_count(webpage, 'Comments')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'location': location,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats
        }
|
@@ -18,7 +18,8 @@ class PlanetaPlayIE(InfoExtractor):
|
||||
'id': '3586',
|
||||
'ext': 'flv',
|
||||
'title': 'md5:e829428ee28b1deed00de90de49d1da1',
|
||||
}
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
|
||||
_SONG_FORMATS = {
|
||||
|
@@ -19,8 +19,8 @@ from ..aes import (
|
||||
|
||||
|
||||
class PornHubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||
'md5': '882f488fa1f0026f023f33576004a2ed',
|
||||
'info_dict': {
|
||||
@@ -30,7 +30,17 @@ class PornHubIE(InfoExtractor):
|
||||
"title": "Seductive Indian beauty strips down and fingers her pink pussy",
|
||||
"age_limit": 18
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
return str_to_int(self._search_regex(
|
||||
@@ -39,7 +49,8 @@ class PornHubIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = compat_urllib_request.Request(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -34,7 +34,7 @@ class PornoVoisinesIE(InfoExtractor):
|
||||
'duration': 120,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'categories': ['Débutante', 'Scénario', 'Sodomie'],
|
||||
'categories': ['Débutantes', 'Scénario', 'Sodomie'],
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
@@ -71,7 +71,7 @@ class PornoVoisinesIE(InfoExtractor):
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'(\d+) vues', webpage, 'view count', fatal=False))
|
||||
average_rating = self._search_regex(
|
||||
r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
|
||||
r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False)
|
||||
if average_rating:
|
||||
average_rating = float_or_none(average_rating.replace(',', '.'))
|
||||
|
||||
|
@@ -177,6 +177,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
r'<header class="clearfix">\s*<h3>(.+?)</h3>',
|
||||
r'<!-- start video -->\s*<h1>(.+?)</h1>',
|
||||
r'<h1 class="att-name">\s*(.+?)</h1>',
|
||||
r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
|
||||
]
|
||||
_DESCRIPTION_REGEXES = [
|
||||
r'<p itemprop="description">\s*(.+?)</p>',
|
||||
@@ -206,8 +207,8 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
def _extract_clip(self, url, webpage):
|
||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
|
||||
|
||||
access_token = 'testclient'
|
||||
client_name = 'kolibri-1.2.5'
|
||||
access_token = 'prosieben'
|
||||
client_name = 'kolibri-1.12.6'
|
||||
client_location = url
|
||||
|
||||
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
|
||||
@@ -275,13 +276,17 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
for source in urls_sources:
|
||||
protocol = source['protocol']
|
||||
if protocol == 'rtmp' or protocol == 'rtmpe':
|
||||
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', source['url'])
|
||||
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source['url'])
|
||||
if not mobj:
|
||||
continue
|
||||
path = mobj.group('path')
|
||||
mp4colon_index = path.rfind('mp4:')
|
||||
app = path[:mp4colon_index]
|
||||
play_path = path[mp4colon_index:]
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'app': mobj.group('app'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'url': '%s/%s' % (mobj.group('url'), app),
|
||||
'app': app,
|
||||
'play_path': play_path,
|
||||
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
|
||||
'page_url': 'http://www.prosieben.de',
|
||||
'vbr': fix_bitrate(source['bitrate']),
|
||||
|
@@ -9,6 +9,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
clean_html,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
@@ -18,17 +19,37 @@ class QQMusicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
|
||||
'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
|
||||
'md5': '9ce1c1c8445f561506d2e3cfb0255705',
|
||||
'info_dict': {
|
||||
'id': '004295Et37taLD',
|
||||
'ext': 'm4a',
|
||||
'ext': 'mp3',
|
||||
'title': '可惜没如果',
|
||||
'upload_date': '20141227',
|
||||
'creator': '林俊杰',
|
||||
'description': 'md5:d327722d0361576fde558f1ac68a7065',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'note': 'There is no mp3-320 version of this song.',
|
||||
'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV',
|
||||
'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
|
||||
'info_dict': {
|
||||
'id': '004MsGEo3DdNxV',
|
||||
'ext': 'mp3',
|
||||
'title': '如果',
|
||||
'upload_date': '20050626',
|
||||
'creator': '李季美',
|
||||
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
|
||||
'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
|
||||
'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}
|
||||
}
|
||||
|
||||
# Reference: m_r_GetRUin() in top_player.js
|
||||
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
|
||||
@staticmethod
|
||||
@@ -62,21 +83,42 @@ class QQMusicIE(InfoExtractor):
|
||||
if lrc_content:
|
||||
lrc_content = lrc_content.replace('\\n', '\n')
|
||||
|
||||
thumbnail_url = None
|
||||
albummid = self._search_regex(
|
||||
[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
|
||||
detail_info_page, 'album mid', default=None)
|
||||
if albummid:
|
||||
thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \
|
||||
% (albummid[-2:-1], albummid[-1], albummid)
|
||||
|
||||
guid = self.m_r_get_ruin()
|
||||
|
||||
vkey = self._download_json(
|
||||
'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
|
||||
mid, note='Retrieve vkey', errnote='Unable to get vkey',
|
||||
transform_source=strip_jsonp)['key']
|
||||
song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
|
||||
|
||||
formats = []
|
||||
for format_id, details in self._FORMATS.items():
|
||||
formats.append({
|
||||
'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
|
||||
% (details['prefix'], mid, details['ext'], vkey, guid),
|
||||
'format': format_id,
|
||||
'format_id': format_id,
|
||||
'preference': details['preference'],
|
||||
'abr': details.get('abr'),
|
||||
})
|
||||
self._check_formats(formats, mid)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': mid,
|
||||
'url': song_url,
|
||||
'formats': formats,
|
||||
'title': song_name,
|
||||
'upload_date': publish_time,
|
||||
'creator': singer,
|
||||
'description': lrc_content,
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
|
||||
|
||||
@@ -146,31 +188,40 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:album'
|
||||
_VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
|
||||
'info_dict': {
|
||||
'id': '000gXCTb2AhRR1',
|
||||
'title': '我们都是这样长大的',
|
||||
'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
|
||||
'description': 'md5:179c5dce203a5931970d306aa9607ea6',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3',
|
||||
'info_dict': {
|
||||
'id': '002Y5a3b3AlCu3',
|
||||
'title': '그리고...',
|
||||
'description': 'md5:a48823755615508a95080e81b51ba729',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid = self._match_id(url)
|
||||
|
||||
album_page = self._download_webpage(
|
||||
self.qq_static_url('album', mid), mid, 'Download album page')
|
||||
album = self._download_json(
|
||||
'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
|
||||
mid, 'Download album page')['data']
|
||||
|
||||
entries = self.get_entries_from_page(album_page)
|
||||
|
||||
album_name = self._html_search_regex(
|
||||
r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
|
||||
default=None)
|
||||
|
||||
album_detail = self._html_search_regex(
|
||||
r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
|
||||
album_page, 'album details', default=None)
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
|
||||
) for song in album['list']
|
||||
]
|
||||
album_name = album.get('name')
|
||||
album_detail = album.get('desc')
|
||||
if album_detail is not None:
|
||||
album_detail = album_detail.strip()
|
||||
|
||||
return self.playlist_result(entries, mid, album_name, album_detail)
|
||||
|
||||
@@ -226,3 +277,36 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
|
||||
list_name = topinfo.get('ListName')
|
||||
list_description = topinfo.get('info')
|
||||
return self.playlist_result(entries, list_id, list_name, list_description)
|
||||
|
||||
|
||||
class QQMusicPlaylistIE(QQPlaylistBaseIE):
    """Extractor for QQ Music user playlists (``#type=taoge`` URLs)."""

    IE_NAME = 'qqmusic:playlist'
    _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://y.qq.com/#type=taoge&id=3462654915',
        'info_dict': {
            'id': '3462654915',
            'title': '韩国5月新歌精选下旬',
            'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
        },
        'playlist_count': 40,
    }

    def _real_extract(self, url):
        """Fetch the playlist JSON and return a playlist of song URL results."""
        list_id = self._match_id(url)

        api_url = (
            'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
            % list_id)
        response = self._download_json(
            api_url, list_id, 'Download list page',
            transform_source=strip_jsonp)
        # Only the first entry of 'cdlist' is used.
        playlist = response['cdlist'][0]

        # Each song is delegated to the single-song extractor via its mid.
        entries = []
        for song in playlist['songlist']:
            song_mid = song['songmid']
            entries.append(self.url_result(
                'http://y.qq.com/#type=song&mid=' + song_mid,
                'QQMusic', song_mid))

        playlist_title = playlist.get('dissname')
        playlist_description = clean_html(unescapeHTML(playlist.get('desc')))
        return self.playlist_result(
            entries, list_id, playlist_title, playlist_description)
|
||||
|
@@ -24,6 +24,7 @@ class QuickVidIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.(?:png|jpg|gif)$',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -21,6 +21,13 @@ class RTBFIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_QUALITIES = [
|
||||
('mobile', 'mobile'),
|
||||
('web', 'SD'),
|
||||
('url', 'MD'),
|
||||
('high', 'HD'),
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -32,14 +39,21 @@ class RTBFIE(InfoExtractor):
|
||||
r'data-video="([^"]+)"', webpage, 'data video')),
|
||||
video_id)
|
||||
|
||||
video_url = data.get('downloadUrl') or data.get('url')
|
||||
|
||||
if data.get('provider').lower() == 'youtube':
|
||||
video_url = data.get('downloadUrl') or data.get('url')
|
||||
return self.url_result(video_url, 'Youtube')
|
||||
formats = []
|
||||
for key, format_id in self._QUALITIES:
|
||||
format_url = data['sources'].get(key)
|
||||
if format_url:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'title': data['title'],
|
||||
'description': data.get('description') or data.get('subtitle'),
|
||||
'thumbnail': data.get('thumbnail'),
|
||||
|
@@ -12,10 +12,10 @@ class RtlNlIE(InfoExtractor):
|
||||
IE_NAME = 'rtl.nl'
|
||||
IE_DESC = 'rtl.nl and rtlxl.nl'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(www\.)?
|
||||
https?://(?:www\.)?
|
||||
(?:
|
||||
rtlxl\.nl/\#!/[^/]+/|
|
||||
rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
|
||||
rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
|
||||
)
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
|
||||
@@ -43,12 +43,19 @@ class RtlNlIE(InfoExtractor):
|
||||
'upload_date': '20150215',
|
||||
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
||||
}
|
||||
}, {
|
||||
# encrypted m3u8 streams, georestricted
|
||||
'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
uuid = self._match_id(url)
|
||||
info = self._download_json(
|
||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
|
||||
uuid)
|
||||
|
||||
material = info['material'][0]
|
||||
@@ -56,9 +63,14 @@ class RtlNlIE(InfoExtractor):
|
||||
subtitle = material['title'] or info['episodes'][0]['name']
|
||||
description = material.get('synopsis') or info['episodes'][0]['synopsis']
|
||||
|
||||
meta = info.get('meta', {})
|
||||
|
||||
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
|
||||
videopath = material['videopath'].replace('.f4m', '.m3u8')
|
||||
m3u8_url = 'http://manifest.us.rtl.nl' + videopath
|
||||
# NB: nowadays, recent ffmpeg and avconv can handle these encrypted streams, so
|
||||
# this adaptive -> flash workaround is not required in general, but it also
|
||||
# allows bypassing georestriction therefore is retained for now.
|
||||
videopath = material['videopath'].replace('/adaptive/', '/flash/')
|
||||
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
|
||||
|
||||
@@ -79,7 +91,7 @@ class RtlNlIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
meta = info.get('meta', {})
|
||||
|
||||
for p in ('poster_base_url', '"thumb_base_url"'):
|
||||
if not meta.get(p):
|
||||
continue
|
||||
|
119
youtube_dl/extractor/ruutu.py
Normal file
119
youtube_dl/extractor/ruutu.py
Normal file
@@ -0,0 +1,119 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
# Extractor for programme pages under ruutu.fi/ohjelmat/.
class RuutuIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
    _TESTS = [
        {
            'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
            'md5': 'ab2093f39be1ca8581963451b3c0234f',
            'info_dict': {
                'id': '2058907',
                'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
                'ext': 'mp4',
                'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
                'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 114,
                'age_limit': 0,
            },
        },
        {
            'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
            'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
            'info_dict': {
                'id': '2057306',
                'display_id': 'superpesis-katso-koko-kausi-ruudussa',
                'ext': 'mp4',
                'title': 'Superpesis: katso koko kausi Ruudussa',
                'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 40,
                'age_limit': 0,
            },
        },
    ]

    def _real_extract(self, url):
        # The last path component of the URL serves as the display id.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(
            r'data-media-id="(\d+)"', webpage, 'media id')

        # Prefer the XML URL template found in the page's jQuery.extend(...)
        # settings ("ruutuplayer" -> "xmlUrl"); every step is optional and a
        # hard-coded endpoint is used when the template cannot be obtained.
        video_xml_url = None

        media_data = self._search_regex(
            r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage,
            'media data', default=None)
        if media_data:
            media_json = self._parse_json(media_data, display_id, fatal=False)
            if media_json:
                xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl')
                if xml_url:
                    video_xml_url = xml_url.replace('{ID}', video_id)

        if not video_xml_url:
            video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id

        video_xml = self._download_xml(video_xml_url, video_id)

        formats = []
        # URLs already turned into formats, so duplicates are emitted once.
        processed_urls = []

        def parse_resolution(resolution):
            # Returns (width, height) from a 'WxH' attribute value, or
            # (None, None) when the attribute is absent or not of that shape.
            if not resolution:
                return None, None
            dimensions = resolution.split('x')
            if len(dimensions) != 2:
                return None, None
            return int_or_none(dimensions[0]), int_or_none(dimensions[1])

        def extract_formats(node):
            # Walks the children of node: '*Files' elements are containers
            # and are descended into, '*File' elements carry a media URL.
            for child in node:
                if child.tag.endswith('Files'):
                    extract_formats(child)
                elif child.tag.endswith('File'):
                    video_url = child.text
                    if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
                        # Skip only this entry; the remaining siblings must
                        # still be examined.
                        continue
                    processed_urls.append(video_url)
                    ext = determine_ext(video_url)
                    if ext == 'm3u8':
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', m3u8_id='hls'))
                    elif ext == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            video_url, video_id, f4m_id='hds'))
                    else:
                        proto = compat_urllib_parse_urlparse(video_url).scheme
                        # Direct files are kept only for HTTP* elements or
                        # rtmp URLs.
                        if not child.tag.startswith('HTTP') and proto != 'rtmp':
                            continue
                        # rtmp is ranked below the other direct formats.
                        preference = -1 if proto == 'rtmp' else 1
                        label = child.get('label')
                        tbr = int_or_none(child.get('bitrate'))
                        width, height = parse_resolution(child.get('resolution'))
                        formats.append({
                            'format_id': '%s-%s' % (proto, label if label else tbr),
                            'url': video_url,
                            'width': width,
                            'height': height,
                            'tbr': tbr,
                            'preference': preference,
                        })

        extract_formats(video_xml.find('./Clip'))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
            'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
            'formats': formats,
        }
|
@@ -83,7 +83,7 @@ class SafariIE(SafariBaseIE):
|
||||
library/view/[^/]+|
|
||||
api/v1/book
|
||||
)/
|
||||
(?P<course_id>\d+)/
|
||||
(?P<course_id>[^/]+)/
|
||||
(?:chapter(?:-content)?/)?
|
||||
(?P<part>part\d+)\.html
|
||||
'''
|
||||
@@ -100,6 +100,10 @@ class SafariIE(SafariBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# non-digits in course id
|
||||
'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -122,7 +126,7 @@ class SafariCourseIE(SafariBaseIE):
|
||||
IE_NAME = 'safari:course'
|
||||
IE_DESC = 'safaribooksonline.com online courses'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
|
||||
|
@@ -53,7 +53,7 @@ class SmotriIE(InfoExtractor):
|
||||
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
|
||||
},
|
||||
},
|
||||
# video-password
|
||||
# video-password, not approved by moderator
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
|
||||
'md5': 'f6331cef33cad65a0815ee482a54440b',
|
||||
@@ -71,7 +71,24 @@ class SmotriIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Video is not approved by moderator',
|
||||
},
|
||||
# age limit + video-password
|
||||
# video-password
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v6984858774#',
|
||||
'md5': 'f11e01d13ac676370fc3b95b9bda11b0',
|
||||
'info_dict': {
|
||||
'id': 'v6984858774',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дача Солженицина ПАРОЛЬ 223322',
|
||||
'uploader': 'psavari1',
|
||||
'uploader_id': 'psavari1',
|
||||
'upload_date': '20081103',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '223322',
|
||||
},
|
||||
},
|
||||
# age limit + video-password, not approved by moderator
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
|
||||
'md5': '91e909c9f0521adf5ee86fbe073aad70',
|
||||
@@ -90,19 +107,22 @@ class SmotriIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Video is not approved by moderator',
|
||||
},
|
||||
# not approved by moderator, but available
|
||||
# age limit + video-password
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v28888533b73',
|
||||
'md5': 'f44bc7adac90af518ef1ecf04893bb34',
|
||||
'url': 'http://smotri.com/video/view/?id=v7780025814',
|
||||
'md5': 'b4599b068422559374a59300c5337d72',
|
||||
'info_dict': {
|
||||
'id': 'v28888533b73',
|
||||
'id': 'v7780025814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Russian Spies Killed By ISIL Child Soldier',
|
||||
'uploader': 'Mopeder',
|
||||
'uploader_id': 'mopeder',
|
||||
'duration': 71,
|
||||
'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
|
||||
'upload_date': '20150114',
|
||||
'title': 'Sexy Beach (пароль 123)',
|
||||
'uploader': 'вАся',
|
||||
'uploader_id': 'asya_prosto',
|
||||
'upload_date': '20081218',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '123'
|
||||
},
|
||||
},
|
||||
# swf player
|
||||
@@ -152,6 +172,10 @@ class SmotriIE(InfoExtractor):
|
||||
'getvideoinfo': '1',
|
||||
}
|
||||
|
||||
video_password = self._downloader.params.get('videopassword', None)
|
||||
if video_password:
|
||||
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
@@ -161,13 +185,18 @@ class SmotriIE(InfoExtractor):
|
||||
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
|
||||
|
||||
if not video_url:
|
||||
if video.get('_moderate_no') or not video.get('moderated'):
|
||||
if video.get('_moderate_no'):
|
||||
raise ExtractorError(
|
||||
'Video %s has not been approved by moderator' % video_id, expected=True)
|
||||
|
||||
if video.get('error'):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
if video.get('_pass_protected') == 1:
|
||||
msg = ('Invalid video password' if video_password
|
||||
else 'This video is protected by a password, use the --video-password option')
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
title = video['title']
|
||||
thumbnail = video['_imgURL']
|
||||
upload_date = unified_strdate(video['added'])
|
||||
|
171
youtube_dl/extractor/snagfilms.py
Normal file
171
youtube_dl/extractor/snagfilms.py
Normal file
@@ -0,0 +1,171 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
# Extractor for the SnagFilms embed player (.../embed/player?filmId=<uuid>).
class SnagFilmsEmbedIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})'
    _TESTS = [{
        'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
        'md5': '2924e9215c6eff7a55ed35b72276bd93',
        'info_dict': {
            'id': '74849a00-85a9-11e1-9660-123139220831',
            'ext': 'mp4',
            'title': '#whilewewatch',
        }
    }, {
        'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        # Returns the src of the first SnagFilms embed-player <iframe> found
        # in webpage, or None when there is no such iframe.
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player page announces geo-blocking with a fixed message.
        if '>This film is not playable in your area.<' in webpage:
            raise ExtractorError(
                'Film %s is not playable in your area.' % video_id, expected=True)

        # The player configuration embeds a JavaScript "sources: [...]" array.
        sources = self._parse_json(js_to_json(self._search_regex(
            r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id)

        formats = []
        for source in sources:
            file_ = source.get('file')
            if not file_:
                continue
            type_ = source.get('type')
            format_id = source.get('label')
            ext = determine_ext(file_)
            # A source is HLS when either its declared type or its file
            # extension says so.
            if 'm3u8' in (type_, ext):
                formats.extend(self._extract_m3u8_formats(
                    file_, video_id, 'mp4', m3u8_id='hls'))
            else:
                bitrate = int_or_none(self._search_regex(
                    r'(\d+)kbps', file_, 'bitrate', default=None))
                # The height is read from labels such as "720p". A source
                # without a label would otherwise hand None to the regex
                # search as the string to scan.
                height = None
                if format_id:
                    height = int_or_none(self._search_regex(
                        r'^(\d+)[pP]$', format_id, 'height', default=None))
                formats.append({
                    'url': file_,
                    'format_id': format_id,
                    'tbr': bitrate,
                    'height': height,
                })
        self._sort_formats(formats)

        # Prefer the title from the player configuration; fall back to the
        # page's <title> element.
        title = self._search_regex(
            [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
            webpage, 'title')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
|
||||
|
||||
|
||||
# Extractor for SnagFilms film and show pages. It collects the metadata here
# and hands playback to the embed-player URL via a url_transparent result.
class SnagFilmsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)'
    _TESTS = [{
        'url': 'http://www.snagfilms.com/films/title/lost_for_life',
        'md5': '19844f897b35af219773fd63bdec2942',
        'info_dict': {
            'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
            'display_id': 'lost_for_life',
            'ext': 'mp4',
            'title': 'Lost for Life',
            'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 4489,
            'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
        }
    }, {
        'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
        'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
        'info_dict': {
            'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
            'display_id': 'the_world_cut_project/india',
            'ext': 'mp4',
            'title': 'India',
            'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 979,
            'categories': ['Documentary', 'Sports', 'Politics']
        }
    }, {
        # Film is not playable in your area.
        'url': 'http://www.snagfilms.com/films/title/inside_mecca',
        'only_matching': True,
    }, {
        # Film is not available.
        'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The site reports removed films with a fixed message.
        if ">Sorry, the Film you're looking for is not available.<" in webpage:
            raise ExtractorError(
                'Film %s is not available.' % display_id, expected=True)

        film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')

        # The page embeds a JSON array assigned to Snag.page.data.
        snag = self._parse_json(
            self._search_regex(
                r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
            display_id)

        # Take the metadata from the array item whose film id matches the
        # one found in the page.
        for item in snag:
            if item.get('data', {}).get('film', {}).get('id') == film_id:
                data = item['data']['film']
                title = data['title']
                description = clean_html(data.get('synopsis'))
                thumbnail = data.get('image')
                duration = int_or_none(data.get('duration') or data.get('runtime'))
                categories = [
                    category['title'] for category in data.get('categories', [])
                    if category.get('title')]
                break
        else:
            # No matching item (the loop ended without break): scrape the
            # same fields from the HTML markup instead.
            title = self._search_regex(
                r'itemprop="title">([^<]+)<', webpage, 'title')
            description = self._html_search_regex(
                r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
                webpage, 'description', default=None) or self._og_search_description(webpage)
            thumbnail = self._og_search_thumbnail(webpage)
            duration = parse_duration(self._search_regex(
                r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
                webpage, 'duration', fatal=False))
            categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)

        return {
            '_type': 'url_transparent',
            'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id,
            'id': film_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'categories': categories,
        }
|
@@ -6,9 +6,12 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class SohuIE(InfoExtractor):
|
||||
@@ -26,7 +29,7 @@ class SohuIE(InfoExtractor):
|
||||
'skip': 'On available in China',
|
||||
}, {
|
||||
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
||||
'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
|
||||
'md5': '699060e75cf58858dd47fb9c03c42cfb',
|
||||
'info_dict': {
|
||||
'id': '409385080',
|
||||
'ext': 'mp4',
|
||||
@@ -34,7 +37,7 @@ class SohuIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||
'md5': '49308ff6dafde5ece51137d04aec311e',
|
||||
'md5': '9bf34be48f2f4dadcb226c74127e203c',
|
||||
'info_dict': {
|
||||
'id': '78693464',
|
||||
'ext': 'mp4',
|
||||
@@ -48,7 +51,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '492923eac023ba2f13ff69617c32754a',
|
||||
'md5': 'bdbfb8f39924725e6589c146bc1883ad',
|
||||
'info_dict': {
|
||||
'id': '78910339_part1',
|
||||
'ext': 'mp4',
|
||||
@@ -56,7 +59,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
|
||||
'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
|
||||
'info_dict': {
|
||||
'id': '78910339_part2',
|
||||
'ext': 'mp4',
|
||||
@@ -64,7 +67,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '93584716ee0657c0b205b8aa3d27aa13',
|
||||
'md5': '8407e634175fdac706766481b9443450',
|
||||
'info_dict': {
|
||||
'id': '78910339_part3',
|
||||
'ext': 'mp4',
|
||||
@@ -139,21 +142,42 @@ class SohuIE(InfoExtractor):
|
||||
for i in range(part_count):
|
||||
formats = []
|
||||
for format_id, format_data in formats_json.items():
|
||||
data = format_data['data']
|
||||
allot = format_data['allot']
|
||||
|
||||
data = format_data['data']
|
||||
clips_url = data['clipsURL']
|
||||
su = data['su']
|
||||
|
||||
# URLs starting with http://newflv.sohu.ccgslb.net/ are not usable,
|
||||
# so retry until a working URL is obtained
|
||||
video_url = 'newflv.sohu.ccgslb.net'
|
||||
cdnId = None
|
||||
retries = 0
|
||||
while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
|
||||
download_note = 'Download information from CDN gateway for format ' + format_id
|
||||
|
||||
while 'newflv.sohu.ccgslb.net' in video_url:
|
||||
params = {
|
||||
'prot': 9,
|
||||
'file': clips_url[i],
|
||||
'new': su[i],
|
||||
'prod': 'flash',
|
||||
}
|
||||
|
||||
if cdnId is not None:
|
||||
params['idc'] = cdnId
|
||||
|
||||
download_note = 'Downloading %s video URL part %d of %d' % (
|
||||
format_id, i + 1, part_count)
|
||||
|
||||
if retries > 0:
|
||||
download_note += ' (retry #%d)' % retries
|
||||
part_info = self._parse_json(self._download_webpage(
|
||||
'http://%s/?%s' % (allot, compat_urllib_parse.urlencode(params)),
|
||||
video_id, download_note), video_id)
|
||||
|
||||
video_url = part_info['url']
|
||||
cdnId = part_info.get('nid')
|
||||
|
||||
retries += 1
|
||||
cdn_info = self._download_json(
|
||||
'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
|
||||
video_id, download_note)
|
||||
video_url = cdn_info['url']
|
||||
if retries > 5:
|
||||
raise ExtractorError('Failed to get video URL')
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)^(?:https?://)?
|
||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||
(?P<uploader>[\w\d-]+)/
|
||||
(?!sets/|likes/?(?:$|[?#]))
|
||||
(?!sets/|(?:likes|tracks)/?(?:$|[?#]))
|
||||
(?P<title>[\w\d-]+)/?
|
||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||
@@ -307,6 +307,9 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
'title': 'The Royal Concept',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -27,7 +27,7 @@ class SpankwireIE(InfoExtractor):
|
||||
'description': 'Crazy Bitch X rated music video.',
|
||||
'uploader': 'oreusz',
|
||||
'uploader_id': '124697',
|
||||
'upload_date': '20070508',
|
||||
'upload_date': '20070507',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
@@ -44,7 +44,7 @@ class SpankwireIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
r'<h1>([^<]+)', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div\s+id="descriptionContent">([^<]+)<',
|
||||
r'(?s)<div\s+id="descriptionContent">(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
|
||||
@@ -64,12 +64,12 @@ class SpankwireIE(InfoExtractor):
|
||||
r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'Comments<span[^>]+>\s*\(([\d,\.]+)\)</span>',
|
||||
r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
video_urls = list(map(
|
||||
compat_urllib_parse.unquote,
|
||||
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage)))
|
||||
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
|
||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||
password = self._search_regex(
|
||||
r'flashvars\.video_title = "([^"]+)',
|
||||
|
@@ -2,7 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SpiegeltvIE(InfoExtractor):
|
||||
@@ -17,7 +21,7 @@ class SpiegeltvIE(InfoExtractor):
|
||||
'thumbnail': 're:http://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
@@ -53,7 +57,37 @@ class SpiegeltvIE(InfoExtractor):
|
||||
server_json = self._download_json(
|
||||
'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
|
||||
video_id, note='Downloading server information')
|
||||
server = server_json['streamingserver'][0]['endpoint']
|
||||
|
||||
format = '16x9' if is_wide else '4x3'
|
||||
|
||||
formats = []
|
||||
for streamingserver in server_json['streamingserver']:
|
||||
endpoint = streamingserver.get('endpoint')
|
||||
if not endpoint:
|
||||
continue
|
||||
play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format)
|
||||
if endpoint.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': endpoint,
|
||||
'format_id': 'rtmp',
|
||||
'app': compat_urllib_parse_urlparse(endpoint).path[1:],
|
||||
'play_path': play_path,
|
||||
'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
|
||||
'ext': 'flv',
|
||||
'rtmp_live': True,
|
||||
})
|
||||
elif determine_ext(endpoint) == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
endpoint.replace('[video]', play_path),
|
||||
video_id, 'm4v',
|
||||
preference=1, # Prefer hls since it allows to workaround georestriction
|
||||
m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats is not False:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'url': endpoint,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for image in media_json['images']:
|
||||
@@ -65,17 +99,12 @@ class SpiegeltvIE(InfoExtractor):
|
||||
|
||||
description = media_json['subtitle']
|
||||
duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)
|
||||
format = '16x9' if is_wide else '4x3'
|
||||
|
||||
url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v'
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
'ext': 'm4v',
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'rtmp_live': True,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -44,7 +44,7 @@ class SunPornoIE(InfoExtractor):
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'class="views">\s*(\d+)\s*<',
|
||||
r'class="views">(?:<noscript>)?\s*(\d+)\s*<',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'(\d+)</b> Comments?',
|
||||
|
@@ -51,6 +51,17 @@ class TeamcocoIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
}, {
|
||||
'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
|
||||
'info_dict': {
|
||||
'id': '89341',
|
||||
'ext': 'mp4',
|
||||
'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
|
||||
'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
}
|
||||
]
|
||||
_VIDEO_ID_REGEXES = (
|
||||
@@ -110,9 +121,23 @@ class TeamcocoIE(InfoExtractor):
|
||||
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
|
||||
for filed in data['files']:
|
||||
if determine_ext(filed['url']) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
filed['url'], video_id, ext='mp4'))
|
||||
# compat_urllib_parse.urljoin does not work here
|
||||
if filed['url'].startswith('/'):
|
||||
m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
|
||||
else:
|
||||
m3u8_url = filed['url']
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4')
|
||||
for m3u8_format in m3u8_formats:
|
||||
if m3u8_format not in formats:
|
||||
formats.append(m3u8_format)
|
||||
elif determine_ext(filed['url']) == 'f4m':
|
||||
# TODO Correct f4m extraction
|
||||
continue
|
||||
else:
|
||||
if filed['url'].startswith('/mp4:protected/'):
|
||||
# TODO Correct extraction for these files
|
||||
continue
|
||||
m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
|
||||
if m_format is not None:
|
||||
format_id = m_format.group(1)
|
||||
|
@@ -26,7 +26,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
|
||||
class ThePlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|
||||
(?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
|theplatform:)(?P<id>[^/\?&]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
@@ -56,6 +56,17 @@ class ThePlatformIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
|
||||
'info_dict': {
|
||||
'id': 'yMBg9E8KFxZD',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:644ad9188d655b742f942bf2e06b002d',
|
||||
'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -85,6 +96,11 @@ class ThePlatformIE(InfoExtractor):
|
||||
if not provider_id:
|
||||
provider_id = 'dJ5BDC'
|
||||
|
||||
path = provider_id
|
||||
if mobj.group('media'):
|
||||
path += '/media'
|
||||
path += '/' + video_id
|
||||
|
||||
if smuggled_data.get('force_smil_url', False):
|
||||
smil_url = url
|
||||
elif mobj.group('config'):
|
||||
@@ -94,8 +110,7 @@ class ThePlatformIE(InfoExtractor):
|
||||
config = self._download_json(config_url, video_id, 'Downloading config')
|
||||
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||
else:
|
||||
smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
|
||||
'format=smil&mbr=true'.format(provider_id, video_id))
|
||||
smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
|
||||
|
||||
sig = smuggled_data.get('sig')
|
||||
if sig:
|
||||
@@ -112,7 +127,7 @@ class ThePlatformIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError(error_msg, expected=True)
|
||||
|
||||
info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
|
||||
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
|
||||
info_json = self._download_webpage(info_url, video_id)
|
||||
info = json.loads(info_json)
|
||||
|
||||
|
@@ -1,9 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
@@ -17,7 +14,7 @@ class TheSixtyOneIE(InfoExtractor):
|
||||
song
|
||||
)/(?P<id>[A-Za-z0-9]+)/?$'''
|
||||
_SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
|
||||
_SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}.thesixtyone.com/thesixtyone_production/audio/{0:}_stream'
|
||||
_SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
|
||||
_THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -70,14 +67,19 @@ class TheSixtyOneIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
song_id = mobj.group('id')
|
||||
song_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
self._SONG_URL_TEMPLATE.format(song_id), song_id)
|
||||
|
||||
song_data = json.loads(self._search_regex(
|
||||
r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'))
|
||||
song_data = self._parse_json(self._search_regex(
|
||||
r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'), song_id)
|
||||
|
||||
if self._search_regex(r'(t61\.s3_audio_load\s*=\s*1\.0;)', webpage, 's3_audio_load marker', default=None):
|
||||
song_data['audio_server'] = 's3.amazonaws.com'
|
||||
else:
|
||||
song_data['audio_server'] = song_data['audio_server'] + '.thesixtyone.com'
|
||||
|
||||
keys = [self._DECODE_MAP.get(s, s) for s in song_data['key']]
|
||||
url = self._SONG_FILE_URL_TEMPLATE.format(
|
||||
"".join(reversed(keys)), **song_data)
|
||||
|
40
youtube_dl/extractor/thisamericanlife.py
Normal file
40
youtube_dl/extractor/thisamericanlife.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ThisAmericanLifeIE(InfoExtractor):
    """Extractor for This American Life episodes (archive pages and play_full links)."""

    _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
        'md5': '8f7d2da8926298fdfca2ee37764c11ce',
        'info_dict': {
            'id': '487',
            'ext': 'm4a',
            'title': '487: Harper High School, Part One',
            'description': 'md5:ee40bdf3fb96174a9027f76dbecea655',
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://www.thisamericanlife.org/play_full.php?play=487',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the episode whose numeric id is matched in *url*.

        The episode page is always fetched from the canonical archive URL,
        whichever of the two accepted URL forms was given; metadata is read
        from that page while the stream URL is derived from the id alone.
        """
        video_id = self._match_id(url)

        episode_page_url = (
            'http://www.thisamericanlife.org/radio-archives/episode/%s' % video_id)
        webpage = self._download_webpage(episode_page_url, video_id)

        # Single fixed 64k audio-only HLS stream per episode.
        stream_url = 'http://stream.thisamericanlife.org/{0}/stream/{0}_64k.m3u8'.format(video_id)

        # The title lookup is fatal; description and thumbnail use the
        # helpers' default behaviour.
        title = self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True)
        description = self._html_search_meta(r'description', webpage, 'description')
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'url': stream_url,
            'protocol': 'm3u8_native',
            'ext': 'm4a',
            'acodec': 'aac',
            'vcodec': 'none',
            'abr': 64,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@@ -12,17 +12,22 @@ class TlcIE(DiscoveryIE):
|
||||
IE_NAME = 'tlc.com'
|
||||
_VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||
|
||||
_TEST = {
|
||||
# DiscoveryIE has _TESTS
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
|
||||
'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a',
|
||||
'info_dict': {
|
||||
'id': '853232',
|
||||
'id': '104493',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cake Boss: Too Big to Fly',
|
||||
'title': 'Too Big to Fly',
|
||||
'description': 'Buddy has taken on a high flying task.',
|
||||
'duration': 119,
|
||||
'timestamp': 1393365060,
|
||||
'upload_date': '20140225',
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class TlcDeIE(InfoExtractor):
|
||||
|
@@ -3,39 +3,70 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class TNAFlixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
|
||||
|
||||
_TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
|
||||
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
|
||||
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
|
||||
'md5': 'ecf3498417d09216374fc5907f9c6ec0',
|
||||
'info_dict': {
|
||||
'id': '553878',
|
||||
'display_id': 'Carmella-Decesare-striptease',
|
||||
'ext': 'mp4',
|
||||
'title': 'Carmella Decesare - striptease',
|
||||
'description': '',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'duration': 91,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
|
||||
'only_matching': True,
|
||||
}
|
||||
class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
# May be overridden in descendants if necessary
|
||||
_CONFIG_REGEX = [
|
||||
r'flashvars\.config\s*=\s*escape\("([^"]+)"',
|
||||
r'<input[^>]+name="config\d?" value="([^"]+)"',
|
||||
]
|
||||
_TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
|
||||
_DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
|
||||
_UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
|
||||
_VIEW_COUNT_REGEX = None
|
||||
_COMMENT_COUNT_REGEX = None
|
||||
_AVERAGE_RATING_REGEX = None
|
||||
_CATEGORIES_REGEX = r'<li[^>]*>\s*<span[^>]+class="infoTitle"[^>]*>Categories:</span>\s*<span[^>]+class="listView"[^>]*>(.+?)</span>\s*</li>'
|
||||
|
||||
def _extract_thumbnails(self, flix_xml):
    """Build the list of timeline thumbnails described by the config XML.

    The config describes a numbered image sequence: a URL pattern containing
    '#' as the placeholder for the image index, plus the first and last
    index.  Two element naming schemes are accepted for each piece of data
    (e.g. 'timeline' or 'rolloverBarImage').

    Returns a list of dicts with 'url', 'width' and 'height' keys, one per
    index from first to last inclusive, or None when the XML lacks any
    required element, the indices are not plain digits, or first > last.
    """

    def get_child(elem, names):
        # Return the first direct child found under any of the candidate
        # tag names (None when none of them is present).
        for name in names:
            child = elem.find(name)
            if child is not None:
                return child

    timeline = get_child(flix_xml, ['timeline', 'rolloverBarImage'])
    if timeline is None:
        return

    pattern_el = get_child(timeline, ['imagePattern', 'pattern'])
    if pattern_el is None or not pattern_el.text:
        return

    first_el = get_child(timeline, ['imageFirst', 'first'])
    last_el = get_child(timeline, ['imageLast', 'last'])
    if first_el is None or last_el is None:
        return

    # An empty element (e.g. <imageFirst/>) has text None; treat it like any
    # other non-numeric value instead of failing on None.isdigit().
    first_text = first_el.text or ''
    last_text = last_el.text or ''
    if not first_text.isdigit() or not last_text.isdigit():
        return

    first = int(first_text)
    last = int(last_text)
    if first > last:
        return

    width = int_or_none(xpath_text(timeline, './imageWidth', 'thumbnail width'))
    height = int_or_none(xpath_text(timeline, './imageHeight', 'thumbnail height'))

    # Every thumbnail shares the same dimensions; only the index substituted
    # for '#' in the URL pattern differs.
    return [{
        'url': self._proto_relative_url(pattern_el.text.replace('#', compat_str(i)), 'http:'),
        'width': width,
        'height': height,
    } for i in range(first, last + 1)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -44,39 +75,64 @@ class TNAFlixIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
cfg_url = self._proto_relative_url(self._html_search_regex(
|
||||
self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
|
||||
|
||||
cfg_xml = self._download_xml(
|
||||
cfg_url, display_id, 'Downloading metadata',
|
||||
transform_source=fix_xml_ampersands)
|
||||
|
||||
formats = []
|
||||
|
||||
def extract_video_url(vl):
|
||||
return re.sub('speed=\d+', 'speed=', vl.text)
|
||||
|
||||
video_link = cfg_xml.find('./videoLink')
|
||||
if video_link is not None:
|
||||
formats.append({
|
||||
'url': extract_video_url(video_link),
|
||||
'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'),
|
||||
})
|
||||
|
||||
for item in cfg_xml.findall('./quality/item'):
|
||||
video_link = item.find('./videoLink')
|
||||
if video_link is None:
|
||||
continue
|
||||
res = item.find('res')
|
||||
format_id = None if res is None else res.text
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(extract_video_url(video_link), 'http:'),
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._proto_relative_url(
|
||||
xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
|
||||
thumbnails = self._extract_thumbnails(cfg_xml)
|
||||
|
||||
title = self._html_search_regex(
|
||||
self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
|
||||
description = self._html_search_regex(
|
||||
self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration', default=None))
|
||||
|
||||
cfg_url = self._proto_relative_url(self._html_search_regex(
|
||||
self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
|
||||
def extract_field(pattern, name):
|
||||
return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
|
||||
|
||||
cfg_xml = self._download_xml(
|
||||
cfg_url, display_id, note='Downloading metadata',
|
||||
transform_source=fix_xml_ampersands)
|
||||
description = extract_field(self._DESCRIPTION_REGEX, 'description')
|
||||
uploader = extract_field(self._UPLOADER_REGEX, 'uploader')
|
||||
view_count = str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count'))
|
||||
comment_count = str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count'))
|
||||
average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
|
||||
|
||||
thumbnail = self._proto_relative_url(
|
||||
cfg_xml.find('./startThumb').text, 'http:')
|
||||
|
||||
formats = []
|
||||
for item in cfg_xml.findall('./quality/item'):
|
||||
video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
|
||||
format_id = item.find('res').text
|
||||
fmt = {
|
||||
'url': self._proto_relative_url(video_url, 'http:'),
|
||||
'format_id': format_id,
|
||||
}
|
||||
m = re.search(r'^(\d+)', format_id)
|
||||
if m:
|
||||
fmt['height'] = int(m.group(1))
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
|
||||
categories = categories_str.split(', ') if categories_str is not None else []
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -84,7 +140,130 @@ class TNAFlixIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'age_limit': age_limit,
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'average_rating': average_rating,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class TNAFlixIE(TNAFlixNetworkBaseIE):
    """tnaflix.com extractor; only overrides URL matching and page regexes."""

    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'

    # Site-specific patterns replacing the base class defaults.
    _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
    _UPLOADER_REGEX = r'(?s)<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)<div'

    _TESTS = [
        {
            # anonymous uploader, no categories
            'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
            'md5': 'ecf3498417d09216374fc5907f9c6ec0',
            'info_dict': {
                'id': '553878',
                'display_id': 'Carmella-Decesare-striptease',
                'ext': 'mp4',
                'title': 'Carmella Decesare - striptease',
                'thumbnail': 're:https?://.*\.jpg$',
                'duration': 91,
                'age_limit': 18,
                'uploader': 'Anonymous',
                'categories': [],
            }
        },
        {
            # non-anonymous uploader, categories
            'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
            'md5': '0f5d4d490dbfd117b8607054248a07c0',
            'info_dict': {
                'id': '6538',
                'display_id': 'Educational-xxx-video',
                'ext': 'mp4',
                'title': 'Educational xxx video',
                'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
                'thumbnail': 're:https?://.*\.jpg$',
                'duration': 164,
                'age_limit': 18,
                'uploader': 'bobwhite39',
                'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'],
            }
        },
        {
            # URL-matching check only
            'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
            'only_matching': True,
        },
    ]
|
||||
|
||||
|
||||
class EMPFlixIE(TNAFlixNetworkBaseIE):
    """empflix.com extractor; only overrides URL matching and the uploader regex."""

    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'

    # Site-specific pattern replacing the base class default.
    _UPLOADER_REGEX = r'<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)</li>'

    _TESTS = [
        {
            'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
            'md5': 'b1bc15b6412d33902d6e5952035fcabc',
            'info_dict': {
                'id': '33051',
                'display_id': 'Amateur-Finger-Fuck',
                'ext': 'mp4',
                'title': 'Amateur Finger Fuck',
                'description': 'Amateur solo finger fucking.',
                'thumbnail': 're:https?://.*\.jpg$',
                'duration': 83,
                'age_limit': 18,
                'uploader': 'cwbike',
                'categories': ['Amateur', 'Anal', 'Fisting', 'Home made', 'Solo'],
            }
        },
        {
            # URL-matching check only (display id containing brackets)
            'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
            'only_matching': True,
        },
    ]
|
||||
|
||||
|
||||
class MovieFapIE(TNAFlixNetworkBaseIE):
    """moviefap.com extractor; supplies URL matching and the statistics regexes."""

    _VALID_URL = r'https?://(?:www\.)?moviefap\.com/videos/(?P<id>[0-9a-f]+)/(?P<display_id>[^/]+)\.html'

    # Patterns that the base class leaves unset (None) by default,
    # plus a site-specific categories pattern.
    _VIEW_COUNT_REGEX = r'<br>Views\s*<strong>([\d,.]+)</strong>'
    _COMMENT_COUNT_REGEX = r'<span[^>]+id="comCount"[^>]*>([\d,.]+)</span>'
    _AVERAGE_RATING_REGEX = r'Current Rating\s*<br>\s*<strong>([\d.]+)</strong>'
    _CATEGORIES_REGEX = r'(?s)<div[^>]+id="vid_info"[^>]*>\s*<div[^>]*>.+?</div>(.*?)<br>'

    _TESTS = [
        {
            # normal, multi-format video
            'url': 'http://www.moviefap.com/videos/be9867c9416c19f54a4a/experienced-milf-amazing-handjob.html',
            'md5': '26624b4e2523051b550067d547615906',
            'info_dict': {
                'id': 'be9867c9416c19f54a4a',
                'display_id': 'experienced-milf-amazing-handjob',
                'ext': 'mp4',
                'title': 'Experienced MILF Amazing Handjob',
                'description': 'Experienced MILF giving an Amazing Handjob',
                'thumbnail': 're:https?://.*\.jpg$',
                'age_limit': 18,
                'uploader': 'darvinfred06',
                'view_count': int,
                'comment_count': int,
                'average_rating': float,
                'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'],
            }
        },
        {
            # quirky single-format case where the extension is given as fid, but the video is really an flv
            'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html',
            'md5': 'fa56683e291fc80635907168a743c9ad',
            'info_dict': {
                'id': 'e5da0d3edce5404418f5',
                'display_id': 'jeune-couple-russe',
                'ext': 'flv',
                'title': 'Jeune Couple Russe',
                'description': 'Amateur',
                'thumbnail': 're:https?://.*\.jpg$',
                'age_limit': 18,
                'uploader': 'whiskeyjar',
                'view_count': int,
                'comment_count': int,
                'average_rating': float,
                'categories': ['Amateur', 'Teen'],
            }
        },
    ]
|
||||
|
@@ -47,7 +47,7 @@ class Tube8IE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
|
||||
flashvars = json.loads(self._html_search_regex(
|
||||
r'flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
|
||||
r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'))
|
||||
|
||||
video_url = flashvars['video_url']
|
||||
if flashvars.get('encrypted') is True:
|
||||
|
@@ -4,6 +4,8 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .pornhub import PornHubIE
|
||||
from .vimeo import VimeoIE
|
||||
|
||||
|
||||
class TumblrIE(InfoExtractor):
|
||||
@@ -39,6 +41,17 @@ class TumblrIE(InfoExtractor):
|
||||
'timestamp': 1430931613,
|
||||
},
|
||||
'add_ie': ['Vidme'],
|
||||
}, {
|
||||
'url': 'http://camdamage.tumblr.com/post/98846056295/',
|
||||
'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
|
||||
'info_dict': {
|
||||
'id': '105463834',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cam Damage-HD 720p',
|
||||
'uploader': 'John Moyer',
|
||||
'uploader_id': 'user32021558',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -55,6 +68,14 @@ class TumblrIE(InfoExtractor):
|
||||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
pornhub_url = PornHubIE._extract_url(webpage)
|
||||
if pornhub_url:
|
||||
return self.url_result(pornhub_url, 'PornHub')
|
||||
|
||||
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||
if vimeo_url:
|
||||
return self.url_result(vimeo_url, 'Vimeo')
|
||||
|
||||
iframe_url = self._search_regex(
|
||||
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
|
||||
webpage, 'iframe url')
|
||||
|
@@ -23,7 +23,7 @@ class TurboIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'duration': 3715,
|
||||
'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
|
||||
'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
|
||||
'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
@@ -42,7 +42,7 @@ class TurboIE(InfoExtractor):
|
||||
title = xpath_text(item, './title', 'title')
|
||||
duration = int_or_none(xpath_text(item, './durate', 'duration'))
|
||||
thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
|
||||
description = self._og_search_description(webpage)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
formats = []
|
||||
get_quality = qualities(['3g', 'sd', 'hq'])
|
||||
|
109
youtube_dl/extractor/tvc.py
Normal file
109
youtube_dl/extractor/tvc.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TVCIE(InfoExtractor):
    """Extractor for tvc.ru iframe player URLs (video/iframe/id/<id>)."""

    _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
        'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
        'info_dict': {
            'id': '74622',
            'ext': 'mp4',
            'title': 'События. "События". Эфир от 22.05.2015 14:30',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 1122,
        },
    }

    @classmethod
    def _extract_url(cls, webpage):
        """Return the src URL of the first tvc.ru player iframe in *webpage*.

        Returns None when no such iframe is present.
        """
        # The URL body must end at the same quote character that opened the
        # attribute.  Excluding only '"' let a single-quoted src='...' value
        # run on into the following attributes, so the closing quote (\1) is
        # excluded as well; double-quoted values match exactly as before.
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/(?:(?!\1)[^"])+)\1', webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the JSON description of the video and build its info dict.

        Raises KeyError if the JSON has no 'title'; all other fields are
        optional.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://www.tvc.ru/video/json/id/%s' % video_id, video_id)

        # 'path' / 'quality' may be absent or null in the JSON; both cases
        # simply yield no formats instead of an AttributeError/TypeError.
        quality_entries = (video.get('path') or {}).get('quality') or []

        formats = []
        for info in quality_entries:
            video_url = info.get('url')
            if not video_url:
                continue
            # The format id is taken from the path segment that follows
            # 'cdnvideo/', without any '-suffix' part.
            format_id = self._search_regex(
                r'cdnvideo/([^/]+?)(?:-[^/]+?)?/', video_url,
                'format id', default=None)
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'width': int_or_none(info.get('width')),
                'height': int_or_none(info.get('height')),
                'tbr': int_or_none(info.get('bitrate')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video['title'],
            'thumbnail': video.get('picture'),
            'duration': int_or_none(video.get('duration')),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class TVCArticleIE(InfoExtractor):
    """Extractor for tvc.ru pages (anything except the iframe player URLs)."""

    _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
    _TESTS = [
        {
            'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
            'info_dict': {
                'id': '74622',
                'ext': 'mp4',
                'title': 'События. "События". Эфир от 22.05.2015 14:30',
                'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 1122,
            },
        },
        {
            'url': 'http://www.tvc.ru/news/show/id/69944',
            'info_dict': {
                'id': '75399',
                'ext': 'mp4',
                'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках',
                'description': 'md5:f2098f71e21f309e89f69b525fd9846e',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 278,
            },
        },
        {
            'url': 'http://www.tvc.ru/channel/brand/id/47/show/episodes#',
            'info_dict': {
                'id': '2185',
                'ext': 'mp4',
                'title': 'Ещё не поздно. Эфир от 03.08.2013',
                'description': 'md5:51fae9f3f8cfe67abce014e428e5b027',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 3316,
            },
        },
    ]

    def _real_extract(self, url):
        """Hand the page's Open Graph video URL over to the TVC extractor.

        Title, description and thumbnail come from this page's Open Graph
        tags and are returned alongside the delegated URL in a
        'url_transparent' result.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # Looked up in the same order as the keys of the result below.
        player_url = self._og_search_video_url(webpage)
        title = clean_html(self._og_search_title(webpage))
        description = clean_html(self._og_search_description(webpage))
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'ie_key': 'TVC',
            'url': player_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@@ -26,6 +26,7 @@ class TVPlayIE(InfoExtractor):
|
||||
viasat4play\.no/programmer|
|
||||
tv6play\.no/programmer|
|
||||
tv3play\.dk/programmer|
|
||||
play\.novatv\.bg/programi
|
||||
)/[^/]+/(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [
|
||||
@@ -173,6 +174,22 @@ class TVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://play.novatv.bg/programi/zdravei-bulgariya/624952?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '624952',
|
||||
'ext': 'flv',
|
||||
'title': 'Здравей, България (12.06.2015 г.) ',
|
||||
'description': 'md5:99f3700451ac5bb71a260268b8daefd7',
|
||||
'duration': 8838,
|
||||
'timestamp': 1434100372,
|
||||
'upload_date': '20150612',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -189,17 +189,17 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
_ITEM_SHORTCUT = 'v'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.twitch.tv/ksptv/v/3622000',
|
||||
'url': 'http://www.twitch.tv/riotgames/v/6528877',
|
||||
'info_dict': {
|
||||
'id': 'v3622000',
|
||||
'id': 'v6528877',
|
||||
'ext': 'mp4',
|
||||
'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
|
||||
'title': 'LCK Summer Split - Week 6 Day 1',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 6951,
|
||||
'timestamp': 1419028564,
|
||||
'upload_date': '20141219',
|
||||
'uploader': 'KSPTV',
|
||||
'uploader_id': 'ksptv',
|
||||
'duration': 17208,
|
||||
'timestamp': 1435131709,
|
||||
'upload_date': '20150624',
|
||||
'uploader': 'Riot Games',
|
||||
'uploader_id': 'riotgames',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
@@ -215,7 +215,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
'%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
|
||||
'Downloading %s access token' % self._ITEM_TYPE)
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/vod/%s?nauth=%s&nauthsig=%s'
|
||||
'%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true'
|
||||
% (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
|
||||
item_id, 'mp4')
|
||||
self._prefer_source(formats)
|
||||
|
72
youtube_dl/extractor/twitter.py
Normal file
72
youtube_dl/extractor/twitter.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter player cards (twitter.com/i/cards/tfw/v1/<id>)."""

    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
    _TEST = {
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        'md5': 'a74f50b310c83170319ba16de6955192',
        'info_dict': {
            'id': '560070183650213889',
            'ext': 'mp4',
            'title': 'TwitterCard',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 30.033,
        },
    }

    def _real_extract(self, url):
        """Fetch the card once per known User-Agent and collect one format each.

        Thumbnail and duration are read from the player config of the last
        page fetched.
        """
        video_id = self._match_id(url)

        # Different formats served for different User-Agents
        user_agents = (
            'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',  # mp4
            'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',  # webm
        )

        config = None
        formats = []
        for agent in user_agents:
            page_request = compat_urllib_request.Request(url)
            page_request.add_header('User-Agent', agent)
            webpage = self._download_webpage(page_request, video_id)

            # The player config is HTML-escaped JSON stored in an attribute.
            escaped_config = self._search_regex(
                r'data-player-config="([^"]+)"', webpage, 'data player config')
            config = self._parse_json(unescapeHTML(escaped_config), video_id)

            video_url = config['playlist'][0]['source']
            fmt = {'url': video_url}

            # Dimensions, when present, appear as a /<width>x<height>/ path
            # segment of the media URL.
            size = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
            if size:
                fmt['width'] = int(size.group('width'))
                fmt['height'] = int(size.group('height'))
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': 'TwitterCard',
            'thumbnail': config.get('posterImageUrl'),
            'duration': float_or_none(config.get('duration')),
            'formats': formats,
        }
|
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -26,11 +27,21 @@ class Vbox7IE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
redirect_page, urlh = self._download_webpage_handle(url, video_id)
|
||||
new_location = self._search_regex(r'window\.location = \'(.*)\';',
|
||||
redirect_page, 'redirect location')
|
||||
redirect_url = urlh.geturl() + new_location
|
||||
webpage = self._download_webpage(redirect_url, video_id,
|
||||
# need to get the page 3 times for the correct jsSecretToken cookie
|
||||
# which is necessary for the correct title
|
||||
def get_session_id():
|
||||
redirect_page = self._download_webpage(url, video_id)
|
||||
session_id_url = self._search_regex(
|
||||
r'var\s*url\s*=\s*\'([^\']+)\';', redirect_page,
|
||||
'session id url')
|
||||
self._download_webpage(
|
||||
compat_urlparse.urljoin(url, session_id_url), video_id,
|
||||
'Getting session id')
|
||||
|
||||
get_session_id()
|
||||
get_session_id()
|
||||
|
||||
webpage = self._download_webpage(url, video_id,
|
||||
'Downloading redirect page')
|
||||
|
||||
title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
|
@@ -1,5 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import time
|
||||
import hmac
|
||||
import hashlib
|
||||
@@ -11,6 +13,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -23,27 +26,35 @@ class VikiBaseIE(InfoExtractor):
|
||||
_APP_VERSION = '2.2.5.1428709186'
|
||||
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
|
||||
|
||||
def _prepare_call(self, path, timestamp=None):
|
||||
_NETRC_MACHINE = 'viki'
|
||||
|
||||
_token = None
|
||||
|
||||
def _prepare_call(self, path, timestamp=None, post_data=None):
|
||||
path += '?' if '?' not in path else '&'
|
||||
if not timestamp:
|
||||
timestamp = int(time.time())
|
||||
query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
|
||||
if self._token:
|
||||
query += '&token=%s' % self._token
|
||||
sig = hmac.new(
|
||||
self._APP_SECRET.encode('ascii'),
|
||||
query.encode('ascii'),
|
||||
hashlib.sha1
|
||||
).hexdigest()
|
||||
return self._API_URL_TEMPLATE % (query, sig)
|
||||
url = self._API_URL_TEMPLATE % (query, sig)
|
||||
return compat_urllib_request.Request(
|
||||
url, json.dumps(post_data).encode('utf-8')) if post_data else url
|
||||
|
||||
def _call_api(self, path, video_id, note, timestamp=None):
|
||||
def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
|
||||
resp = self._download_json(
|
||||
self._prepare_call(path, timestamp), video_id, note)
|
||||
self._prepare_call(path, timestamp, post_data), video_id, note)
|
||||
|
||||
error = resp.get('error')
|
||||
if error:
|
||||
if error == 'invalid timestamp':
|
||||
resp = self._download_json(
|
||||
self._prepare_call(path, int(resp['current_timestamp'])),
|
||||
self._prepare_call(path, int(resp['current_timestamp']), post_data),
|
||||
video_id, '%s (retry)' % note)
|
||||
error = resp.get('error')
|
||||
if error:
|
||||
@@ -56,6 +67,27 @@ class VikiBaseIE(InfoExtractor):
|
||||
'%s returned error: %s' % (self.IE_NAME, error),
|
||||
expected=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'login_id': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
login = self._call_api(
|
||||
'sessions.json', None,
|
||||
'Logging in as %s' % username, post_data=login_form)
|
||||
|
||||
self._token = login.get('token')
|
||||
if not self._token:
|
||||
self.report_warning('Unable to get session token, login has probably failed')
|
||||
|
||||
|
||||
class VikiIE(VikiBaseIE):
|
||||
IE_NAME = 'viki'
|
||||
|
@@ -22,6 +22,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -173,6 +174,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_vimeo_url(url, webpage):
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return surl
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
|
@@ -13,6 +13,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -34,6 +35,7 @@ class VKIE(InfoExtractor):
|
||||
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
|
||||
'duration': 195,
|
||||
'upload_date': '20120212',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -45,7 +47,8 @@ class VKIE(InfoExtractor):
|
||||
'uploader': 'Tom Cruise',
|
||||
'title': 'No name',
|
||||
'duration': 9,
|
||||
'upload_date': '20130721'
|
||||
'upload_date': '20130721',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -59,6 +62,7 @@ class VKIE(InfoExtractor):
|
||||
'title': 'Lin Dan',
|
||||
'duration': 101,
|
||||
'upload_date': '20120730',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -73,7 +77,8 @@ class VKIE(InfoExtractor):
|
||||
'uploader': 'Триллеры',
|
||||
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
|
||||
'duration': 8352,
|
||||
'upload_date': '20121218'
|
||||
'upload_date': '20121218',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Requires vk account credentials',
|
||||
},
|
||||
@@ -100,6 +105,7 @@ class VKIE(InfoExtractor):
|
||||
'title': 'Книга Илая',
|
||||
'duration': 6771,
|
||||
'upload_date': '20140626',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
},
|
||||
@@ -115,20 +121,27 @@ class VKIE(InfoExtractor):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'act': 'login',
|
||||
'role': 'al_frame',
|
||||
'expire': '1',
|
||||
'email': username,
|
||||
'pass': password,
|
||||
}
|
||||
login_page = self._download_webpage(
|
||||
'https://vk.com', None, 'Downloading login page')
|
||||
|
||||
request = compat_urllib_request.Request('https://login.vk.com/?act=login',
|
||||
compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
|
||||
login_form = dict(re.findall(
|
||||
r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
|
||||
login_page))
|
||||
|
||||
login_form.update({
|
||||
'email': username.encode('cp1251'),
|
||||
'pass': password.encode('cp1251'),
|
||||
})
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'https://login.vk.com/?act=login',
|
||||
compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None, note='Logging in as %s' % username)
|
||||
|
||||
if re.search(r'onLoginFailed', login_page):
|
||||
raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
|
||||
raise ExtractorError(
|
||||
'Unable to login, incorrect username and/or password', expected=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -140,9 +153,14 @@ class VKIE(InfoExtractor):
|
||||
if not video_id:
|
||||
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
|
||||
|
||||
info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
||||
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
||||
info_page = self._download_webpage(info_url, video_id)
|
||||
|
||||
if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
|
||||
raise ExtractorError(
|
||||
'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
|
||||
expected=True)
|
||||
|
||||
ERRORS = {
|
||||
r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
|
||||
'Video %s has been removed from public access due to rightholder complaint.',
|
||||
@@ -175,25 +193,29 @@ class VKIE(InfoExtractor):
|
||||
m_rutube.group(1).replace('\\', ''))
|
||||
return self.url_result(rutube_url)
|
||||
|
||||
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
|
||||
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
|
||||
if m_opts:
|
||||
m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
|
||||
m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
|
||||
if m_opts_url:
|
||||
opts_url = m_opts_url.group(1)
|
||||
if opts_url.startswith('//'):
|
||||
opts_url = 'http:' + opts_url
|
||||
return self.url_result(opts_url)
|
||||
|
||||
data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
|
||||
data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
|
||||
data = json.loads(data_json)
|
||||
|
||||
# Extract upload date
|
||||
upload_date = None
|
||||
mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
|
||||
mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
|
||||
if mobj is not None:
|
||||
mobj.group(1) + ' ' + mobj.group(2)
|
||||
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'"mv_views_count_number"[^>]*>([\d,.]+) views<',
|
||||
info_page, 'view count', fatal=False))
|
||||
|
||||
formats = [{
|
||||
'format_id': k,
|
||||
'url': v,
|
||||
@@ -210,6 +232,7 @@ class VKIE(InfoExtractor):
|
||||
'uploader': data.get('md_author'),
|
||||
'duration': data.get('duration'),
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -36,6 +36,7 @@ class VubeIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}, {
|
||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||
|
@@ -13,7 +13,6 @@ from ..utils import (
|
||||
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
"""Information Extractor for xHamster"""
|
||||
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -133,3 +132,36 @@ class XHamsterIE(InfoExtractor):
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class XHamsterEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://xhamster.com/xembed.php?video=3328539',
|
||||
'info_dict': {
|
||||
'id': '3328539',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pen Masturbation',
|
||||
'upload_date': '20140728',
|
||||
'uploader_id': 'anonymous',
|
||||
'duration': 5,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
|
||||
webpage, 'xhamster url')
|
||||
|
||||
return self.url_result(video_url, 'XHamster')
|
||||
|
@@ -5,10 +5,12 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,6 +27,8 @@ class XVideosIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -40,9 +44,30 @@ class XVideosIE(InfoExtractor):
|
||||
video_thumbnail = self._search_regex(
|
||||
r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
}]
|
||||
|
||||
android_req = compat_urllib_request.Request(url)
|
||||
android_req.add_header('User-Agent', self._ANDROID_USER_AGENT)
|
||||
android_webpage = self._download_webpage(android_req, video_id, fatal=False)
|
||||
|
||||
if android_webpage is not None:
|
||||
player_params_str = self._search_regex(
|
||||
'mobileReplacePlayerDivTwoQual\(([^)]+)\)',
|
||||
android_webpage, 'player parameters', default='')
|
||||
player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(',')))
|
||||
if player_params:
|
||||
formats.extend([{
|
||||
'url': param,
|
||||
'preference': -10,
|
||||
} for param in player_params if determine_ext(param) == 'mp4'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'thumbnail': video_thumbnail,
|
||||
|
@@ -1,123 +1,235 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import math
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
from ..utils import ExtractorError
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_ord,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class YoukuIE(InfoExtractor):
|
||||
IE_NAME = 'youku'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
|
||||
youku:)
|
||||
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html',
|
||||
'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b',
|
||||
'params': {
|
||||
'test': False
|
||||
},
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
|
||||
'md5': '5f3af4192eabacc4501508d54a8cabd7',
|
||||
'info_dict': {
|
||||
'id': 'XNDgyMDQ2NTQw_part00',
|
||||
'ext': 'flv',
|
||||
'title': 'youtube-dl test video "\'/\\ä↭𝕐'
|
||||
'id': 'XMTc1ODE5Njcy_part1',
|
||||
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
|
||||
'ext': 'flv'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
|
||||
'info_dict': {
|
||||
'id': 'XODgxNjg1Mzk2',
|
||||
'title': '武媚娘传奇 85',
|
||||
},
|
||||
'playlist_count': 11,
|
||||
}, {
|
||||
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
|
||||
'info_dict': {
|
||||
'id': 'XMTI1OTczNDM5Mg',
|
||||
'title': '花千骨 04',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
'skip': 'Available in China only',
|
||||
}]
|
||||
|
||||
def _gen_sid(self):
|
||||
nowTime = int(time.time() * 1000)
|
||||
random1 = random.randint(1000, 1998)
|
||||
random2 = random.randint(1000, 9999)
|
||||
def construct_video_urls(self, data1, data2):
|
||||
# get sid, token
|
||||
def yk_t(s1, s2):
|
||||
ls = list(range(256))
|
||||
t = 0
|
||||
for i in range(256):
|
||||
t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
|
||||
ls[i], ls[t] = ls[t], ls[i]
|
||||
s = bytearray()
|
||||
x, y = 0, 0
|
||||
for i in range(len(s2)):
|
||||
y = (y + 1) % 256
|
||||
x = (x + ls[y]) % 256
|
||||
ls[x], ls[y] = ls[y], ls[x]
|
||||
s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
|
||||
return bytes(s)
|
||||
|
||||
return "%d%d%d" % (nowTime, random1, random2)
|
||||
sid, token = yk_t(
|
||||
b'becaf9be', base64.b64decode(data2['ep'].encode('ascii'))
|
||||
).decode('ascii').split('_')
|
||||
|
||||
def _get_file_ID_mix_string(self, seed):
|
||||
mixed = []
|
||||
source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
|
||||
seed = float(seed)
|
||||
for i in range(len(source)):
|
||||
seed = (seed * 211 + 30031) % 65536
|
||||
index = math.floor(seed / 65536 * len(source))
|
||||
mixed.append(source[int(index)])
|
||||
source.remove(source[int(index)])
|
||||
# return ''.join(mixed)
|
||||
return mixed
|
||||
# get oip
|
||||
oip = data2['ip']
|
||||
|
||||
def _get_file_id(self, fileId, seed):
|
||||
mixed = self._get_file_ID_mix_string(seed)
|
||||
ids = fileId.split('*')
|
||||
realId = []
|
||||
for ch in ids:
|
||||
if ch:
|
||||
realId.append(mixed[int(ch)])
|
||||
return ''.join(realId)
|
||||
# get fileid
|
||||
string_ls = list(
|
||||
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
|
||||
shuffled_string_ls = []
|
||||
seed = data1['seed']
|
||||
N = len(string_ls)
|
||||
for ii in range(N):
|
||||
seed = (seed * 0xd3 + 0x754f) % 0x10000
|
||||
idx = seed * len(string_ls) // 0x10000
|
||||
shuffled_string_ls.append(string_ls[idx])
|
||||
del string_ls[idx]
|
||||
|
||||
fileid_dict = {}
|
||||
for format in data1['streamtypes']:
|
||||
streamfileid = [
|
||||
int(i) for i in data1['streamfileids'][format].strip('*').split('*')]
|
||||
fileid = ''.join(
|
||||
[shuffled_string_ls[i] for i in streamfileid])
|
||||
fileid_dict[format] = fileid[:8] + '%s' + fileid[10:]
|
||||
|
||||
def get_fileid(format, n):
|
||||
fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2)
|
||||
return fileid
|
||||
|
||||
# get ep
|
||||
def generate_ep(format, n):
|
||||
fileid = get_fileid(format, n)
|
||||
ep_t = yk_t(
|
||||
b'bf7e5f01',
|
||||
('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
|
||||
)
|
||||
ep = base64.b64encode(ep_t).decode('ascii')
|
||||
return ep
|
||||
|
||||
# generate video_urls
|
||||
video_urls_dict = {}
|
||||
for format in data1['streamtypes']:
|
||||
video_urls = []
|
||||
for dt in data1['segs'][format]:
|
||||
n = str(int(dt['no']))
|
||||
param = {
|
||||
'K': dt['k'],
|
||||
'hd': self.get_hd(format),
|
||||
'myp': 0,
|
||||
'ts': dt['seconds'],
|
||||
'ypp': 0,
|
||||
'ctype': 12,
|
||||
'ev': 1,
|
||||
'token': token,
|
||||
'oip': oip,
|
||||
'ep': generate_ep(format, n)
|
||||
}
|
||||
video_url = \
|
||||
'http://k.youku.com/player/getFlvPath/' + \
|
||||
'sid/' + sid + \
|
||||
'_' + str(int(n) + 1).zfill(2) + \
|
||||
'/st/' + self.parse_ext_l(format) + \
|
||||
'/fileid/' + get_fileid(format, n) + '?' + \
|
||||
compat_urllib_parse.urlencode(param)
|
||||
video_urls.append(video_url)
|
||||
video_urls_dict[format] = video_urls
|
||||
|
||||
return video_urls_dict
|
||||
|
||||
def get_hd(self, fm):
|
||||
hd_id_dict = {
|
||||
'flv': '0',
|
||||
'mp4': '1',
|
||||
'hd2': '2',
|
||||
'hd3': '3',
|
||||
'3gp': '0',
|
||||
'3gphd': '1'
|
||||
}
|
||||
return hd_id_dict[fm]
|
||||
|
||||
def parse_ext_l(self, fm):
|
||||
ext_dict = {
|
||||
'flv': 'flv',
|
||||
'mp4': 'mp4',
|
||||
'hd2': 'flv',
|
||||
'hd3': 'flv',
|
||||
'3gp': 'flv',
|
||||
'3gphd': 'mp4'
|
||||
}
|
||||
return ext_dict[fm]
|
||||
|
||||
def get_format_name(self, fm):
|
||||
_dict = {
|
||||
'3gp': 'h6',
|
||||
'3gphd': 'h5',
|
||||
'flv': 'h4',
|
||||
'mp4': 'h3',
|
||||
'hd2': 'h2',
|
||||
'hd3': 'h1'
|
||||
}
|
||||
return _dict[fm]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
|
||||
def retrieve_data(req_url, note):
|
||||
req = compat_urllib_request.Request(req_url)
|
||||
|
||||
config = self._download_json(info_url, video_id)
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
|
||||
|
||||
error_code = config['data'][0].get('error_code')
|
||||
raw_data = self._download_json(req, video_id, note=note)
|
||||
return raw_data['data'][0]
|
||||
|
||||
# request basic data
|
||||
data1 = retrieve_data(
|
||||
'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
|
||||
'Downloading JSON metadata 1')
|
||||
data2 = retrieve_data(
|
||||
'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
|
||||
'Downloading JSON metadata 2')
|
||||
|
||||
error_code = data1.get('error_code')
|
||||
if error_code:
|
||||
# -8 means blocked outside China.
|
||||
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
||||
raise ExtractorError(error or 'Server reported error %i' % error_code,
|
||||
expected=True)
|
||||
|
||||
video_title = config['data'][0]['title']
|
||||
seed = config['data'][0]['seed']
|
||||
|
||||
format = self._downloader.params.get('format', None)
|
||||
supported_format = list(config['data'][0]['streamfileids'].keys())
|
||||
|
||||
# TODO proper format selection
|
||||
if format is None or format == 'best':
|
||||
if 'hd2' in supported_format:
|
||||
format = 'hd2'
|
||||
error = data1.get('error')
|
||||
if error is not None and '因版权原因无法观看此视频' in error:
|
||||
raise ExtractorError(
|
||||
'Youku said: Sorry, this video is available in China only', expected=True)
|
||||
else:
|
||||
format = 'flv'
|
||||
ext = 'flv'
|
||||
elif format == 'worst':
|
||||
format = 'mp4'
|
||||
ext = 'mp4'
|
||||
else:
|
||||
format = 'flv'
|
||||
ext = 'flv'
|
||||
msg = 'Youku server reported error %i' % error_code
|
||||
if error is not None:
|
||||
msg += ': ' + error
|
||||
raise ExtractorError(msg)
|
||||
|
||||
fileid = config['data'][0]['streamfileids'][format]
|
||||
keys = [s['k'] for s in config['data'][0]['segs'][format]]
|
||||
# segs is usually a dictionary, but an empty *list* if an error occured.
|
||||
title = data1['title']
|
||||
|
||||
files_info = []
|
||||
sid = self._gen_sid()
|
||||
fileid = self._get_file_id(fileid, seed)
|
||||
# generate video_urls_dict
|
||||
video_urls_dict = self.construct_video_urls(data1, data2)
|
||||
|
||||
# column 8,9 of fileid represent the segment number
|
||||
# fileid[7:9] should be changed
|
||||
for index, key in enumerate(keys):
|
||||
temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
|
||||
download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
|
||||
# construct info
|
||||
entries = [{
|
||||
'id': '%s_part%d' % (video_id, i + 1),
|
||||
'title': title,
|
||||
'formats': [],
|
||||
# some formats are not available for all parts, we have to detect
|
||||
# which one has all
|
||||
} for i in range(max(len(v) for v in data1['segs'].values()))]
|
||||
for fm in data1['streamtypes']:
|
||||
video_urls = video_urls_dict[fm]
|
||||
for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries):
|
||||
entry['formats'].append({
|
||||
'url': video_url,
|
||||
'format_id': self.get_format_name(fm),
|
||||
'ext': self.parse_ext_l(fm),
|
||||
'filesize': int(seg['size']),
|
||||
})
|
||||
|
||||
info = {
|
||||
'id': '%s_part%02d' % (video_id, index),
|
||||
'url': download_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': ext,
|
||||
}
|
||||
files_info.append(info)
|
||||
|
||||
return files_info
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
@@ -29,9 +29,11 @@ from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
uppercase_escape,
|
||||
ISO3166Utils,
|
||||
)
|
||||
|
||||
|
||||
@@ -234,6 +236,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'44': {'ext': 'webm', 'width': 854, 'height': 480},
|
||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720},
|
||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
|
||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480},
|
||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480},
|
||||
|
||||
|
||||
# 3d videos
|
||||
@@ -516,6 +520,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': 'requires avconv',
|
||||
}
|
||||
},
|
||||
# Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
|
||||
'info_dict': {
|
||||
'id': 'FIl7x6_3R5Y',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:7b81415841e02ecd4313668cde88737a',
|
||||
'description': 'md5:116377fd2963b81ec4ce64b542173306',
|
||||
'upload_date': '20150625',
|
||||
'uploader_id': 'dorappi2000',
|
||||
'uploader': 'dorappi2000',
|
||||
'formats': 'mincount:33',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -785,7 +803,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
s = mobj.group(1)
|
||||
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
||||
return '/signature/%s' % dec_s
|
||||
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
|
||||
dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
|
||||
dash_doc = self._download_xml(
|
||||
dash_manifest_url, video_id,
|
||||
note='Downloading DASH manifest',
|
||||
@@ -822,6 +840,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
except StopIteration:
|
||||
full_info = self._formats.get(format_id, {}).copy()
|
||||
full_info.update(f)
|
||||
codecs = r.attrib.get('codecs')
|
||||
if codecs:
|
||||
if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
|
||||
full_info['vcodec'] = codecs
|
||||
elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
|
||||
full_info['acodec'] = codecs
|
||||
formats.append(full_info)
|
||||
else:
|
||||
existing_format.update(f)
|
||||
@@ -851,6 +875,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
else:
|
||||
player_url = None
|
||||
|
||||
dash_mpds = []
|
||||
|
||||
def add_dash_mpd(video_info):
|
||||
dash_mpd = video_info.get('dashmpd')
|
||||
if dash_mpd and dash_mpd[0] not in dash_mpds:
|
||||
dash_mpds.append(dash_mpd[0])
|
||||
|
||||
# Get video info
|
||||
embed_webpage = None
|
||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||
@@ -871,24 +902,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
note='Refetching age-gated info webpage',
|
||||
errnote='unable to download video info webpage')
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
add_dash_mpd(video_info)
|
||||
else:
|
||||
age_gate = False
|
||||
try:
|
||||
# Try looking directly into the video webpage
|
||||
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
|
||||
if not mobj:
|
||||
raise ValueError('Could not find ytplayer.config') # caught below
|
||||
video_info = None
|
||||
# Try looking directly into the video webpage
|
||||
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
|
||||
if mobj:
|
||||
json_code = uppercase_escape(mobj.group(1))
|
||||
ytplayer_config = json.loads(json_code)
|
||||
args = ytplayer_config['args']
|
||||
# Convert to the same format returned by compat_parse_qs
|
||||
video_info = dict((k, [v]) for k, v in args.items())
|
||||
if not args.get('url_encoded_fmt_stream_map'):
|
||||
raise ValueError('No stream_map present') # caught below
|
||||
except ValueError:
|
||||
# We fallback to the get_video_info pages (used by the embed page)
|
||||
if args.get('url_encoded_fmt_stream_map'):
|
||||
# Convert to the same format returned by compat_parse_qs
|
||||
video_info = dict((k, [v]) for k, v in args.items())
|
||||
add_dash_mpd(video_info)
|
||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
# We also try looking in get_video_info since it may contain different dashmpd
|
||||
# URL that points to a DASH manifest with possibly different itag set (some itags
|
||||
# are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
|
||||
# manifest pointed by get_video_info's dashmpd).
|
||||
# The general idea is to take a union of itags of both DASH manifests (for example
|
||||
# video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
|
||||
self.report_video_info_webpage_download(video_id)
|
||||
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||
for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||
video_info_url = (
|
||||
'%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||
% (proto, video_id, el_type))
|
||||
@@ -896,11 +932,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_info_url,
|
||||
video_id, note=False,
|
||||
errnote='unable to download video info webpage')
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
if 'token' in video_info:
|
||||
get_video_info = compat_parse_qs(video_info_webpage)
|
||||
add_dash_mpd(get_video_info)
|
||||
if not video_info:
|
||||
video_info = get_video_info
|
||||
if 'token' in get_video_info:
|
||||
break
|
||||
if 'token' not in video_info:
|
||||
if 'reason' in video_info:
|
||||
if 'The uploader has not made this video available in your country.' in video_info['reason']:
|
||||
regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
|
||||
if regions_allowed is not None:
|
||||
raise ExtractorError('YouTube said: This video is available in %s only' % (
|
||||
', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
|
||||
expected=True)
|
||||
raise ExtractorError(
|
||||
'YouTube said: %s' % video_info['reason'][0],
|
||||
expected=True, video_id=video_id)
|
||||
@@ -954,15 +999,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
|
||||
|
||||
# upload date
|
||||
upload_date = None
|
||||
mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
|
||||
if mobj is None:
|
||||
mobj = re.search(
|
||||
r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
|
||||
video_webpage)
|
||||
if mobj is not None:
|
||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||
upload_date = unified_strdate(upload_date)
|
||||
upload_date = self._html_search_meta(
|
||||
'datePublished', video_webpage, 'upload date', default=None)
|
||||
if not upload_date:
|
||||
upload_date = self._search_regex(
|
||||
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
||||
r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
|
||||
video_webpage, 'upload date', default=None)
|
||||
if upload_date:
|
||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
m_cat_container = self._search_regex(
|
||||
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
||||
@@ -996,12 +1042,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_description = ''
|
||||
|
||||
def _extract_count(count_name):
|
||||
count = self._search_regex(
|
||||
r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
|
||||
video_webpage, count_name, default=None)
|
||||
if count is not None:
|
||||
return int(count.replace(',', ''))
|
||||
return None
|
||||
return str_to_int(self._search_regex(
|
||||
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
|
||||
% re.escape(count_name),
|
||||
video_webpage, count_name, default=None))
|
||||
|
||||
like_count = _extract_count('like')
|
||||
dislike_count = _extract_count('dislike')
|
||||
|
||||
@@ -1116,24 +1161,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
# Look for the DASH manifest
|
||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
dash_mpd = video_info.get('dashmpd')
|
||||
if dash_mpd:
|
||||
dash_manifest_url = dash_mpd[0]
|
||||
for dash_manifest_url in dash_mpds:
|
||||
dash_formats = {}
|
||||
try:
|
||||
dash_formats = self._parse_dash_manifest(
|
||||
video_id, dash_manifest_url, player_url, age_gate)
|
||||
for df in self._parse_dash_manifest(
|
||||
video_id, dash_manifest_url, player_url, age_gate):
|
||||
# Do not overwrite DASH format found in some previous DASH manifest
|
||||
if df['format_id'] not in dash_formats:
|
||||
dash_formats[df['format_id']] = df
|
||||
except (ExtractorError, KeyError) as e:
|
||||
self.report_warning(
|
||||
'Skipping DASH manifest: %r' % e, video_id)
|
||||
else:
|
||||
if dash_formats:
|
||||
# Remove the formats we found through non-DASH, they
|
||||
# contain less info and it can be wrong, because we use
|
||||
# fixed values (for example the resolution). See
|
||||
# https://github.com/rg3/youtube-dl/issues/5774 for an
|
||||
# example.
|
||||
dash_keys = set(df['format_id'] for df in dash_formats)
|
||||
formats = [f for f in formats if f['format_id'] not in dash_keys]
|
||||
formats.extend(dash_formats)
|
||||
formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
|
||||
formats.extend(dash_formats.values())
|
||||
|
||||
# Check for malformed aspect ratio
|
||||
stretched_m = re.search(
|
||||
@@ -1290,7 +1336,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
def _extract_playlist(self, playlist_id):
|
||||
url = self._TEMPLATE_URL % playlist_id
|
||||
page = self._download_webpage(url, playlist_id)
|
||||
more_widget_html = content_html = page
|
||||
|
||||
for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
|
||||
match = match.strip()
|
||||
@@ -1310,36 +1355,36 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
self.report_warning('Youtube gives an alert message: ' + match)
|
||||
|
||||
# Extract the video ids from the playlist pages
|
||||
ids = []
|
||||
def _entries():
|
||||
more_widget_html = content_html = page
|
||||
for page_num in itertools.count(1):
|
||||
matches = re.finditer(self._VIDEO_RE, content_html)
|
||||
# We remove the duplicates and the link with index 0
|
||||
# (it's not the first video of the playlist)
|
||||
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
||||
for vid_id in new_ids:
|
||||
yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
||||
|
||||
for page_num in itertools.count(1):
|
||||
matches = re.finditer(self._VIDEO_RE, content_html)
|
||||
# We remove the duplicates and the link with index 0
|
||||
# (it's not the first video of the playlist)
|
||||
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
||||
ids.extend(new_ids)
|
||||
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
||||
if not mobj:
|
||||
break
|
||||
|
||||
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
||||
if not mobj:
|
||||
break
|
||||
|
||||
more = self._download_json(
|
||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||
'Downloading page #%s' % page_num,
|
||||
transform_source=uppercase_escape)
|
||||
content_html = more['content_html']
|
||||
if not content_html.strip():
|
||||
# Some webpages show a "Load more" button but they don't
|
||||
# have more videos
|
||||
break
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
more = self._download_json(
|
||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||
'Downloading page #%s' % page_num,
|
||||
transform_source=uppercase_escape)
|
||||
content_html = more['content_html']
|
||||
if not content_html.strip():
|
||||
# Some webpages show a "Load more" button but they don't
|
||||
# have more videos
|
||||
break
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
|
||||
page, 'title')
|
||||
|
||||
url_results = self._ids_to_results(ids)
|
||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||
return self.playlist_result(_entries(), playlist_id, playlist_title)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract playlist id
|
||||
@@ -1406,10 +1451,12 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
channel_page = self._download_webpage(
|
||||
url + '?view=57', channel_id,
|
||||
'Downloading channel page', fatal=False)
|
||||
channel_playlist_id = self._search_regex(
|
||||
[r'<meta itemprop="channelId" content="([^"]+)">',
|
||||
r'data-channel-external-id="([^"]+)"'],
|
||||
channel_page, 'channel id', default=None)
|
||||
channel_playlist_id = self._html_search_meta(
|
||||
'channelId', channel_page, 'channel id', default=None)
|
||||
if not channel_playlist_id:
|
||||
channel_playlist_id = self._search_regex(
|
||||
r'data-channel-external-id="([^"]+)"',
|
||||
channel_page, 'channel id', default=None)
|
||||
if channel_playlist_id and channel_playlist_id.startswith('UC'):
|
||||
playlist_id = 'UU' + channel_playlist_id[2:]
|
||||
return self.url_result(
|
||||
@@ -1503,7 +1550,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
url_query = {
|
||||
'search_query': query,
|
||||
'search_query': query.encode('utf-8'),
|
||||
'page': pagenum,
|
||||
'spf': 'navigate',
|
||||
}
|
||||
|
@@ -145,11 +145,15 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option(
|
||||
'--list-extractors',
|
||||
action='store_true', dest='list_extractors', default=False,
|
||||
help='List all supported extractors and the URLs they would handle')
|
||||
help='List all supported extractors')
|
||||
general.add_option(
|
||||
'--extractor-descriptions',
|
||||
action='store_true', dest='list_extractor_descriptions', default=False,
|
||||
help='Output descriptions of all supported extractors')
|
||||
general.add_option(
|
||||
'--force-generic-extractor',
|
||||
action='store_true', dest='force_generic_extractor', default=False,
|
||||
help='Force extraction to use the generic extractor')
|
||||
general.add_option(
|
||||
'--default-search',
|
||||
dest='default_search', metavar='PREFIX',
|
||||
@@ -342,12 +346,13 @@ def parseOpts(overrideArguments=None):
|
||||
video_format.add_option(
|
||||
'--youtube-skip-dash-manifest',
|
||||
action='store_false', dest='youtube_include_dash_manifest',
|
||||
help='Do not download the DASH manifest on YouTube videos')
|
||||
help='Do not download the DASH manifests and related data on YouTube videos')
|
||||
video_format.add_option(
|
||||
'--merge-output-format',
|
||||
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
|
||||
help=(
|
||||
'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.'
|
||||
'If a merge is required (e.g. bestvideo+bestaudio), '
|
||||
'output to given container format. One of mkv, mp4, ogg, webm, flv. '
|
||||
'Ignored if no merge is required'))
|
||||
|
||||
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
|
||||
@@ -725,7 +730,7 @@ def parseOpts(overrideArguments=None):
|
||||
metavar='POLICY', dest='fixup', default='detect_or_warn',
|
||||
help='Automatically correct known faults of the file. '
|
||||
'One of never (do nothing), warn (only emit a warning), '
|
||||
'detect_or_warn(the default; fix file if we can, warn otherwise)')
|
||||
'detect_or_warn (the default; fix file if we can, warn otherwise)')
|
||||
postproc.add_option(
|
||||
'--prefer-avconv',
|
||||
action='store_false', dest='prefer_ffmpeg',
|
||||
|
@@ -35,6 +35,11 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
|
||||
thumbnail_filename = info['thumbnails'][-1]['filename']
|
||||
|
||||
if not os.path.exists(encodeFilename(thumbnail_filename)):
|
||||
self._downloader.report_warning(
|
||||
'Skipping embedding the thumbnail because the file is missing.')
|
||||
return [], info
|
||||
|
||||
if info['ext'] == 'mp3':
|
||||
options = [
|
||||
'-c', 'copy', '-map', '0', '-map', '1',
|
||||
|
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
shell_quote,
|
||||
subtitles_filename,
|
||||
dfxp2srt,
|
||||
ISO639Utils,
|
||||
)
|
||||
|
||||
|
||||
@@ -307,199 +308,6 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
|
||||
|
||||
|
||||
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||
_lang_map = {
|
||||
'aa': 'aar',
|
||||
'ab': 'abk',
|
||||
'ae': 'ave',
|
||||
'af': 'afr',
|
||||
'ak': 'aka',
|
||||
'am': 'amh',
|
||||
'an': 'arg',
|
||||
'ar': 'ara',
|
||||
'as': 'asm',
|
||||
'av': 'ava',
|
||||
'ay': 'aym',
|
||||
'az': 'aze',
|
||||
'ba': 'bak',
|
||||
'be': 'bel',
|
||||
'bg': 'bul',
|
||||
'bh': 'bih',
|
||||
'bi': 'bis',
|
||||
'bm': 'bam',
|
||||
'bn': 'ben',
|
||||
'bo': 'bod',
|
||||
'br': 'bre',
|
||||
'bs': 'bos',
|
||||
'ca': 'cat',
|
||||
'ce': 'che',
|
||||
'ch': 'cha',
|
||||
'co': 'cos',
|
||||
'cr': 'cre',
|
||||
'cs': 'ces',
|
||||
'cu': 'chu',
|
||||
'cv': 'chv',
|
||||
'cy': 'cym',
|
||||
'da': 'dan',
|
||||
'de': 'deu',
|
||||
'dv': 'div',
|
||||
'dz': 'dzo',
|
||||
'ee': 'ewe',
|
||||
'el': 'ell',
|
||||
'en': 'eng',
|
||||
'eo': 'epo',
|
||||
'es': 'spa',
|
||||
'et': 'est',
|
||||
'eu': 'eus',
|
||||
'fa': 'fas',
|
||||
'ff': 'ful',
|
||||
'fi': 'fin',
|
||||
'fj': 'fij',
|
||||
'fo': 'fao',
|
||||
'fr': 'fra',
|
||||
'fy': 'fry',
|
||||
'ga': 'gle',
|
||||
'gd': 'gla',
|
||||
'gl': 'glg',
|
||||
'gn': 'grn',
|
||||
'gu': 'guj',
|
||||
'gv': 'glv',
|
||||
'ha': 'hau',
|
||||
'he': 'heb',
|
||||
'hi': 'hin',
|
||||
'ho': 'hmo',
|
||||
'hr': 'hrv',
|
||||
'ht': 'hat',
|
||||
'hu': 'hun',
|
||||
'hy': 'hye',
|
||||
'hz': 'her',
|
||||
'ia': 'ina',
|
||||
'id': 'ind',
|
||||
'ie': 'ile',
|
||||
'ig': 'ibo',
|
||||
'ii': 'iii',
|
||||
'ik': 'ipk',
|
||||
'io': 'ido',
|
||||
'is': 'isl',
|
||||
'it': 'ita',
|
||||
'iu': 'iku',
|
||||
'ja': 'jpn',
|
||||
'jv': 'jav',
|
||||
'ka': 'kat',
|
||||
'kg': 'kon',
|
||||
'ki': 'kik',
|
||||
'kj': 'kua',
|
||||
'kk': 'kaz',
|
||||
'kl': 'kal',
|
||||
'km': 'khm',
|
||||
'kn': 'kan',
|
||||
'ko': 'kor',
|
||||
'kr': 'kau',
|
||||
'ks': 'kas',
|
||||
'ku': 'kur',
|
||||
'kv': 'kom',
|
||||
'kw': 'cor',
|
||||
'ky': 'kir',
|
||||
'la': 'lat',
|
||||
'lb': 'ltz',
|
||||
'lg': 'lug',
|
||||
'li': 'lim',
|
||||
'ln': 'lin',
|
||||
'lo': 'lao',
|
||||
'lt': 'lit',
|
||||
'lu': 'lub',
|
||||
'lv': 'lav',
|
||||
'mg': 'mlg',
|
||||
'mh': 'mah',
|
||||
'mi': 'mri',
|
||||
'mk': 'mkd',
|
||||
'ml': 'mal',
|
||||
'mn': 'mon',
|
||||
'mr': 'mar',
|
||||
'ms': 'msa',
|
||||
'mt': 'mlt',
|
||||
'my': 'mya',
|
||||
'na': 'nau',
|
||||
'nb': 'nob',
|
||||
'nd': 'nde',
|
||||
'ne': 'nep',
|
||||
'ng': 'ndo',
|
||||
'nl': 'nld',
|
||||
'nn': 'nno',
|
||||
'no': 'nor',
|
||||
'nr': 'nbl',
|
||||
'nv': 'nav',
|
||||
'ny': 'nya',
|
||||
'oc': 'oci',
|
||||
'oj': 'oji',
|
||||
'om': 'orm',
|
||||
'or': 'ori',
|
||||
'os': 'oss',
|
||||
'pa': 'pan',
|
||||
'pi': 'pli',
|
||||
'pl': 'pol',
|
||||
'ps': 'pus',
|
||||
'pt': 'por',
|
||||
'qu': 'que',
|
||||
'rm': 'roh',
|
||||
'rn': 'run',
|
||||
'ro': 'ron',
|
||||
'ru': 'rus',
|
||||
'rw': 'kin',
|
||||
'sa': 'san',
|
||||
'sc': 'srd',
|
||||
'sd': 'snd',
|
||||
'se': 'sme',
|
||||
'sg': 'sag',
|
||||
'si': 'sin',
|
||||
'sk': 'slk',
|
||||
'sl': 'slv',
|
||||
'sm': 'smo',
|
||||
'sn': 'sna',
|
||||
'so': 'som',
|
||||
'sq': 'sqi',
|
||||
'sr': 'srp',
|
||||
'ss': 'ssw',
|
||||
'st': 'sot',
|
||||
'su': 'sun',
|
||||
'sv': 'swe',
|
||||
'sw': 'swa',
|
||||
'ta': 'tam',
|
||||
'te': 'tel',
|
||||
'tg': 'tgk',
|
||||
'th': 'tha',
|
||||
'ti': 'tir',
|
||||
'tk': 'tuk',
|
||||
'tl': 'tgl',
|
||||
'tn': 'tsn',
|
||||
'to': 'ton',
|
||||
'tr': 'tur',
|
||||
'ts': 'tso',
|
||||
'tt': 'tat',
|
||||
'tw': 'twi',
|
||||
'ty': 'tah',
|
||||
'ug': 'uig',
|
||||
'uk': 'ukr',
|
||||
'ur': 'urd',
|
||||
'uz': 'uzb',
|
||||
've': 'ven',
|
||||
'vi': 'vie',
|
||||
'vo': 'vol',
|
||||
'wa': 'wln',
|
||||
'wo': 'wol',
|
||||
'xh': 'xho',
|
||||
'yi': 'yid',
|
||||
'yo': 'yor',
|
||||
'za': 'zha',
|
||||
'zh': 'zho',
|
||||
'zu': 'zul',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _conver_lang_code(cls, code):
|
||||
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
|
||||
return cls._lang_map.get(code[:2])
|
||||
|
||||
def run(self, information):
|
||||
if information['ext'] not in ['mp4', 'mkv']:
|
||||
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
|
||||
@@ -525,7 +333,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
opts += ['-c:s', 'mov_text']
|
||||
for (i, lang) in enumerate(sub_langs):
|
||||
opts.extend(['-map', '%d:0' % (i + 1)])
|
||||
lang_code = self._conver_lang_code(lang)
|
||||
lang_code = ISO639Utils.short2long(lang)
|
||||
if lang_code is not None:
|
||||
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
|
||||
|
||||
|
@@ -50,7 +50,7 @@ def rsa_verify(message, signature, key):
|
||||
def update_self(to_screen, verbose):
|
||||
"""Update the program file with the latest version from the repository"""
|
||||
|
||||
UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
|
||||
UPDATE_URL = "https://rg3.github.io/youtube-dl/update/"
|
||||
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
|
||||
JSON_URL = UPDATE_URL + 'versions.json'
|
||||
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
|
||||
|
@@ -62,6 +62,8 @@ std_headers = {
|
||||
}
|
||||
|
||||
|
||||
NO_DEFAULT = object()
|
||||
|
||||
ENGLISH_MONTH_NAMES = [
|
||||
'January', 'February', 'March', 'April', 'May', 'June',
|
||||
'July', 'August', 'September', 'October', 'November', 'December']
|
||||
@@ -171,13 +173,15 @@ def xpath_with_ns(path, ns_map):
|
||||
return '/'.join(replaced)
|
||||
|
||||
|
||||
def xpath_text(node, xpath, name=None, fatal=False):
|
||||
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
||||
if sys.version_info < (2, 7): # Crazy 2.6
|
||||
xpath = xpath.encode('ascii')
|
||||
|
||||
n = node.find(xpath)
|
||||
if n is None or n.text is None:
|
||||
if fatal:
|
||||
if default is not NO_DEFAULT:
|
||||
return default
|
||||
elif fatal:
|
||||
name = xpath if name is None else name
|
||||
raise ExtractorError('Could not find XML element %s' % name)
|
||||
else:
|
||||
@@ -1841,7 +1845,10 @@ def srt_subtitles_timecode(seconds):
|
||||
|
||||
|
||||
def dfxp2srt(dfxp_data):
|
||||
_x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
|
||||
_x = functools.partial(xpath_with_ns, ns_map={
|
||||
'ttml': 'http://www.w3.org/ns/ttml',
|
||||
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
||||
})
|
||||
|
||||
def parse_node(node):
|
||||
str_or_empty = functools.partial(str_or_none, default='')
|
||||
@@ -1849,9 +1856,9 @@ def dfxp2srt(dfxp_data):
|
||||
out = str_or_empty(node.text)
|
||||
|
||||
for child in node:
|
||||
if child.tag in (_x('ttml:br'), 'br'):
|
||||
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
||||
out += '\n' + str_or_empty(child.tail)
|
||||
elif child.tag in (_x('ttml:span'), 'span'):
|
||||
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
|
||||
out += str_or_empty(parse_node(child))
|
||||
else:
|
||||
out += str_or_empty(xml.etree.ElementTree.tostring(child))
|
||||
@@ -1860,7 +1867,7 @@ def dfxp2srt(dfxp_data):
|
||||
|
||||
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
|
||||
out = []
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
|
||||
|
||||
if not paras:
|
||||
raise ValueError('Invalid dfxp/TTML subtitle')
|
||||
@@ -1879,6 +1886,468 @@ def dfxp2srt(dfxp_data):
|
||||
return ''.join(out)
|
||||
|
||||
|
||||
class ISO639Utils(object):
|
||||
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||
_lang_map = {
|
||||
'aa': 'aar',
|
||||
'ab': 'abk',
|
||||
'ae': 'ave',
|
||||
'af': 'afr',
|
||||
'ak': 'aka',
|
||||
'am': 'amh',
|
||||
'an': 'arg',
|
||||
'ar': 'ara',
|
||||
'as': 'asm',
|
||||
'av': 'ava',
|
||||
'ay': 'aym',
|
||||
'az': 'aze',
|
||||
'ba': 'bak',
|
||||
'be': 'bel',
|
||||
'bg': 'bul',
|
||||
'bh': 'bih',
|
||||
'bi': 'bis',
|
||||
'bm': 'bam',
|
||||
'bn': 'ben',
|
||||
'bo': 'bod',
|
||||
'br': 'bre',
|
||||
'bs': 'bos',
|
||||
'ca': 'cat',
|
||||
'ce': 'che',
|
||||
'ch': 'cha',
|
||||
'co': 'cos',
|
||||
'cr': 'cre',
|
||||
'cs': 'ces',
|
||||
'cu': 'chu',
|
||||
'cv': 'chv',
|
||||
'cy': 'cym',
|
||||
'da': 'dan',
|
||||
'de': 'deu',
|
||||
'dv': 'div',
|
||||
'dz': 'dzo',
|
||||
'ee': 'ewe',
|
||||
'el': 'ell',
|
||||
'en': 'eng',
|
||||
'eo': 'epo',
|
||||
'es': 'spa',
|
||||
'et': 'est',
|
||||
'eu': 'eus',
|
||||
'fa': 'fas',
|
||||
'ff': 'ful',
|
||||
'fi': 'fin',
|
||||
'fj': 'fij',
|
||||
'fo': 'fao',
|
||||
'fr': 'fra',
|
||||
'fy': 'fry',
|
||||
'ga': 'gle',
|
||||
'gd': 'gla',
|
||||
'gl': 'glg',
|
||||
'gn': 'grn',
|
||||
'gu': 'guj',
|
||||
'gv': 'glv',
|
||||
'ha': 'hau',
|
||||
'he': 'heb',
|
||||
'hi': 'hin',
|
||||
'ho': 'hmo',
|
||||
'hr': 'hrv',
|
||||
'ht': 'hat',
|
||||
'hu': 'hun',
|
||||
'hy': 'hye',
|
||||
'hz': 'her',
|
||||
'ia': 'ina',
|
||||
'id': 'ind',
|
||||
'ie': 'ile',
|
||||
'ig': 'ibo',
|
||||
'ii': 'iii',
|
||||
'ik': 'ipk',
|
||||
'io': 'ido',
|
||||
'is': 'isl',
|
||||
'it': 'ita',
|
||||
'iu': 'iku',
|
||||
'ja': 'jpn',
|
||||
'jv': 'jav',
|
||||
'ka': 'kat',
|
||||
'kg': 'kon',
|
||||
'ki': 'kik',
|
||||
'kj': 'kua',
|
||||
'kk': 'kaz',
|
||||
'kl': 'kal',
|
||||
'km': 'khm',
|
||||
'kn': 'kan',
|
||||
'ko': 'kor',
|
||||
'kr': 'kau',
|
||||
'ks': 'kas',
|
||||
'ku': 'kur',
|
||||
'kv': 'kom',
|
||||
'kw': 'cor',
|
||||
'ky': 'kir',
|
||||
'la': 'lat',
|
||||
'lb': 'ltz',
|
||||
'lg': 'lug',
|
||||
'li': 'lim',
|
||||
'ln': 'lin',
|
||||
'lo': 'lao',
|
||||
'lt': 'lit',
|
||||
'lu': 'lub',
|
||||
'lv': 'lav',
|
||||
'mg': 'mlg',
|
||||
'mh': 'mah',
|
||||
'mi': 'mri',
|
||||
'mk': 'mkd',
|
||||
'ml': 'mal',
|
||||
'mn': 'mon',
|
||||
'mr': 'mar',
|
||||
'ms': 'msa',
|
||||
'mt': 'mlt',
|
||||
'my': 'mya',
|
||||
'na': 'nau',
|
||||
'nb': 'nob',
|
||||
'nd': 'nde',
|
||||
'ne': 'nep',
|
||||
'ng': 'ndo',
|
||||
'nl': 'nld',
|
||||
'nn': 'nno',
|
||||
'no': 'nor',
|
||||
'nr': 'nbl',
|
||||
'nv': 'nav',
|
||||
'ny': 'nya',
|
||||
'oc': 'oci',
|
||||
'oj': 'oji',
|
||||
'om': 'orm',
|
||||
'or': 'ori',
|
||||
'os': 'oss',
|
||||
'pa': 'pan',
|
||||
'pi': 'pli',
|
||||
'pl': 'pol',
|
||||
'ps': 'pus',
|
||||
'pt': 'por',
|
||||
'qu': 'que',
|
||||
'rm': 'roh',
|
||||
'rn': 'run',
|
||||
'ro': 'ron',
|
||||
'ru': 'rus',
|
||||
'rw': 'kin',
|
||||
'sa': 'san',
|
||||
'sc': 'srd',
|
||||
'sd': 'snd',
|
||||
'se': 'sme',
|
||||
'sg': 'sag',
|
||||
'si': 'sin',
|
||||
'sk': 'slk',
|
||||
'sl': 'slv',
|
||||
'sm': 'smo',
|
||||
'sn': 'sna',
|
||||
'so': 'som',
|
||||
'sq': 'sqi',
|
||||
'sr': 'srp',
|
||||
'ss': 'ssw',
|
||||
'st': 'sot',
|
||||
'su': 'sun',
|
||||
'sv': 'swe',
|
||||
'sw': 'swa',
|
||||
'ta': 'tam',
|
||||
'te': 'tel',
|
||||
'tg': 'tgk',
|
||||
'th': 'tha',
|
||||
'ti': 'tir',
|
||||
'tk': 'tuk',
|
||||
'tl': 'tgl',
|
||||
'tn': 'tsn',
|
||||
'to': 'ton',
|
||||
'tr': 'tur',
|
||||
'ts': 'tso',
|
||||
'tt': 'tat',
|
||||
'tw': 'twi',
|
||||
'ty': 'tah',
|
||||
'ug': 'uig',
|
||||
'uk': 'ukr',
|
||||
'ur': 'urd',
|
||||
'uz': 'uzb',
|
||||
've': 'ven',
|
||||
'vi': 'vie',
|
||||
'vo': 'vol',
|
||||
'wa': 'wln',
|
||||
'wo': 'wol',
|
||||
'xh': 'xho',
|
||||
'yi': 'yid',
|
||||
'yo': 'yor',
|
||||
'za': 'zha',
|
||||
'zh': 'zho',
|
||||
'zu': 'zul',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def short2long(cls, code):
|
||||
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
|
||||
return cls._lang_map.get(code[:2])
|
||||
|
||||
@classmethod
|
||||
def long2short(cls, code):
|
||||
"""Convert language code from ISO 639-2/T to ISO 639-1"""
|
||||
for short_name, long_name in cls._lang_map.items():
|
||||
if long_name == code:
|
||||
return short_name
|
||||
|
||||
|
||||
class ISO3166Utils(object):
|
||||
# From http://data.okfn.org/data/core/country-list
|
||||
_country_map = {
|
||||
'AF': 'Afghanistan',
|
||||
'AX': 'Åland Islands',
|
||||
'AL': 'Albania',
|
||||
'DZ': 'Algeria',
|
||||
'AS': 'American Samoa',
|
||||
'AD': 'Andorra',
|
||||
'AO': 'Angola',
|
||||
'AI': 'Anguilla',
|
||||
'AQ': 'Antarctica',
|
||||
'AG': 'Antigua and Barbuda',
|
||||
'AR': 'Argentina',
|
||||
'AM': 'Armenia',
|
||||
'AW': 'Aruba',
|
||||
'AU': 'Australia',
|
||||
'AT': 'Austria',
|
||||
'AZ': 'Azerbaijan',
|
||||
'BS': 'Bahamas',
|
||||
'BH': 'Bahrain',
|
||||
'BD': 'Bangladesh',
|
||||
'BB': 'Barbados',
|
||||
'BY': 'Belarus',
|
||||
'BE': 'Belgium',
|
||||
'BZ': 'Belize',
|
||||
'BJ': 'Benin',
|
||||
'BM': 'Bermuda',
|
||||
'BT': 'Bhutan',
|
||||
'BO': 'Bolivia, Plurinational State of',
|
||||
'BQ': 'Bonaire, Sint Eustatius and Saba',
|
||||
'BA': 'Bosnia and Herzegovina',
|
||||
'BW': 'Botswana',
|
||||
'BV': 'Bouvet Island',
|
||||
'BR': 'Brazil',
|
||||
'IO': 'British Indian Ocean Territory',
|
||||
'BN': 'Brunei Darussalam',
|
||||
'BG': 'Bulgaria',
|
||||
'BF': 'Burkina Faso',
|
||||
'BI': 'Burundi',
|
||||
'KH': 'Cambodia',
|
||||
'CM': 'Cameroon',
|
||||
'CA': 'Canada',
|
||||
'CV': 'Cape Verde',
|
||||
'KY': 'Cayman Islands',
|
||||
'CF': 'Central African Republic',
|
||||
'TD': 'Chad',
|
||||
'CL': 'Chile',
|
||||
'CN': 'China',
|
||||
'CX': 'Christmas Island',
|
||||
'CC': 'Cocos (Keeling) Islands',
|
||||
'CO': 'Colombia',
|
||||
'KM': 'Comoros',
|
||||
'CG': 'Congo',
|
||||
'CD': 'Congo, the Democratic Republic of the',
|
||||
'CK': 'Cook Islands',
|
||||
'CR': 'Costa Rica',
|
||||
'CI': 'Côte d\'Ivoire',
|
||||
'HR': 'Croatia',
|
||||
'CU': 'Cuba',
|
||||
'CW': 'Curaçao',
|
||||
'CY': 'Cyprus',
|
||||
'CZ': 'Czech Republic',
|
||||
'DK': 'Denmark',
|
||||
'DJ': 'Djibouti',
|
||||
'DM': 'Dominica',
|
||||
'DO': 'Dominican Republic',
|
||||
'EC': 'Ecuador',
|
||||
'EG': 'Egypt',
|
||||
'SV': 'El Salvador',
|
||||
'GQ': 'Equatorial Guinea',
|
||||
'ER': 'Eritrea',
|
||||
'EE': 'Estonia',
|
||||
'ET': 'Ethiopia',
|
||||
'FK': 'Falkland Islands (Malvinas)',
|
||||
'FO': 'Faroe Islands',
|
||||
'FJ': 'Fiji',
|
||||
'FI': 'Finland',
|
||||
'FR': 'France',
|
||||
'GF': 'French Guiana',
|
||||
'PF': 'French Polynesia',
|
||||
'TF': 'French Southern Territories',
|
||||
'GA': 'Gabon',
|
||||
'GM': 'Gambia',
|
||||
'GE': 'Georgia',
|
||||
'DE': 'Germany',
|
||||
'GH': 'Ghana',
|
||||
'GI': 'Gibraltar',
|
||||
'GR': 'Greece',
|
||||
'GL': 'Greenland',
|
||||
'GD': 'Grenada',
|
||||
'GP': 'Guadeloupe',
|
||||
'GU': 'Guam',
|
||||
'GT': 'Guatemala',
|
||||
'GG': 'Guernsey',
|
||||
'GN': 'Guinea',
|
||||
'GW': 'Guinea-Bissau',
|
||||
'GY': 'Guyana',
|
||||
'HT': 'Haiti',
|
||||
'HM': 'Heard Island and McDonald Islands',
|
||||
'VA': 'Holy See (Vatican City State)',
|
||||
'HN': 'Honduras',
|
||||
'HK': 'Hong Kong',
|
||||
'HU': 'Hungary',
|
||||
'IS': 'Iceland',
|
||||
'IN': 'India',
|
||||
'ID': 'Indonesia',
|
||||
'IR': 'Iran, Islamic Republic of',
|
||||
'IQ': 'Iraq',
|
||||
'IE': 'Ireland',
|
||||
'IM': 'Isle of Man',
|
||||
'IL': 'Israel',
|
||||
'IT': 'Italy',
|
||||
'JM': 'Jamaica',
|
||||
'JP': 'Japan',
|
||||
'JE': 'Jersey',
|
||||
'JO': 'Jordan',
|
||||
'KZ': 'Kazakhstan',
|
||||
'KE': 'Kenya',
|
||||
'KI': 'Kiribati',
|
||||
'KP': 'Korea, Democratic People\'s Republic of',
|
||||
'KR': 'Korea, Republic of',
|
||||
'KW': 'Kuwait',
|
||||
'KG': 'Kyrgyzstan',
|
||||
'LA': 'Lao People\'s Democratic Republic',
|
||||
'LV': 'Latvia',
|
||||
'LB': 'Lebanon',
|
||||
'LS': 'Lesotho',
|
||||
'LR': 'Liberia',
|
||||
'LY': 'Libya',
|
||||
'LI': 'Liechtenstein',
|
||||
'LT': 'Lithuania',
|
||||
'LU': 'Luxembourg',
|
||||
'MO': 'Macao',
|
||||
'MK': 'Macedonia, the Former Yugoslav Republic of',
|
||||
'MG': 'Madagascar',
|
||||
'MW': 'Malawi',
|
||||
'MY': 'Malaysia',
|
||||
'MV': 'Maldives',
|
||||
'ML': 'Mali',
|
||||
'MT': 'Malta',
|
||||
'MH': 'Marshall Islands',
|
||||
'MQ': 'Martinique',
|
||||
'MR': 'Mauritania',
|
||||
'MU': 'Mauritius',
|
||||
'YT': 'Mayotte',
|
||||
'MX': 'Mexico',
|
||||
'FM': 'Micronesia, Federated States of',
|
||||
'MD': 'Moldova, Republic of',
|
||||
'MC': 'Monaco',
|
||||
'MN': 'Mongolia',
|
||||
'ME': 'Montenegro',
|
||||
'MS': 'Montserrat',
|
||||
'MA': 'Morocco',
|
||||
'MZ': 'Mozambique',
|
||||
'MM': 'Myanmar',
|
||||
'NA': 'Namibia',
|
||||
'NR': 'Nauru',
|
||||
'NP': 'Nepal',
|
||||
'NL': 'Netherlands',
|
||||
'NC': 'New Caledonia',
|
||||
'NZ': 'New Zealand',
|
||||
'NI': 'Nicaragua',
|
||||
'NE': 'Niger',
|
||||
'NG': 'Nigeria',
|
||||
'NU': 'Niue',
|
||||
'NF': 'Norfolk Island',
|
||||
'MP': 'Northern Mariana Islands',
|
||||
'NO': 'Norway',
|
||||
'OM': 'Oman',
|
||||
'PK': 'Pakistan',
|
||||
'PW': 'Palau',
|
||||
'PS': 'Palestine, State of',
|
||||
'PA': 'Panama',
|
||||
'PG': 'Papua New Guinea',
|
||||
'PY': 'Paraguay',
|
||||
'PE': 'Peru',
|
||||
'PH': 'Philippines',
|
||||
'PN': 'Pitcairn',
|
||||
'PL': 'Poland',
|
||||
'PT': 'Portugal',
|
||||
'PR': 'Puerto Rico',
|
||||
'QA': 'Qatar',
|
||||
'RE': 'Réunion',
|
||||
'RO': 'Romania',
|
||||
'RU': 'Russian Federation',
|
||||
'RW': 'Rwanda',
|
||||
'BL': 'Saint Barthélemy',
|
||||
'SH': 'Saint Helena, Ascension and Tristan da Cunha',
|
||||
'KN': 'Saint Kitts and Nevis',
|
||||
'LC': 'Saint Lucia',
|
||||
'MF': 'Saint Martin (French part)',
|
||||
'PM': 'Saint Pierre and Miquelon',
|
||||
'VC': 'Saint Vincent and the Grenadines',
|
||||
'WS': 'Samoa',
|
||||
'SM': 'San Marino',
|
||||
'ST': 'Sao Tome and Principe',
|
||||
'SA': 'Saudi Arabia',
|
||||
'SN': 'Senegal',
|
||||
'RS': 'Serbia',
|
||||
'SC': 'Seychelles',
|
||||
'SL': 'Sierra Leone',
|
||||
'SG': 'Singapore',
|
||||
'SX': 'Sint Maarten (Dutch part)',
|
||||
'SK': 'Slovakia',
|
||||
'SI': 'Slovenia',
|
||||
'SB': 'Solomon Islands',
|
||||
'SO': 'Somalia',
|
||||
'ZA': 'South Africa',
|
||||
'GS': 'South Georgia and the South Sandwich Islands',
|
||||
'SS': 'South Sudan',
|
||||
'ES': 'Spain',
|
||||
'LK': 'Sri Lanka',
|
||||
'SD': 'Sudan',
|
||||
'SR': 'Suriname',
|
||||
'SJ': 'Svalbard and Jan Mayen',
|
||||
'SZ': 'Swaziland',
|
||||
'SE': 'Sweden',
|
||||
'CH': 'Switzerland',
|
||||
'SY': 'Syrian Arab Republic',
|
||||
'TW': 'Taiwan, Province of China',
|
||||
'TJ': 'Tajikistan',
|
||||
'TZ': 'Tanzania, United Republic of',
|
||||
'TH': 'Thailand',
|
||||
'TL': 'Timor-Leste',
|
||||
'TG': 'Togo',
|
||||
'TK': 'Tokelau',
|
||||
'TO': 'Tonga',
|
||||
'TT': 'Trinidad and Tobago',
|
||||
'TN': 'Tunisia',
|
||||
'TR': 'Turkey',
|
||||
'TM': 'Turkmenistan',
|
||||
'TC': 'Turks and Caicos Islands',
|
||||
'TV': 'Tuvalu',
|
||||
'UG': 'Uganda',
|
||||
'UA': 'Ukraine',
|
||||
'AE': 'United Arab Emirates',
|
||||
'GB': 'United Kingdom',
|
||||
'US': 'United States',
|
||||
'UM': 'United States Minor Outlying Islands',
|
||||
'UY': 'Uruguay',
|
||||
'UZ': 'Uzbekistan',
|
||||
'VU': 'Vanuatu',
|
||||
'VE': 'Venezuela, Bolivarian Republic of',
|
||||
'VN': 'Viet Nam',
|
||||
'VG': 'Virgin Islands, British',
|
||||
'VI': 'Virgin Islands, U.S.',
|
||||
'WF': 'Wallis and Futuna',
|
||||
'EH': 'Western Sahara',
|
||||
'YE': 'Yemen',
|
||||
'ZM': 'Zambia',
|
||||
'ZW': 'Zimbabwe',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def short2full(cls, code):
|
||||
"""Convert an ISO 3166-2 country code to the corresponding full name"""
|
||||
return cls._country_map.get(code.upper())
|
||||
|
||||
|
||||
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
||||
def __init__(self, proxies=None):
|
||||
# Set default handlers
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.06.04.1'
|
||||
__version__ = '2015.07.07'
|
||||
|
Reference in New Issue
Block a user