From 2ded33110fe1fb11ac501d068b0e34f340446f93 Mon Sep 17 00:00:00 2001 From: Coffeemakr Date: Tue, 10 Jan 2017 21:10:41 +0100 Subject: [PATCH] Improve YoutubeStreamUrlIdHandler * Make it a singleton * Accept embed links * Accept share links (youtube.com/shared?ci=...) * Add tests * Accept host case-insensitively --- .../services/youtube/YoutubeService.java | 4 +- .../youtube/YoutubeStreamExtractor.java | 2 +- .../youtube/YoutubeStreamUrlIdHandler.java | 96 +++++++++++--- .../StreamPreviewInfoCollector.java | 3 +- .../YoutubeStreamUrlIdHandlerTest.java | 119 ++++++++++++++++++ 5 files changed, 203 insertions(+), 21 deletions(-) create mode 100644 app/src/test/java/org/schabi/newpipe/extractor/services/youtube/youtube/YoutubeStreamUrlIdHandlerTest.java diff --git a/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index b7cd33bc7..030878b03 100644 --- a/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -46,7 +46,7 @@ public class YoutubeService extends StreamingService { @Override public StreamExtractor getExtractorInstance(String url) throws ExtractionException, IOException { - UrlIdHandler urlIdHandler = new YoutubeStreamUrlIdHandler(); + UrlIdHandler urlIdHandler = YoutubeStreamUrlIdHandler.getInstance(); if(urlIdHandler.acceptUrl(url)) { return new YoutubeStreamExtractor(urlIdHandler, url, getServiceId()); } @@ -61,7 +61,7 @@ public class YoutubeService extends StreamingService { @Override public UrlIdHandler getUrlIdHandlerInstance() { - return new YoutubeStreamUrlIdHandler(); + return YoutubeStreamUrlIdHandler.getInstance(); } @Override diff --git a/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java 
b/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java index 5910a1efb..07a0e2ad9 100644 --- a/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java +++ b/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java @@ -184,7 +184,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { // cached values private static volatile String decryptionCode = ""; - UrlIdHandler urlidhandler = new YoutubeStreamUrlIdHandler(); + UrlIdHandler urlidhandler = YoutubeStreamUrlIdHandler.getInstance(); String pageUrl = ""; public YoutubeStreamExtractor(UrlIdHandler urlIdHandler, String pageUrl, int serviceId) diff --git a/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamUrlIdHandler.java b/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamUrlIdHandler.java index 2c3f76293..f2e7a0483 100644 --- a/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamUrlIdHandler.java +++ b/app/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamUrlIdHandler.java @@ -1,12 +1,21 @@ package org.schabi.newpipe.extractor.services.youtube; +import android.support.annotation.NonNull; + +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.Parser; import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URLDecoder; +import java.util.regex.Matcher; /** * Created by Christian Schabesberger on 02.02.16. 
@@ -29,45 +38,55 @@ import java.net.URLDecoder; */ public class YoutubeStreamUrlIdHandler implements UrlIdHandler { - @SuppressWarnings("WeakerAccess") + + private static final YoutubeStreamUrlIdHandler instance = new YoutubeStreamUrlIdHandler(); + private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})"; + + private YoutubeStreamUrlIdHandler() {} + + public static YoutubeStreamUrlIdHandler getInstance() { + return instance; + } + @Override public String getUrl(String videoId) { return "https://www.youtube.com/watch?v=" + videoId; } - @SuppressWarnings("WeakerAccess") @Override public String getId(String url) throws ParsingException, IllegalArgumentException { - if(url.isEmpty()) - { + if(url.isEmpty()) { throw new IllegalArgumentException("The url parameter should not be empty"); } - String id; - if(url.contains("youtube")) { - if(url.contains("attribution_link")) { + String id; + String lowercaseUrl = url.toLowerCase(); + if(lowercaseUrl.contains("youtube")) { + if (url.contains("attribution_link")) { try { String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url); String query = URLDecoder.decode(escapedQuery, "UTF-8"); - id = Parser.matchGroup1("v=([\\-a-zA-Z0-9_]{11})", query); - } catch(UnsupportedEncodingException uee) { + id = Parser.matchGroup1("v=" + ID_PATTERN, query); + } catch (UnsupportedEncodingException uee) { throw new ParsingException("Could not parse attribution_link", uee); } - } - else if(url.contains("vnd.youtube")) - { - id = Parser.matchGroup1("([\\-a-zA-Z0-9_]{11}).*", url); + } else if(lowercaseUrl.contains("youtube.com/shared?ci=")) { + return getRealIdFromSharedLink(url); + } else if (url.contains("vnd.youtube")) { + id = Parser.matchGroup1(ID_PATTERN, url); + } else if (url.contains("embed")) { + id = Parser.matchGroup1("embed/" + ID_PATTERN, url); } else if(url.contains("googleads")) { throw new FoundAdException("Error found add: " + url); } else { - id = Parser.matchGroup1("[?&]v=([\\-a-zA-Z0-9_]{11})", url); + id = 
Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); } } - else if(url.contains("youtu.be")) { + else if(lowercaseUrl.contains("youtu.be")) { if(url.contains("v=")) { - id = Parser.matchGroup1("v=([\\-a-zA-Z0-9_]{11})", url); + id = Parser.matchGroup1("v=" + ID_PATTERN, url); } else { - id = Parser.matchGroup1("youtu\\.be/([a-zA-Z0-9_-]{11})", url); + id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url); } } else { @@ -82,12 +101,55 @@ public class YoutubeStreamUrlIdHandler implements UrlIdHandler { } } + /** + * Get the real url from a shared uri. + * + * Shared URI's look like this: + *
+     *     * https://www.youtube.com/shared?ci=PJICrTByb3E
+     *     * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
+     * 
+ * @param url The shared url + * @return the id of the stream + * @throws ParsingException + */ + private @NonNull String getRealIdFromSharedLink(String url) throws ParsingException { + URI uri; + try { + uri = new URI(url); + } catch (URISyntaxException e) { + throw new ParsingException("Invalid shared link", e); + } + String sharedId = getSharedId(uri); + Downloader downloader = NewPipe.getDownloader(); + String content; + try { + content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId); + } catch (IOException | ReCaptchaException e) { + throw new ParsingException("Unable to resolve shared link", e); + } + // is this bad? is this fragile?: + String realId = Parser.matchGroup1("rel=\"shortlink\" href=\"https://youtu.be/" + ID_PATTERN, content); + if(sharedId.equals(realId)) { + throw new ParsingException("Got same id for as shared id: " + sharedId); + } + return realId; + } + + private @NonNull String getSharedId(URI uri) throws ParsingException { + if (!"/shared".equals(uri.getPath())) { + throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")"); + } + return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery()); + } + public String cleanUrl(String complexUrl) throws ParsingException { return getUrl(getId(complexUrl)); } @Override public boolean acceptUrl(String videoUrl) { + videoUrl = videoUrl.toLowerCase(); return videoUrl.contains("youtube") || videoUrl.contains("youtu.be"); } diff --git a/app/src/main/java/org/schabi/newpipe/extractor/stream_info/StreamPreviewInfoCollector.java b/app/src/main/java/org/schabi/newpipe/extractor/stream_info/StreamPreviewInfoCollector.java index ea978f730..20120fa2d 100644 --- a/app/src/main/java/org/schabi/newpipe/extractor/stream_info/StreamPreviewInfoCollector.java +++ b/app/src/main/java/org/schabi/newpipe/extractor/stream_info/StreamPreviewInfoCollector.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.stream_info; +import 
org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; @@ -60,7 +61,7 @@ public class StreamPreviewInfoCollector { if (urlIdHandler == null) { throw new ParsingException("Error: UrlIdHandler not set"); } else if(!resultItem.webpage_url.isEmpty()) { - resultItem.id = (new YoutubeStreamUrlIdHandler()).getId(resultItem.webpage_url); + resultItem.id = NewPipe.getService(serviceId).getUrlIdHandlerInstance().getId(resultItem.webpage_url); } resultItem.title = extractor.getTitle(); resultItem.stream_type = extractor.getStreamType(); diff --git a/app/src/test/java/org/schabi/newpipe/extractor/services/youtube/youtube/YoutubeStreamUrlIdHandlerTest.java b/app/src/test/java/org/schabi/newpipe/extractor/services/youtube/youtube/YoutubeStreamUrlIdHandlerTest.java new file mode 100644 index 000000000..79bb5d88e --- /dev/null +++ b/app/src/test/java/org/schabi/newpipe/extractor/services/youtube/youtube/YoutubeStreamUrlIdHandlerTest.java @@ -0,0 +1,119 @@ +package org.schabi.newpipe.extractor.services.youtube.youtube; + +import org.junit.Before; +import org.junit.Test; +import org.schabi.newpipe.Downloader; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.FoundAdException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.services.youtube.YoutubeStreamUrlIdHandler; + +import java.util.ArrayList; +import java.util.List; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.fail; +import static org.junit.Assert.assertTrue; + +/** + * Test for {@link YoutubeStreamUrlIdHandler} + */ +public class YoutubeStreamUrlIdHandlerTest { + private static String AD_URL = 
"https://googleads.g.doubleclick.net/aclk?sa=l&ai=C-2IPgeVTWPf4GcOStgfOnIOADf78n61GvKmmobYDrgIQASDj-5MDKAJg9ZXOgeAEoAGgy_T-A8gBAakC2gkpmquIsT6oAwGqBJMBT9BgD5kVgbN0dX602bFFaDw9vsxq-We-S8VkrXVBi6W_e7brZ36GCz1WO3EPEeklYuJjXLUowwCOKsd-8xr1UlS_tusuFJv9iX35xoBHKTRvs8-0aDbfEIm6in37QDfFuZjqgEMB8-tg0Jn_Pf1RU5OzbuU40B4Gy25NUTnOxhDKthOhKBUSZEksCEerUV8GMu10iAXCxquwApIFBggDEAEYAaAGGsgGlIjthrUDgAfItIsBqAemvhvYBwHSCAUIgGEQAbgT6AE&num=1&sig=AOD64_1DybDd4qAm5O7o9UAbTNRdqXXHFQ&ctype=21&video_id=dMO_IXYPZew&client=ca-pub-6219811747049371&adurl=http://www.youtube.com/watch%3Fv%3DdMO_IXYPZew"; + private YoutubeStreamUrlIdHandler urlIdHandler; + + @Before + public void setUp() throws Exception { + urlIdHandler = YoutubeStreamUrlIdHandler.getInstance(); + NewPipe.init(Downloader.getInstance()); + } + + @Test(expected = NullPointerException.class) + public void getIdWithNullAsUrl() throws ParsingException { + urlIdHandler.getId(null); + } + + @Test(expected = FoundAdException.class) + public void getIdForAd() throws ParsingException { + urlIdHandler.getId(AD_URL); + } + + @Test + public void getIdForInvalidUrls() throws ParsingException { + List invalidUrls = new ArrayList<>(50); + invalidUrls.add("https://www.youtube.com/watch?v=jZViOEv90d"); + invalidUrls.add("https://www.youtube.com/watchjZViOEv90d"); + invalidUrls.add("https://www.youtube.com/"); + for(String invalidUrl: invalidUrls) { + Throwable exception = null; + try { + urlIdHandler.getId(invalidUrl); + } catch (ParsingException e) { + exception = e; + } + if(exception == null) { + fail("Expected ParsingException for url: " + invalidUrl); + } + } + } + @Test + public void getId() throws Exception { + assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/watch?v=jZViOEv90dI")); + assertEquals("W-fFHeTX70Q", urlIdHandler.getId("https://www.youtube.com/watch?v=W-fFHeTX70Q")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/watch?v=jZViOEv90dI?t=100")); + assertEquals("jZViOEv90dI", 
urlIdHandler.getId("https://WWW.YouTube.com/watch?v=jZViOEv90dI?t=100")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("HTTPS://www.youtube.com/watch?v=jZViOEv90dI?t=100")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("https://youtu.be/jZViOEv90dI?t=9s")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("HTTPS://Youtu.be/jZViOEv90dI?t=9s")); + assertEquals("uEJuoEs1UxY", urlIdHandler.getId("http://www.youtube.com/watch_popup?v=uEJuoEs1UxY")); + assertEquals("uEJuoEs1UxY", urlIdHandler.getId("http://www.Youtube.com/watch_popup?v=uEJuoEs1UxY")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/embed/jZViOEv90dI")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube-nocookie.com/embed/jZViOEv90dI")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube.com/watch?v=jZViOEv90dI")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("http://youtube.com/watch?v=jZViOEv90dI")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("http://youtu.be/jZViOEv90dI?t=9s")); + assertEquals("7_WWz2DSnT8", urlIdHandler.getId("https://youtu.be/7_WWz2DSnT8")); + assertEquals("oy6NvWeVruY", urlIdHandler.getId("https://m.youtube.com/watch?v=oy6NvWeVruY")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube.com/embed/jZViOEv90dI")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.Youtube.com/embed/jZViOEv90dI")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube-nocookie.com/embed/jZViOEv90dI")); + assertEquals("EhxJLojIE_o", urlIdHandler.getId("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI")); + assertEquals("jZViOEv90dI", urlIdHandler.getId("vnd.youtube:jZViOEv90dI")); + + // Shared links + String sharedId = "7JIArTByb3E"; + String realId = "Q7JsK50NGaA"; + assertEquals(realId, 
urlIdHandler.getId("vnd.youtube://www.YouTube.com/shared?ci=" + sharedId + "&feature=twitter-deep-link")); + assertEquals(realId, urlIdHandler.getId("vnd.youtube://www.youtube.com/shared?ci=" + sharedId )); + assertEquals(realId, urlIdHandler.getId("https://www.youtube.com/shared?ci=7JIArTByb3E")); + + } + + + @Test + public void testAcceptUrl() { + assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI")); + assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI?t=100")); + assertTrue(urlIdHandler.acceptUrl("https://WWW.YouTube.com/watch?v=jZViOEv90dI?t=100")); + assertTrue(urlIdHandler.acceptUrl("HTTPS://www.youtube.com/watch?v=jZViOEv90dI?t=100")); + assertTrue(urlIdHandler.acceptUrl("https://youtu.be/jZViOEv90dI?t=9s")); + //assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch/jZViOEv90dI")); + assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/embed/jZViOEv90dI")); + assertTrue(urlIdHandler.acceptUrl("https://www.youtube-nocookie.com/embed/jZViOEv90dI")); + assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/watch?v=jZViOEv90dI")); + assertTrue(urlIdHandler.acceptUrl("http://youtu.be/jZViOEv90dI?t=9s")); + assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/embed/jZViOEv90dI")); + assertTrue(urlIdHandler.acceptUrl("http://www.youtube-nocookie.com/embed/jZViOEv90dI")); + assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare")); + assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI")); + assertTrue(urlIdHandler.acceptUrl("vnd.youtube:jZViOEv90dI")); + + assertTrue(urlIdHandler.acceptUrl("vnd.youtube:jZViOEv90dI")); + + String sharedId = "7JIArTByb3E"; + assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/shared?ci=" + sharedId + "&feature=twitter-deep-link")); + assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/shared?ci=" + sharedId )); + 
assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/shared?ci=7JIArTByb3E")); + } +} \ No newline at end of file