From 91f98c125e59de5f36b8d3dfec0fe102501e4a68 Mon Sep 17 00:00:00 2001 From: Adam Howard Date: Tue, 17 Nov 2015 22:51:27 +0000 Subject: [PATCH] finished implementing timestamp, along with refactoring services * added VideoInfo(AbstractVideoInfo) constructor, to support later implementation for reusing info scraped into VideoPreviewInfo, into VideoInfo * Made the Extractor class behave as a per-video object; - most method return values are video-specific, so it makes sense (to me) to have Extractor be stateful. - The only stateless methods are getVideoUrl(), getVideoId() and loadDecryptionCode(String) * Implemented a constructor for YoutubeExtractor, which performs all initialisation work --- .../org/schabi/newpipe/AbstractVideoInfo.java | 3 +- .../java/org/schabi/newpipe/VideoInfo.java | 64 ++- .../newpipe/VideoInfoItemViewCreator.java | 2 +- .../newpipe/VideoItemDetailActivity.java | 18 +- .../newpipe/VideoItemDetailFragment.java | 13 +- .../org/schabi/newpipe/VideoPreviewInfo.java | 3 - .../schabi/newpipe/services/Extractor.java | 95 +++- .../newpipe/services/StreamingService.java | 2 +- .../services/youtube/YoutubeExtractor.java | 456 +++++++++++------- .../services/youtube/YoutubeSearchEngine.java | 10 +- .../services/youtube/YoutubeService.java | 9 +- 11 files changed, 441 insertions(+), 234 deletions(-) diff --git a/app/src/main/java/org/schabi/newpipe/AbstractVideoInfo.java b/app/src/main/java/org/schabi/newpipe/AbstractVideoInfo.java index 1eaa332cd..43839b1f0 100644 --- a/app/src/main/java/org/schabi/newpipe/AbstractVideoInfo.java +++ b/app/src/main/java/org/schabi/newpipe/AbstractVideoInfo.java @@ -7,9 +7,10 @@ public abstract class AbstractVideoInfo { public String id = ""; public String title = ""; public String uploader = ""; + //public int duration = -1; public String thumbnail_url = ""; public Bitmap thumbnail = null; public String webpage_url = ""; public String upload_date = ""; - public long view_count = 0; + public long view_count = -1; } diff --git a/app/src/main/java/org/schabi/newpipe/VideoInfo.java b/app/src/main/java/org/schabi/newpipe/VideoInfo.java index 50ea90e4a..bfb90a292 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoInfo.java +++ b/app/src/main/java/org/schabi/newpipe/VideoInfo.java @@ -24,44 +24,76 @@ import android.graphics.Bitmap; /**Info object for opened videos, ie the video ready to play.*/ public class VideoInfo extends AbstractVideoInfo { + private static final String TAG = VideoInfo.class.toString(); + public String uploader_thumbnail_url = ""; public Bitmap uploader_thumbnail = null; public String description = ""; - public int duration = -1; - public int age_limit = 0; - public int like_count = 0; - public int dislike_count = 0; - public String average_rating = ""; public VideoStream[] videoStreams = null; public AudioStream[] audioStreams = null; + public int videoAvailableStatus = VIDEO_AVAILABLE; + public int duration = -1; + + /*YouTube-specific fields + todo: move these to a subclass*/ + public int age_limit = 0; + public int like_count = -1; + public int dislike_count = -1; + public String average_rating = ""; public VideoPreviewInfo nextVideo = null; public VideoPreviewInfo[] relatedVideos = null; - public int videoAvailableStatus = VIDEO_AVAILABLE; - //public int startPosition = 0;//in seconds. some metadata is not passed using a VideoInfo object! - - private static final String TAG = VideoInfo.class.toString(); + public int startPosition = -1;//in seconds. some metadata is not passed using a VideoInfo object! public static final int VIDEO_AVAILABLE = 0x00; public static final int VIDEO_UNAVAILABLE = 0x01; public static final int VIDEO_UNAVAILABLE_GEMA = 0x02;//German DRM organisation - public static class VideoStream { - public VideoStream(String url, int format, String res) { - this.url = url; this.format = format; resolution = res; + + public VideoInfo() {} + + + /**Creates a new VideoInfo object from an existing AbstractVideoInfo. + * All the shared properties are copied to the new VideoInfo.*/ + public VideoInfo(AbstractVideoInfo avi) { + this.id = avi.id; + this.title = avi.title; + this.uploader = avi.uploader; + this.thumbnail_url = avi.thumbnail_url; + this.thumbnail = avi.thumbnail; + this.webpage_url = avi.webpage_url; + this.upload_date = avi.upload_date; + this.upload_date = avi.upload_date; + this.view_count = avi.view_count; + + //todo: better than this + if(avi instanceof VideoPreviewInfo) {//shitty String to convert code + String dur = ((VideoPreviewInfo)avi).duration; + int minutes = Integer.parseInt(dur.substring(0, dur.indexOf(":"))); + int seconds = Integer.parseInt(dur.substring(dur.indexOf(":")+1, dur.length())); + this.duration = (minutes*60)+seconds; } + + } + + public static class VideoStream { public String url = ""; //url of the stream public int format = -1; public String resolution = ""; + + public VideoStream(String url, int format, String res) { + this.url = url; this.format = format; resolution = res; + } } public static class AudioStream { - public AudioStream(String url, int format, int bandwidth, int samplingRate) { - this.url = url; this.format = format; - this.bandwidth = bandwidth; this.samplingRate = samplingRate; - } public String url = ""; public int format = -1; public int bandwidth = -1; public int samplingRate = -1; + + public AudioStream(String url, int format, int bandwidth, int samplingRate) { + this.url = url; this.format = format; + this.bandwidth = bandwidth; this.samplingRate = samplingRate; + } } } \ No newline at end of file diff --git a/app/src/main/java/org/schabi/newpipe/VideoInfoItemViewCreator.java b/app/src/main/java/org/schabi/newpipe/VideoInfoItemViewCreator.java index b33a66e94..19064c211 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoInfoItemViewCreator.java +++ b/app/src/main/java/org/schabi/newpipe/VideoInfoItemViewCreator.java @@ -57,7 +57,7 @@ public class VideoInfoItemViewCreator { } holder.itemVideoTitleView.setText(info.title); holder.itemUploaderView.setText(info.uploader); - holder.itemDurationView.setText(info.duration); + holder.itemDurationView.setText(""+info.duration); if(!info.upload_date.isEmpty()) { holder.itemUploadDateView.setText(info.upload_date); } else { diff --git a/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java b/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java index b31fe970b..0b87b7b7f 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java +++ b/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java @@ -64,27 +64,25 @@ public class VideoItemDetailActivity extends AppCompatActivity { // this means the video was called though another app if (getIntent().getData() != null) { videoUrl = getIntent().getData().toString(); - Log.i(TAG, "video URL passed:\"" + videoUrl + "\""); + //Log.i(TAG, "video URL passed:\"" + videoUrl + "\""); StreamingService[] serviceList = ServiceList.getServices(); Extractor extractor = null; for (int i = 0; i < serviceList.length; i++) { if (serviceList[i].acceptUrl(videoUrl)) { arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i); - try { - currentStreamingService = i; - extractor = ServiceList.getService(i).getExtractorInstance(); - } catch (Exception e) { - e.printStackTrace(); - } + currentStreamingService = i; + //extractor = ServiceList.getService(i).getExtractorInstance(); break; } } - if(extractor == null) { + if(currentStreamingService == -1) { Toast.makeText(this, R.string.urlNotSupportedText, Toast.LENGTH_LONG) .show(); } - arguments.putString(VideoItemDetailFragment.VIDEO_URL, - extractor.getVideoUrl(extractor.getVideoId(videoUrl)));//cleans URL + //arguments.putString(VideoItemDetailFragment.VIDEO_URL, + // extractor.getVideoUrl(extractor.getVideoId(videoUrl)));//cleans URL + arguments.putString(VideoItemDetailFragment.VIDEO_URL, videoUrl); + arguments.putBoolean(VideoItemDetailFragment.AUTO_PLAY, PreferenceManager.getDefaultSharedPreferences(this) .getBoolean(getString(R.string.autoPlayThroughIntent), false)); diff --git a/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java b/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java index 6383a836a..351692f2a 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java +++ b/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java @@ -90,16 +90,18 @@ public class VideoItemDetailFragment extends Fragment { private class ExtractorRunnable implements Runnable { private Handler h = new Handler(); private Extractor extractor; + private StreamingService service; private String videoUrl; - public ExtractorRunnable(String videoUrl, Extractor extractor, VideoItemDetailFragment f) { - this.extractor = extractor; + public ExtractorRunnable(String videoUrl, StreamingService service, VideoItemDetailFragment f) { + this.service = service; this.videoUrl = videoUrl; } @Override public void run() { try { - VideoInfo videoInfo = extractor.getVideoInfo(videoUrl); + this.extractor = service.getExtractorInstance(videoUrl); + VideoInfo videoInfo = extractor.getVideoInfo(); h.post(new VideoResultReturnedRunnable(videoInfo)); if (videoInfo.videoAvailableStatus == VideoInfo.VIDEO_AVAILABLE) { h.post(new SetThumbnailRunnable( @@ -239,7 +241,7 @@ public class VideoItemDetailFragment extends Fragment { //this is horribly convoluted //TODO: find a better way to convert YYYY-MM-DD to a locale-specific date - //suggestions welcome + //suggestions are welcome int year = Integer.parseInt(info.upload_date.substring(0, 4)); int month = Integer.parseInt(info.upload_date.substring(5, 7)); int date = Integer.parseInt(info.upload_date.substring(8, 10)); @@ -255,6 +257,7 @@ public class VideoItemDetailFragment extends Fragment { descriptionView.setMovementMethod(LinkMovementMethod.getInstance()); actionBarHandler.setVideoInfo(info.webpage_url, info.title); + actionBarHandler.setStartPosition(info.startPosition); // parse streams Vector streamsToUse = new Vector<>(); @@ -357,7 +360,7 @@ public class VideoItemDetailFragment extends Fragment { StreamingService streamingService = ServiceList.getService( getArguments().getInt(STREAMING_SERVICE)); extractorThread = new Thread(new ExtractorRunnable( - getArguments().getString(VIDEO_URL), streamingService.getExtractorInstance(), this)); + getArguments().getString(VIDEO_URL), streamingService, this)); autoPlayEnabled = getArguments().getBoolean(AUTO_PLAY); extractorThread.start(); diff --git a/app/src/main/java/org/schabi/newpipe/VideoPreviewInfo.java b/app/src/main/java/org/schabi/newpipe/VideoPreviewInfo.java index 6f644f3b1..f49bd0a0e 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoPreviewInfo.java +++ b/app/src/main/java/org/schabi/newpipe/VideoPreviewInfo.java @@ -26,10 +26,7 @@ import android.os.Parcelable; /**Info object for previews of unopened videos, eg search results, related videos*/ public class VideoPreviewInfo extends AbstractVideoInfo implements Parcelable { - - public String duration = ""; - protected VideoPreviewInfo(Parcel in) { id = in.readString(); title = in.readString(); diff --git a/app/src/main/java/org/schabi/newpipe/services/Extractor.java b/app/src/main/java/org/schabi/newpipe/services/Extractor.java index 7f2e18f50..a38b13f13 100644 --- a/app/src/main/java/org/schabi/newpipe/services/Extractor.java +++ b/app/src/main/java/org/schabi/newpipe/services/Extractor.java @@ -22,9 +22,94 @@ package org.schabi.newpipe.services; import org.schabi.newpipe.VideoInfo; -/**Scrapes information from a video streaming service (eg, YouTube). To implement*/ -public interface Extractor { - VideoInfo getVideoInfo(String siteUrl); - String getVideoUrl(String videoId); - String getVideoId(String videoUrl); +/**Scrapes information from a video streaming service (eg, YouTube).*/ +public abstract class Extractor { + public String pageUrl; + public VideoInfo videoInfo; + + public Extractor(String url) { + this.pageUrl = url; + } + + /**Fills out the video info fields which are common to all services. + * Probably needs to be overridden by subclasses*/ + public VideoInfo getVideoInfo() + { + if(videoInfo == null) { + videoInfo = new VideoInfo(); + } + + if(videoInfo.webpage_url.isEmpty()) { + videoInfo.webpage_url = pageUrl; + } + + if(videoInfo.title.isEmpty()) { + videoInfo.title = getTitle(); + } + + if(videoInfo.duration < 1) { + videoInfo.duration = getLength(); + } + + + if(videoInfo.uploader.isEmpty()) { + videoInfo.uploader = getUploader(); + } + + if(videoInfo.description.isEmpty()) { + videoInfo.description = getDescription(); + } + + if(videoInfo.view_count == -1) { + videoInfo.view_count = getViews(); + } + + if(videoInfo.upload_date.isEmpty()) { + videoInfo.upload_date = getUploadDate(); + } + + if(videoInfo.thumbnail_url.isEmpty()) { + videoInfo.thumbnail_url = getThumbnailUrl(); + } + + if(videoInfo.id.isEmpty()) { + videoInfo.id = getVideoId(pageUrl); + } + + /** Load and extract audio*/ + if(videoInfo.audioStreams == null) { + videoInfo.audioStreams = getAudioStreams(); + } + /** Extract video stream url*/ + if(videoInfo.videoStreams == null) { + videoInfo.videoStreams = getVideoStreams(); + } + + if(videoInfo.uploader_thumbnail_url.isEmpty()) { + videoInfo.uploader_thumbnail_url = getUploaderThumbnailUrl(); + } + + if(videoInfo.startPosition < 0) { + videoInfo.startPosition = getTimeStamp(); + } + + //Bitmap thumbnail = null; + //Bitmap uploader_thumbnail = null; + //int videoAvailableStatus = VIDEO_AVAILABLE; + return videoInfo; + } + + public abstract String getVideoUrl(String videoId); + public abstract String getVideoId(String siteUrl); + public abstract int getTimeStamp(); + public abstract String getTitle(); + public abstract String getDescription(); + public abstract String getUploader(); + public abstract int getLength(); + public abstract int getViews(); + public abstract String getUploadDate(); + public abstract String getThumbnailUrl(); + public abstract String getUploaderThumbnailUrl(); + public abstract VideoInfo.AudioStream[] getAudioStreams(); + public abstract VideoInfo.VideoStream[] getVideoStreams(); } diff --git a/app/src/main/java/org/schabi/newpipe/services/StreamingService.java b/app/src/main/java/org/schabi/newpipe/services/StreamingService.java index 0e9a022c2..4321340c6 100644 --- a/app/src/main/java/org/schabi/newpipe/services/StreamingService.java +++ b/app/src/main/java/org/schabi/newpipe/services/StreamingService.java @@ -25,7 +25,7 @@ public interface StreamingService { public String name = ""; } ServiceInfo getServiceInfo(); - Extractor getExtractorInstance(); + Extractor getExtractorInstance(String url); SearchEngine getSearchEngineInstance(); /**When a VIEW_ACTION is caught this function will test if the url delivered within the calling diff --git a/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeExtractor.java b/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeExtractor.java index fd902c320..9c47b76cc 100644 --- a/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeExtractor.java +++ b/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeExtractor.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.services.youtube; import android.util.Log; import android.util.Xml; +import org.json.JSONException; import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -46,14 +47,225 @@ import java.util.regex.Pattern; * along with NewPipe. If not, see . */ -public class YoutubeExtractor implements Extractor { +public class YoutubeExtractor extends Extractor { private static final String TAG = YoutubeExtractor.class.toString(); + private String pageContents; + private Document doc; + private JSONObject jsonObj; + private JSONObject playerArgs; - // These lists only contain itag formats that are supported by the common Android Video player. - // How ever if you are heading for a list showing all itag formats look at - // https://github.com/rg3/youtube-dl/issues/1687 + // static values + private static final String DECRYPTION_FUNC_NAME="decrypt"; + // cached values + private static volatile String decryptionCode = ""; + + + public YoutubeExtractor(String pageUrl) { + super(pageUrl);//most common videoInfo fields are now set in our superclass, for all services + pageContents = Downloader.download(cleanUrl(pageUrl)); + doc = Jsoup.parse(pageContents, pageUrl); + + //attempt to load the youtube js player JSON arguments + try { + String jsonString = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContents); + jsonObj = new JSONObject(jsonString); + playerArgs = jsonObj.getJSONObject("args"); + + } catch (Exception e) {//if this fails, the video is most likely not available. + // Determining why is done later. + videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE; + Log.d(TAG, "Could not load JSON data for Youtube video \""+pageUrl+"\". This most likely means the video is unavailable"); + } + + //---------------------------------- + // load and parse description code, if it isn't already initialised + //---------------------------------- + if (decryptionCode.isEmpty()) { + try { + // The Youtube service needs to be initialized by downloading the + // js-Youtube-player. This is done in order to get the algorithm + // for decrypting cryptic signatures inside certain stream urls. + JSONObject ytAssets = jsonObj.getJSONObject("assets"); + String playerUrl = ytAssets.getString("js"); + + if (playerUrl.startsWith("//")) { + playerUrl = "https:" + playerUrl; + } + decryptionCode = loadDecryptionCode(playerUrl); + } catch (Exception e){ + Log.d(TAG, "Could not load decryption code for the Youtube service."); + e.printStackTrace(); + } + } + } + + @Override + public String getTitle() { + try {//json player args method + return playerArgs.getString("title"); + } catch(JSONException je) {//html method + je.printStackTrace(); + Log.w(TAG, "failed to load title from JSON args; trying to extract it from HTML"); + } try { // fall through to fall-back + return doc.select("meta[name=title]").attr("content"); + } catch (Exception e) { + Log.e(TAG, "failed permanently to load title."); + e.printStackTrace(); + return ""; + } + } + + @Override + public String getDescription() { + try { + return doc.select("p[id=\"eow-description\"]").first().html(); + } catch (Exception e) {//todo: add fallback method + Log.e(TAG, "failed to load description."); + e.printStackTrace(); + return ""; + } + } + + @Override + public String getUploader() { + try {//json player args method + return playerArgs.getString("author"); + } catch(JSONException je) { + je.printStackTrace(); + Log.w(TAG, "failed to load uploader name from JSON args; trying to extract it from HTML"); + } try {//fall through to fallback HTML method + return doc.select("div.yt-user-info").first().text(); + } catch (Exception e) { + e.printStackTrace(); + Log.e(TAG, "failed permanently to load uploader name."); + return ""; + } + } + + @Override + public int getLength() { + try { + return playerArgs.getInt("length_seconds"); + } catch (JSONException je) {//todo: find fallback method + Log.e(TAG, "failed to load video duration from JSON args"); + je.printStackTrace(); + return -1; + } + } + + @Override + public int getViews() { + try { + String viewCountString = doc.select("meta[itemprop=interactionCount]").attr("content"); + return Integer.parseInt(viewCountString); + } catch (Exception e) {//todo: find fallback method + Log.e(TAG, "failed to number of views"); + e.printStackTrace(); + return -1; + } + } + + @Override + public String getUploadDate() { + try { + return doc.select("meta[itemprop=datePublished]").attr("content"); + } catch (Exception e) {//todo: add fallback method + Log.e(TAG, "failed to get upload date."); + e.printStackTrace(); + return ""; + } + } + + @Override + public String getThumbnailUrl() { + //first attempt getting a small image version + //in the html extracting part we try to get a thumbnail with a higher resolution + // Try to get high resolution thumbnail if it fails use low res from the player instead + try { + return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); + } catch(Exception e) { + Log.w(TAG, "Could not find high res Thumbnail. Using low res instead"); + //fall through to fallback + } try { + return playerArgs.getString("thumbnail_url"); + } catch (JSONException je) { + je.printStackTrace(); + Log.w(TAG, "failed to extract thumbnail URL from JSON args; trying to extract it from HTML"); + return ""; + } + } + + @Override + public String getUploaderThumbnailUrl() { + try { + return doc.select("a[class*=\"yt-user-photo\"]").first() + .select("img").first() + .attr("abs:data-thumb"); + } catch (Exception e) {//todo: add fallback method + Log.e(TAG, "failed to get uploader thumbnail URL."); + e.printStackTrace(); + return ""; + } + } + + @Override + public VideoInfo.AudioStream[] getAudioStreams() { + try { + String dashManifest = playerArgs.getString("dashmpd"); + return parseDashManifest(dashManifest, decryptionCode); + + } catch (NullPointerException e) { + Log.e(TAG, "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available)."); + } catch (Exception e) { + e.printStackTrace(); + } + return new VideoInfo.AudioStream[0]; + } + + @Override + public VideoInfo.VideoStream[] getVideoStreams() { + try{ + //------------------------------------ + // extract video stream url + //------------------------------------ + String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map"); + Vector videoStreams = new Vector<>(); + for(String url_data_str : encoded_url_map.split(",")) { + Map tags = new HashMap<>(); + for(String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) { + String[] split_tag = raw_tag.split("="); + tags.put(split_tag[0], split_tag[1]); + } + + int itag = Integer.parseInt(tags.get("itag")); + String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8"); + + // if video has a signature: decrypt it and add it to the url + if(tags.get("s") != null) { + streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode); + } + + if(resolveFormat(itag) != -1) { + videoStreams.add(new VideoInfo.VideoStream( + streamUrl, + resolveFormat(itag), + resolveResolutionString(itag))); + } + } + return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]); + + } catch (Exception e) { + Log.e(TAG, "Failed to get video stream"); + e.printStackTrace(); + return new VideoInfo.VideoStream[0]; + } + } + + /**These lists only contain itag formats that are supported by the common Android Video player. + However if you are looking for a list showing all itag formats, look at + https://github.com/rg3/youtube-dl/issues/1687 */ public static int resolveFormat(int itag) { switch(itag) { // video @@ -91,68 +303,28 @@ public class YoutubeExtractor implements Extractor { } } - - // static values - private static final String DECRYPTION_FUNC_NAME="decrypt"; - - // cached values - private static volatile String decryptionCode = ""; - - public void initService(String site) { - // The Youtube service needs to be initialized by downloading the - // js-Youtube-player. This is done in order to get the algorithm - // for decrypting cryptic signatures inside certain stream urls. - - // Star Wars Kid is used as a dummy video, in order to download the youtube player. - //String site = Downloader.download("https://www.youtube.com/watch?v=HPPj6viIBmU"); - //------------------------------------- - // extracting form player args - //------------------------------------- - try { - String jsonString = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", site); - JSONObject jsonObj = new JSONObject(jsonString); - - //---------------------------------- - // load and parse description code - //---------------------------------- - if (decryptionCode.isEmpty()) { - JSONObject ytAssets = jsonObj.getJSONObject("assets"); - String playerUrl = ytAssets.getString("js"); - if (playerUrl.startsWith("//")) { - playerUrl = "https:" + playerUrl; - } - decryptionCode = loadDecryptionCode(playerUrl); - } - - } catch (Exception e){ - Log.d(TAG, "Could not initialize the extractor of the Youtube service."); - e.printStackTrace(); - } - } - @Override - public String getVideoId(String videoUrl) { - String id = ""; - Pattern pat; + public String getVideoId(String url) { + String id; + String pat; - if(videoUrl.contains("youtube")) { - pat = Pattern.compile("youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})"); + if(url.contains("youtube")) { + pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})"; } - else if(videoUrl.contains("youtu.be")) { - pat = Pattern.compile("youtu\\.be/([a-zA-Z0-9_-]{11})"); + else if(url.contains("youtu.be")) { + pat = "youtu\\.be/([a-zA-Z0-9_-]{11})"; } else { - Log.e(TAG, "Error could not parse url: " + videoUrl); + Log.e(TAG, "Error could not parse url: " + url); return ""; } - Matcher mat = pat.matcher(videoUrl); - boolean foundMatch = mat.find(); - if(foundMatch){ - id = mat.group(1); - Log.i(TAG, "string \""+videoUrl+"\" matches!"); + id = matchGroup1(pat, url); + if(!id.isEmpty()){ + Log.i(TAG, "string \""+url+"\" matches!"); + return id; } - Log.i(TAG, "string \""+videoUrl+"\" does not match."); - return id; + Log.i(TAG, "string \""+url+"\" does not match."); + return ""; } @Override @@ -160,118 +332,47 @@ public class YoutubeExtractor implements Extractor { return "https://www.youtube.com/watch?v=" + videoId; } - public int getStartPosition(String siteUrl){ - String timeStamp = matchGroup1("((#|&)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", siteUrl); - Log.i(TAG, "time stamp:"+timeStamp); - //videoInfo.startPosition + /**Attempts to parse (and return) the offset to start playing the video from. + * @return the offset (in seconds), or 0 if no timestamp is found.*/ + @Override + public int getTimeStamp(){ + String timeStamp = matchGroup1("((#|&)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl); - //TODO: test this! - if(timeStamp.length() > 0) { + //TODO: test this + if(!timeStamp.isEmpty()) { String secondsString = matchGroup1("(\\d{1,3})s", timeStamp); - if(secondsString.length() == 0)//try again with unspecified units as seconds - secondsString = matchGroup1("t=(\\d{1,3})", timeStamp); String minutesString = matchGroup1("(\\d{1,3})m", timeStamp); String hoursString = matchGroup1("(\\d{1,3})h", timeStamp); - int seconds = (secondsString.length() > 0 ? Integer.parseInt(secondsString) : 0); - int minutes = (minutesString.length() > 0 ? Integer.parseInt(minutesString) : 0); - int hours = (hoursString.length() > 0 ? Integer.parseInt(hoursString) : 0); + if(secondsString.isEmpty()//if nothing was got, + && minutesString.isEmpty()//treat as unlabelled seconds + && hoursString.isEmpty()) + secondsString = matchGroup1("t=(\\d{1,3})", timeStamp); - return seconds + (60*minutes) + (3600*hours);//don't trust BODMAS! + int seconds = (secondsString.isEmpty() ? 0 : Integer.parseInt(secondsString)); + int minutes = (minutesString.isEmpty() ? 0 : Integer.parseInt(minutesString)); + int hours = (hoursString.isEmpty() ? 0 : Integer.parseInt(hoursString)); + + int ret = seconds + (60*minutes) + (3600*hours);//don't trust BODMAS! + Log.d(TAG, "derived timestamp value:"+ret); + return ret; //the ordering varies internationally }//else, return default 0 return 0; } @Override - public VideoInfo getVideoInfo(String siteUrl) { - String site = Downloader.download(siteUrl); - VideoInfo videoInfo = new VideoInfo(); - - Document doc = Jsoup.parse(site, siteUrl); - - videoInfo.id = matchGroup1("v=([0-9a-zA-Z_-]{11})", siteUrl); + public VideoInfo getVideoInfo() { + videoInfo = super.getVideoInfo(); + //todo: replace this with a call to getVideoId, if possible + videoInfo.id = matchGroup1("v=([0-9a-zA-Z_-]{11})", pageUrl); videoInfo.age_limit = 0; - videoInfo.webpage_url = siteUrl; - initService(site); - - //------------------------------------- - // extracting form player args - //------------------------------------- - JSONObject playerArgs = null; - { - try { - String jsonString = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", site); - JSONObject jsonObj = new JSONObject(jsonString); - playerArgs = jsonObj.getJSONObject("args"); - } - catch (Exception e) { - e.printStackTrace(); - // If we fail in this part the video is most likely not available. - // Determining why is done later. - videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE; - } - } - - //----------------------- - // load and extract audio - //----------------------- + //average rating try { - String dashManifest = playerArgs.getString("dashmpd"); - videoInfo.audioStreams = parseDashManifest(dashManifest, decryptionCode); - - } catch (NullPointerException e) { - Log.e(TAG, "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available)."); - } catch (Exception e) { - e.printStackTrace(); - } - - try { - //-------------------------------------------- - // extract general information about the video - //-------------------------------------------- - - videoInfo.uploader = playerArgs.getString("author"); - videoInfo.title = playerArgs.getString("title"); - //first attempt getting a small image version - //in the html extracting part we try to get a thumbnail with a higher resolution - videoInfo.thumbnail_url = playerArgs.getString("thumbnail_url"); - videoInfo.duration = playerArgs.getInt("length_seconds"); videoInfo.average_rating = playerArgs.getString("avg_rating"); - - //------------------------------------ - // extract video stream url - //------------------------------------ - String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map"); - Vector videoStreams = new Vector<>(); - for(String url_data_str : encoded_url_map.split(",")) { - Map tags = new HashMap<>(); - for(String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) { - String[] split_tag = raw_tag.split("="); - tags.put(split_tag[0], split_tag[1]); - } - - int itag = Integer.parseInt(tags.get("itag")); - String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8"); - - // if video has a signature: decrypt it and add it to the url - if(tags.get("s") != null) { - streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode); - } - - if(resolveFormat(itag) != -1) { - videoStreams.add(new VideoInfo.VideoStream( - streamUrl, - resolveFormat(itag), - resolveResolutionString(itag))); - } - } - videoInfo.videoStreams = - videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]); - - } catch (Exception e) { + } catch (JSONException e) { e.printStackTrace(); } @@ -279,7 +380,6 @@ public class YoutubeExtractor implements Extractor { // extracting information from html page //--------------------------------------- - // Determine what went wrong when the Video is not available if(videoInfo.videoAvailableStatus == VideoInfo.VIDEO_UNAVAILABLE) { if(doc.select("h1[id=\"unavailable-message\"]").first().text().contains("GEMA")) { @@ -287,22 +387,6 @@ public class YoutubeExtractor implements Extractor { } } - // Try to get high resolution thumbnail if it fails use low res from the player instead - try { - videoInfo.thumbnail_url = doc.select("link[itemprop=\"thumbnailUrl\"]").first() - .attr("abs:href"); - } catch(Exception e) { - Log.i(TAG, "Could not find high res Thumbnail. Using low res instead"); - } - - // upload date - videoInfo.upload_date = doc.select("meta[itemprop=datePublished]").attr("content"); - - //TODO: Format date locale-specifically - - - // description - videoInfo.description = doc.select("p[id=\"eow-description\"]").first().html(); String likesString = ""; String dislikesString = ""; try { @@ -325,17 +409,8 @@ public class YoutubeExtractor implements Extractor { videoInfo.dislike_count = 0; } - // uploader thumbnail - videoInfo.uploader_thumbnail_url = doc.select("a[class*=\"yt-user-photo\"]").first() - .select("img").first() - .attr("abs:data-thumb"); - - // view count TODO: locale-specific formatting - String viewCountString = doc.select("meta[itemprop=interactionCount]").attr("content"); - videoInfo.view_count = Integer.parseInt(viewCountString); - // next video - videoInfo.nextVideo = extractVideoInfoItem(doc.select("div[class=\"watch-sidebar-section\"]").first() + videoInfo.nextVideo = extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]").first() .select("li").first()); // related videos @@ -343,13 +418,14 @@ public class YoutubeExtractor implements Extractor { for(Element li : doc.select("ul[id=\"watch-related\"]").first().children()) { // first check if we have a playlist. If so leave them out if(li.select("a[class*=\"content-link\"]").first() != null) { - relatedVideos.add(extractVideoInfoItem(li)); + relatedVideos.add(extractVideoPreviewInfo(li)); } } videoInfo.relatedVideos = relatedVideos.toArray(new VideoPreviewInfo[relatedVideos.size()]); return videoInfo; } + private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) { if(!dashManifest.contains("/signature/")) { String encryptedSig = matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest); @@ -413,8 +489,10 @@ public class YoutubeExtractor implements Extractor { } return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]); } - - private VideoPreviewInfo extractVideoInfoItem(Element li) { + /**Provides information about links to other videos on the video page, such as related videos. + * This is encapsulated in a VideoPreviewInfo object, + * which is a subset of the fields in a full VideoInfo.*/ + private VideoPreviewInfo extractVideoPreviewInfo(Element li) { VideoPreviewInfo info = new VideoPreviewInfo(); info.webpage_url = li.select("a[class*=\"content-link\"]").first() .attr("abs:href"); @@ -426,8 +504,10 @@ public class YoutubeExtractor implements Extractor { //todo: check NullPointerException causing info.title = li.select("span[class=\"title\"]").first().text(); - info.view_count = Long.parseLong(li.select("span[class*=\"view-count\"]").first().text()); + info.view_count = Long.parseLong(li.select("span[class*=\"view-count\"]") + .first().text().replaceAll("[^\\d]", "")); info.uploader = li.select("span[class=\"g-hovercard\"]").first().text(); + info.duration = li.select("span[class=\"video-time\"]").first().text(); Element img = li.select("img").first(); @@ -491,15 +571,19 @@ public class YoutubeExtractor implements Extractor { return result.toString(); } + private String cleanUrl(String complexUrl) { + return getVideoUrl(getVideoId(complexUrl)); + } + private String matchGroup1(String pattern, String input) { Pattern pat = Pattern.compile(pattern); Matcher mat = pat.matcher(input); boolean foundMatch = mat.find(); - if(foundMatch){ + if (foundMatch) { return mat.group(1); } else { - Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\""); + Log.w(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\""); new Exception("failed to find pattern \""+pattern+"\"").printStackTrace(); return ""; } diff --git a/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeSearchEngine.java b/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeSearchEngine.java index 74af8cdf7..d01718ed2 100644 --- a/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeSearchEngine.java +++ b/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeSearchEngine.java @@ -62,7 +62,7 @@ public class YoutubeSearchEngine implements SearchEngine { String site; String url = builder.build().toString(); //if we've been passed a valid language code, append it to the URL - if(languageCode.length() > 0) { + if(!languageCode.isEmpty()) { //assert Pattern.matches("[a-z]{2}(-([A-Z]{2}|[0-9]{1,3}))?", languageCode); site = Downloader.download(url, languageCode); } @@ -101,6 +101,7 @@ public class YoutubeSearchEngine implements SearchEngine { // video item type } else if(!((el = item.select("div[class*=\"yt-lockup-video\"").first()) == null)) { + //todo: de-duplicate this with YoutubeExtractor.getVideoPreviewInfo() VideoPreviewInfo resultItem = new VideoPreviewInfo(); Element dl = el.select("h3").first().select("a").first(); resultItem.webpage_url = dl.attr("abs:href"); @@ -113,8 +114,9 @@ public class YoutubeSearchEngine implements SearchEngine { e.printStackTrace(); } resultItem.title = dl.text(); - resultItem.duration = item.select("span[class=\"video-time\"]").first() - .text(); + + resultItem.duration = item.select("span[class=\"video-time\"]").first().text(); + resultItem.uploader = item.select("div[class=\"yt-lockup-byline\"]").first() .select("a").first() .text(); @@ -132,7 +134,7 @@ public class YoutubeSearchEngine implements SearchEngine { } result.resultList.add(resultItem); } else { - Log.e(TAG, "GREAT FUCKING ERROR"); + Log.e(TAG, "unexpected element found:\""+el+"\""); } } return result; diff --git a/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeService.java b/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeService.java index 284fcbb16..e606c805d 100644 --- a/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeService.java +++ b/app/src/main/java/org/schabi/newpipe/services/youtube/YoutubeService.java @@ -33,8 +33,13 @@ public class YoutubeService implements StreamingService { return serviceInfo; } @Override - public Extractor getExtractorInstance() { - return new YoutubeExtractor(); + public Extractor getExtractorInstance(String url) { + if(acceptUrl(url)) { + return new YoutubeExtractor(url); + } + else { + throw new IllegalArgumentException("supplied String is not a valid Youtube URL"); + } } @Override public SearchEngine getSearchEngineInstance() {