From 52ded6e3d762e87f1cc6eb6b52e1c5ab2355bb89 Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Fri, 12 Aug 2022 13:30:33 +0200 Subject: [PATCH 1/2] Handle curly braces inside strings in StringUtils.matchToClosingParenthesis This is required to extract fully more complex YouTube nsig functions. --- .../newpipe/extractor/utils/StringUtils.java | 50 +++++++++++++++++-- .../extractor/utils/StringUtilsTest.java | 10 ++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/StringUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/StringUtils.java index 3002a8d6d7..9a6091a4d5 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/StringUtils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/StringUtils.java @@ -23,14 +23,13 @@ public static String matchToClosingParenthesis(@Nonnull final String string, } startIndex += start.length(); - int endIndex = startIndex; - while (string.charAt(endIndex) != '{') { - ++endIndex; - } + int endIndex = findNextParenthesis(string, startIndex, true); ++endIndex; int openParenthesis = 1; while (openParenthesis > 0) { + endIndex = findNextParenthesis(string, endIndex, false); + switch (string.charAt(endIndex)) { case '{': ++openParenthesis; @@ -46,4 +45,47 @@ public static String matchToClosingParenthesis(@Nonnull final String string, return string.substring(startIndex, endIndex); } + + private static int findNextParenthesis(@Nonnull final String string, + final int offset, + final boolean onlyOpen) { + boolean lastEscaped = false; + char quote = ' '; + + for (int i = offset; i < string.length(); i++) { + boolean thisEscaped = false; + final char c = string.charAt(i); + + switch (c) { + case '{': + if (quote == ' ') { + return i; + } + break; + case '}': + if (!onlyOpen && quote == ' ') { + return i; + } + break; + case '\\': + if (!lastEscaped) { + thisEscaped = true; + } + break; + case '\'': + case '"': + if 
(!lastEscaped) { + if (quote == ' ') { + quote = c; + } else if (quote == c) { + quote = ' '; + } + } + } + + lastEscaped = thisEscaped; + } + + return -1; + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/StringUtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/StringUtilsTest.java index 926a6b67ba..35a680e7ee 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/StringUtilsTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/StringUtilsTest.java @@ -58,4 +58,14 @@ public void lessClosing__success() { assertEquals(expected, substring); } + + @Test + void find_closing_with_quotes() { + final String expected = "{return \",}\\\"/\"}"; + final String string = "function(d){return \",}\\\"/\"}"; + + final String substring = matchToClosingParenthesis(string, "function(d)"); + + assertEquals(expected, substring); + } } \ No newline at end of file From 5b548340e88baa09d07d24410544568eb8b6ec8f Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Fri, 12 Aug 2022 16:17:13 +0200 Subject: [PATCH 2/2] [YouTube] Catch any exception in YoutubeThrottlingDecrypter.apply and improve docs This will prevent any future extractor break due to decryption failure, as was expected to be the case before. Some documentation about the throttling decryption has also been improved. 
--- .../youtube/YoutubeThrottlingDecrypter.java | 66 ++++++++++++------- .../extractors/YoutubeStreamExtractor.java | 16 +++-- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java index 2158bb3221..cb807dc173 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java @@ -12,24 +12,24 @@ import java.util.regex.Pattern; /** + * YouTube's streaming URLs of HTML5 clients are protected with a cipher, which modifies their + * {@code n} query parameter. + * *

- * YouTube's media is protected with a cipher, - * which modifies the "n" query parameter of it's video playback urls. - * This class handles extracting that "n" query parameter, - * applying the cipher on it and returning the resulting url which is not throttled. + * This class handles extracting that {@code n} query parameter, applying the cipher on it and + * returning the resulting URL which is not throttled. *

* - *
- * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other
- * 
+ *

+ * For instance, + * {@code https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other} * becomes - *

- * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other
- * 
- *
+ * {@code https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other}. + *

+ * *

- * Decoding the "n" parameter is time intensive. For this reason, the results are cached. - * The cache can be cleared using {@link #clearCache()} + * Decoding the {@code n} parameter is time intensive. For this reason, the results are cached. + * The cache can be cleared using {@link #clearCache()}. *

* */ @@ -73,13 +73,35 @@ public YoutubeThrottlingDecrypter() throws ParsingException { } /** + * Try to decrypt a YouTube streaming URL protected with a throttling parameter. + * *

- * The videoId is only used to fetch the decryption function. - * It can be a constant value of any existing video. - * A constant value is discouraged, because it could allow tracking. + * If the streaming URL provided doesn't contain a throttling parameter, it is returned as it + * is; otherwise, the encrypted value is decrypted and this value is replaced by the decrypted + * one. + *

+ * + *

+ * If the JavaScript code has not been extracted, it is extracted with the given video ID using + * {@link YoutubeJavaScriptExtractor#extractJavaScriptCode(String)}. + *

+ * + * @param streamingUrl The streaming URL to decrypt, if needed. + * @param videoId A video ID, used to fetch the JavaScript code to get the decryption + * function. It can be a constant value of any existing video, but a + * constant value is discouraged, because it could allow tracking. + * @return A streaming URL with the decrypted parameter or the streaming URL itself if no + * throttling parameter has been found + * @throws ParsingException If the streaming URL contains a throttling parameter and its + * decryption failed */ - public static String apply(final String url, final String videoId) throws ParsingException { - if (containsNParam(url)) { + public static String apply(@Nonnull final String streamingUrl, + @Nonnull final String videoId) throws ParsingException { + if (!containsNParam(streamingUrl)) { + return streamingUrl; + } + + try { if (FUNCTION == null) { final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode(videoId); @@ -88,11 +110,11 @@ public static String apply(final String url, final String videoId) throws Parsin FUNCTION = parseDecodeFunction(playerJsCode, FUNCTION_NAME); } - final String oldNParam = parseNParam(url); + final String oldNParam = parseNParam(streamingUrl); final String newNParam = decryptNParam(FUNCTION, FUNCTION_NAME, oldNParam); - return replaceNParam(url, oldNParam, newNParam); - } else { - return url; + return replaceNParam(streamingUrl, oldNParam, newNParam); + } catch (final Exception e) { + throw new ParsingException("Could not parse, decrypt or replace n parameter", e); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 5b9ae5801a..82a5d01681 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ 
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -602,15 +602,21 @@ public List getVideoOnlyStreams() throws ExtractionException { } /** - * Try to decrypt url and fallback to given url, because decryption is not - * always needed. + * Try to decrypt a streaming URL and fall back to the given URL, because decryption may fail + * if YouTube makes breaking changes. + * + *

* This way a breaking change from YouTube does not result in a broken extractor. + *

+ * + * @param streamingUrl the streaming URL to decrypt with {@link YoutubeThrottlingDecrypter} + * @param videoId the video ID to use when extracting JavaScript player code, if needed */ - private String tryDecryptUrl(final String url, final String videoId) { + private String tryDecryptUrl(final String streamingUrl, final String videoId) { try { - return YoutubeThrottlingDecrypter.apply(url, videoId); + return YoutubeThrottlingDecrypter.apply(streamingUrl, videoId); } catch (final ParsingException e) { - return url; + return streamingUrl; } }