From ff68d714ee981aff9f5c980ed21525f5dea4908e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Schw=C3=B6rer?= Date: Sun, 12 Nov 2023 19:45:37 +0100 Subject: [PATCH] Fix download Pale (sometimes missing next-chapter links) --- Scraper/Scraper.cs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/Scraper/Scraper.cs b/Scraper/Scraper.cs index 4da91d5..02f6955 100644 --- a/Scraper/Scraper.cs +++ b/Scraper/Scraper.cs @@ -510,7 +510,7 @@ public class Scraper if (next == null) next = nodeContent.Descendants() .Where(p => p.Name.ToLower() == "a") - .Where(p => Helper.Striptease(p) == "next chapter" || Helper.Striptease(p) == "next") + .Where(p => Helper.Striptease(p) == "next chapter" || Helper.Striptease(p) == "next" || Helper.Striptease(p) == "ext chapt") .Where(p => p.Attributes.Contains("href")) .FirstOrDefault(); @@ -523,7 +523,7 @@ public class Scraper if (next == null) next = Helper.RecursiveDescendants(nodeContent) .Where(p => p.Name.ToLower() == "a") - .Where(p => Helper.Striptease(p) == "next chapter" || Helper.Striptease(p) == "next") + .Where(p => Helper.Striptease(p) == "next chapter" || Helper.Striptease(p) == "next" || Helper.Striptease(p) == "ext chapt") .Where(p => p.Attributes.Contains("href")) .FirstOrDefault(); @@ -533,6 +533,18 @@ public class Scraper .Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next")) .FirstOrDefault(); + if (next == null && ACTIVE_BOOK.Title == "Pale") + { + var nextLS = Helper.RecursiveDescendants(doc.DocumentNode) + .Where(p => p.Name.ToLower() == "a") + .Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next")) + .GroupBy(p => p.Attributes["href"].Value.Trim()) + .ToList(); + if (nextLS.Count == 1) next = nextLS.Single().FirstOrDefault(); + } + + if (next != null && next.Attributes["href"].Value.Trim() == "(https://palewebserial.wordpress.com/2023/10/10/end/") next = null; // do not process author-notes from Pale + if (next != null) { var next_url = next.Attributes["href"].Value.Trim(); @@ -557,7 +569,7 @@ public class Scraper } } - + if (next == null) prt(" > (!) No next URL found"); #endregion