Quickpush-commit from 2023-11-12 22:25:52
This commit is contained in:
@@ -354,7 +354,9 @@ public class Scraper
|
||||
|
||||
#region Base
|
||||
|
||||
var nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@class,'post') and contains(@class ,'type-post')]");
|
||||
HtmlNode nodeContent = null;
|
||||
if (nodeContent == null && ACTIVE_BOOK.SiteType == Site.AO3) nodeContent = doc.DocumentNode.SelectSingleNode(@"//*[@id = 'workskin']");
|
||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@class,'post') and contains(@class ,'type-post')]");
|
||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@id,'post') and contains(@class ,'post')]");
|
||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@id,'post') and contains(@class ,'post')]");
|
||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@class ,'chapter') and not(contains(@class ,'chapter-page'))]//div[contains(@class ,'portlet-body')]");
|
||||
@@ -367,12 +369,16 @@ public class Scraper
|
||||
|
||||
var nodeChapter = nodeContent.SelectSingleNode(@"//div[contains(@class, 'entry-content') or contains(@class, 'postcontent') or contains(@class, 'post-content') or contains(@class, 'chapter-content')]");
|
||||
if (nodeChapter == null && ACTIVE_BOOK.SiteType == Site.WW) nodeChapter = nodeContent.SelectSingleNode(@"//div[contains(@id, 'content')]");
|
||||
if (nodeChapter == null && ACTIVE_BOOK.SiteType == Site.AO3) nodeChapter = nodeContent.SelectSingleNode(@"//*[@id = 'chapters']");
|
||||
|
||||
#endregion
|
||||
|
||||
#region Title
|
||||
|
||||
var titleNode = nodeContent.SelectSingleNode(@"//header[@class='entry-header']//h1[@class='entry-title']");
|
||||
HtmlNode titleNode = null;
|
||||
if (titleNode == null && ACTIVE_BOOK.SiteType == Site.AO3) titleNode = nodeContent.SelectSingleNode(@"//h3[contains(@class, 'title')]");
|
||||
if (titleNode == null && ACTIVE_BOOK.SiteType == Site.AO3) titleNode = nodeContent.SelectSingleNode(@"//h2[contains(@class, 'title')]");
|
||||
if (titleNode == null) titleNode = nodeContent.SelectSingleNode(@"//header[@class='entry-header']//h1[@class='entry-title']");
|
||||
if (titleNode == null) titleNode = nodeContent.SelectSingleNode(@"//h1[contains(@class, 'posttitle')]");
|
||||
if (titleNode == null) titleNode = nodeContent.SelectSingleNode(@"//div[contains(@class, 'fic-header')]//h1");
|
||||
if (titleNode == null && ACTIVE_BOOK.SiteType == Site.WP) titleNode = nodeContent.SelectSingleNode(@"//div[contains(@class, 'entry-content')]//strong");
|
||||
@@ -454,12 +460,6 @@ public class Scraper
|
||||
{
|
||||
prt(" [!!] Warning cannot parse title");
|
||||
}
|
||||
|
||||
if (suffix.Length > 2)
|
||||
{
|
||||
curr.title = baseTitle;
|
||||
titles.Add(baseTitle);
|
||||
}
|
||||
}
|
||||
|
||||
if (curr.title.ToLower().StartsWith(ACTIVE_BOOK.Foldername.ToLower())) {
|
||||
@@ -479,7 +479,7 @@ public class Scraper
|
||||
return ProcessResult.ReachedEnd; // prevent book II loop
|
||||
}
|
||||
|
||||
curr.isEpilogue = (titles.Any(t => t.ToLower().Contains("epilogue") || t.ToLower().Contains("epilog"))) && (ACTIVE_BOOK.SiteType!=Site.Royalroad);
|
||||
curr.isEpilogue = (titles.Any(t => t.ToLower().Contains("epilogue") || t.ToLower().Contains("epilog"))) && (ACTIVE_BOOK.SiteType!=Site.Royalroad) && (ACTIVE_BOOK!=Config.WI);
|
||||
curr.isPrologue = (titles.Any(t => t.ToLower().Contains("prologue") || t.ToLower().Contains("prolog")));
|
||||
curr.isBonus = (titles.Any(t => t.ToLower().Trim().StartsWith("bonus")));
|
||||
|
||||
@@ -533,8 +533,9 @@ public class Scraper
|
||||
.Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next"))
|
||||
.FirstOrDefault();
|
||||
|
||||
if (next == null && ACTIVE_BOOK.Title == "Pale")
|
||||
if (next == null && ACTIVE_BOOK == Config.PALE)
|
||||
{
|
||||
// some chapters in Pale miss the anchor tags on the next-chapter elem -.-
|
||||
var nextLS = Helper.RecursiveDescendants(doc.DocumentNode)
|
||||
.Where(p => p.Name.ToLower() == "a")
|
||||
.Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next"))
|
||||
@@ -543,7 +544,7 @@ public class Scraper
|
||||
if (nextLS.Count == 1) next = nextLS.Single().FirstOrDefault();
|
||||
}
|
||||
|
||||
if (next != null && next.Attributes["href"].Value.Trim() == "(https://palewebserial.wordpress.com/2023/10/10/end/") next = null; // do not process author-notes from Pale
|
||||
if (next != null && next.Attributes["href"].Value.Trim() == "https://palewebserial.wordpress.com/2023/10/10/end/") next = null; // do not process author-notes from Pale
|
||||
|
||||
if (next != null)
|
||||
{
|
||||
@@ -623,6 +624,27 @@ public class Scraper
|
||||
|
||||
#endregion
|
||||
|
||||
#region A03 Stuff
|
||||
|
||||
var ao3workNodes = nodeChapter.SelectNodes(@"//*[@id = 'work']");
|
||||
if (ao3workNodes != null)
|
||||
{
|
||||
foreach (var node in ao3workNodes)
|
||||
{
|
||||
if (nodeChapter.ChildNodes.Contains(node))
|
||||
{
|
||||
nodeChapter.RemoveChild(node);
|
||||
prt(" > ao3 work-div removed");
|
||||
}
|
||||
else
|
||||
{
|
||||
prt(" > ao3 work-div cannot be removed - skipping");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Share Div
|
||||
|
||||
var shareNodes = nodeChapter.SelectNodes(@"div[@id='jp-post-flair' or contains(@class, 'sharedaddy') or contains(@class, 'sharing') or contains(@class, 'social')]");
|
||||
|
@@ -5,8 +5,10 @@ public enum Site
|
||||
Wordpress,
|
||||
WuxiaWorld,
|
||||
Royalroad,
|
||||
ArchiveOfOurOwn,
|
||||
|
||||
WP = Wordpress,
|
||||
WW = WuxiaWorld,
|
||||
RR = Royalroad,
|
||||
WP = Wordpress,
|
||||
WW = WuxiaWorld,
|
||||
RR = Royalroad,
|
||||
AO3 = ArchiveOfOurOwn,
|
||||
}
|
Reference in New Issue
Block a user