1
0

Replace Linqpad-only methods and split into files (works except Hyperlinq)

This commit is contained in:
2023-08-20 16:10:39 +02:00
parent ccfeedc067
commit 9692dc531f
16 changed files with 1748 additions and 1662 deletions

17
Scraper/Chapter.cs Normal file
View File

@@ -0,0 +1,17 @@
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Mutable holder for the data collected about one chapter. All members are plain
/// public fields; only <see cref="isSpecial"/> is computed.
/// </summary>
public class Chapter
{
    /// <summary>Address of the chapter (presumably the page it was fetched from; confirm against the scraper).</summary>
    public string url;

    /// <summary>Chapter title.</summary>
    public string title;

    /// <summary>Presumably the address of the following chapter; confirm against the scraper.</summary>
    public string next;

    /// <summary>Compressed-on-serialization text payload; exact content is defined by the code that fills it.</summary>
    public GZippedString queryResult;

    /// <summary>Compressed-on-serialization text payload; by its name, the raw page source.</summary>
    public GZippedString sourcecode;

    /// <summary>Compressed-on-serialization text payload; by its name, the extracted chapter body.</summary>
    public GZippedString chapter;

    /// <summary>Marks the chapter as a prologue.</summary>
    public bool isPrologue;

    /// <summary>Marks the chapter as an epilogue.</summary>
    public bool isEpilogue;

    /// <summary>Marks the chapter as a bonus chapter.</summary>
    public bool isBonus;

    /// <summary>
    /// <c>true</c> when at least one of <see cref="isPrologue"/>, <see cref="isEpilogue"/>
    /// or <see cref="isBonus"/> is set.
    /// </summary>
    public bool isSpecial
    {
        get { return isPrologue || isEpilogue || isBonus; }
    }
}

47
Scraper/EpubParameter.cs Normal file
View File

@@ -0,0 +1,47 @@
using System.Globalization;
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Immutable set of parameters describing one book: site type, optional series
/// information, title, author, release date, language and start URL, plus a derived
/// folder name and two derived GUIDs.
/// </summary>
public class EpubParameter
{
    /// <summary>Series name, or <c>null</c> when the book is not part of a series.</summary>
    public readonly string Series;

    /// <summary>Position within the series; <c>-1</c> when constructed without series information.</summary>
    public readonly int SeriesIndex;

    /// <summary>First GUID derived from title and author (by its name, used for the OPF package; confirm at the use site).</summary>
    public readonly Guid ID_OPF;

    /// <summary>Second GUID derived from title and author (by its name, a Calibre identifier; confirm at the use site).</summary>
    public readonly Guid ID_CAL;

    /// <summary>Book title.</summary>
    public readonly string Title;

    /// <summary>Author name.</summary>
    public readonly string Author;

    /// <summary>Release date, parsed from a <c>yyyy-MM-dd</c> string.</summary>
    public readonly DateTime Release;

    /// <summary>Language value as passed to the constructor.</summary>
    public readonly string Language;

    /// <summary>URL value as passed to the constructor.</summary>
    public readonly string StartURL;

    /// <summary>File-system friendly name: the title, or "series index - title" when a series is given.</summary>
    public readonly string Foldername;

    /// <summary>Kind of site the book belongs to.</summary>
    public readonly Site SiteType;

    /// <summary>
    /// Author name with its space-separated words in reverse order, joined by ", "
    /// (for example "Jane Doe" becomes "Doe, Jane").
    /// </summary>
    public string AuthorSort { get { return Author.Split(' ').Aggregate((a, b) => b + ", " + a); } }

    /// <summary>Creates parameters for a stand-alone book (no series, index -1).</summary>
    /// <param name="st">Site type.</param>
    /// <param name="t">Title.</param>
    /// <param name="a">Author.</param>
    /// <param name="r">Release date in <c>yyyy-MM-dd</c> format.</param>
    /// <param name="l">Language.</param>
    /// <param name="s">Start URL.</param>
    public EpubParameter(Site st, string t, string a, string r, string l, string s) : this(st, null, -1, t, a, r, l, s) { }

    /// <summary>Creates parameters for a book, optionally belonging to a series.</summary>
    /// <param name="st">Site type.</param>
    /// <param name="z">Series name, or <c>null</c> for none.</param>
    /// <param name="i">Index within the series.</param>
    /// <param name="t">Title.</param>
    /// <param name="a">Author.</param>
    /// <param name="r">Release date in <c>yyyy-MM-dd</c> format.</param>
    /// <param name="l">Language.</param>
    /// <param name="s">Start URL.</param>
    /// <exception cref="FormatException"><paramref name="r"/> is not a valid <c>yyyy-MM-dd</c> date.</exception>
    public EpubParameter(Site st, string z, int i, string t, string a, string r, string l, string s)
    {
        SiteType = st;
        Series = z;
        SeriesIndex = i;
        Title = t;
        Author = a;
        Release = DateTime.ParseExact(r, "yyyy-MM-dd", CultureInfo.InvariantCulture);
        Language = l;
        StartURL = s;

        if (z == null)
        {
            Foldername = Helper.Filenamify(t);
        }
        else
        {
            Foldername = string.Format("{0} {1} - {2}", Helper.Filenamify(z), i, Helper.Filenamify(t));
        }

        // string.GetHashCode() is randomized per process on modern .NET, so seeding from it
        // would yield different GUIDs for the same book on every run. A fixed hash function
        // keeps the identifiers reproducible for a given title/author pair.
        var u = new Random(StableHash(Title) ^ StableHash(Author));
        var g = new byte[16];
        u.NextBytes(g);
        ID_OPF = new Guid(g);
        u.NextBytes(g);
        ID_CAL = new Guid(g);
    }

    /// <summary>Human-readable label: the title, prefixed by series name and index when a series is set.</summary>
    public String DisplayStr => (Series == null) ? $"{Title}" : $"{Series} {SeriesIndex} - {Title}";

    /// <summary>32-bit FNV-1a hash over the UTF-16 code units of <paramref name="text"/>; identical across processes.</summary>
    private static int StableHash(string text)
    {
        unchecked
        {
            uint hash = 2166136261u;
            foreach (char c in text)
            {
                hash = (hash ^ (uint)c) * 16777619u;
            }
            return (int)hash;
        }
    }
}

9
Scraper/Extensions.cs Normal file
View File

@@ -0,0 +1,9 @@
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Extension helpers that write values to the console.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Writes <paramref name="str"/> followed by a line terminator to standard output.
    /// </summary>
    /// <param name="str">Text to print.</param>
    public static void Dump(this string str) => Console.Out.WriteLine(str);
}

57
Scraper/GZippedString.cs Normal file
View File

@@ -0,0 +1,57 @@
using System.IO.Compression;
using System.Text;
using System.Xml.Serialization;
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// String wrapper whose XML representation is a Base64 text made of a 4-byte length
/// prefix (the UTF-8 byte count of the text) followed by the GZip-compressed UTF-8 bytes.
/// Converts implicitly from and to <see cref="string"/>.
/// </summary>
public class GZippedString : IXmlSerializable
{
    /// <summary>The uncompressed text.</summary>
    public string Value { get; set; }

    /// <summary>Always returns <c>null</c>; no schema is provided.</summary>
    public System.Xml.Schema.XmlSchema GetSchema() { return null; }

    /// <summary>Reads the element's text content, decodes it into <see cref="Value"/> and consumes the end tag.</summary>
    /// <param name="reader">Reader positioned on the element holding the encoded text.</param>
    public void ReadXml(System.Xml.XmlReader reader)
    {
        Value = DecompressString(reader.ReadString());
        reader.ReadEndElement();
    }

    /// <summary>Writes the encoded form of <see cref="Value"/> as text content.</summary>
    /// <param name="writer">Writer positioned inside the element that receives the text.</param>
    public void WriteXml(System.Xml.XmlWriter writer)
    {
        writer.WriteString(CompressString(Value));
    }

    /// <summary>Encodes <paramref name="text"/> as Base64(length prefix + GZip(UTF-8 bytes)).</summary>
    private static string CompressString(string text)
    {
        // A null value is encoded like an empty string instead of failing the whole serialization.
        byte[] plain = Encoding.UTF8.GetBytes(text ?? string.Empty);

        using (var memoryStream = new MemoryStream())
        {
            // leaveOpen: the GZip stream must be disposed to flush its trailer before the bytes are taken.
            using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Compress, true))
            {
                gZipStream.Write(plain, 0, plain.Length);
            }

            byte[] compressed = memoryStream.ToArray();
            var payload = new byte[compressed.Length + 4];
            Buffer.BlockCopy(BitConverter.GetBytes(plain.Length), 0, payload, 0, 4);
            Buffer.BlockCopy(compressed, 0, payload, 4, compressed.Length);
            return Convert.ToBase64String(payload);
        }
    }

    /// <summary>Inverse of <see cref="CompressString"/>.</summary>
    private static string DecompressString(string compressedText)
    {
        byte[] payload = Convert.FromBase64String(compressedText);
        int dataLength = BitConverter.ToInt32(payload, 0);
        var plain = new byte[dataLength];

        using (var memoryStream = new MemoryStream(payload, 4, payload.Length - 4))
        using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
        {
            // Stream.Read may return fewer bytes than requested (GZipStream does so routinely
            // on .NET 6 and later), so keep reading until the buffer is full or the stream ends.
            int total = 0;
            while (total < plain.Length)
            {
                int read = gZipStream.Read(plain, total, plain.Length - total);
                if (read == 0)
                {
                    break;
                }
                total += read;
            }

            return Encoding.UTF8.GetString(plain, 0, total);
        }
    }

    /// <summary>Wraps a string.</summary>
    public static implicit operator GZippedString(string v) => new GZippedString { Value = v };

    /// <summary>Unwraps the string; a <c>null</c> wrapper converts to <c>null</c> instead of throwing.</summary>
    public static implicit operator string(GZippedString v) => v?.Value;
}

102
Scraper/Helper.cs Normal file
View File

@@ -0,0 +1,102 @@
using HtmlAgilityPack;
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Static helpers for file names, titles, text normalization and URL composition.
/// </summary>
public class Helper
{
    /// <summary>
    /// Reduces <paramref name="v"/> to ASCII letters, digits, space and the characters
    /// <c>. - * _ ,</c>. Non-breaking spaces (U+00A0) are turned into normal spaces first.
    /// </summary>
    /// <param name="v">Text to convert.</param>
    /// <param name="repl">When <c>true</c>, spaces in the result are replaced by underscores.</param>
    /// <returns>The filtered text.</returns>
    public static string Filenamify(string v, bool repl = false)
    {
        var s = new string(v.Replace((char)160, ' ').Where(IsFilenameChar).ToArray());
        if (repl) s = s.Replace(' ', '_');
        return s;
    }

    /// <summary>Whitelist used by <see cref="Filenamify"/>.</summary>
    private static bool IsFilenameChar(char p)
    {
        return (p >= '0' && p <= '9')
            || (p >= 'A' && p <= 'Z')
            || (p >= 'a' && p <= 'z')
            || p == ' '
            || p == '.'
            || p == '-'
            || p == '*'
            || p == '_'
            || p == ',';
    }

    /// <summary>
    /// Cleans up a raw title: decodes HTML entities, normalizes dash and non-breaking space,
    /// trims whitespace and the characters <c>- : _ #</c>, drops a leading "tde"
    /// (case-insensitive) and upper-cases the first character of titles with at least two characters.
    /// </summary>
    /// <param name="raw">Raw title text.</param>
    /// <returns>The cleaned title.</returns>
    public static string TitleFmt(string raw)
    {
        raw = HtmlEntity.DeEntitize(raw);
        // NOTE(review): the first argument of this Replace was an empty character literal in the
        // reviewed text, which does not compile. EN DASH (U+2013) is presumed - confirm against the repository.
        raw = raw.Replace('\u2013', '-');
        raw = raw.Replace((char)160, ' ');
        raw = raw.Trim().Trim('-', ':', '_', '#').Trim();
        // Ordinal comparison: the prefix test must be by character so that exactly the three matched chars are cut.
        if (raw.StartsWith("tde", StringComparison.OrdinalIgnoreCase)) raw = raw.Substring(3);
        raw = raw.Trim().Trim('-', ':', '_', '#').Trim();
        if (raw.Length >= 2) raw = char.ToUpper(raw[0]) + raw.Substring(1);
        return raw;
    }

    /// <summary>
    /// Normalizes the text of an HTML node: nodes matched by the XPath expressions
    /// <c>//script</c> and <c>//meta</c> are removed (on a copy), then the entity-decoded
    /// inner text is passed to <see cref="Striptease(string)"/>.
    /// </summary>
    /// <param name="raw">Node whose text is wanted.</param>
    /// <returns>The normalized text.</returns>
    public static string Striptease(HtmlNode raw)
    {
        raw = WithoutElements(raw, "script");
        raw = WithoutElements(raw, "meta");
        return Striptease(HtmlEntity.DeEntitize(raw.InnerText));
    }

    /// <summary>
    /// Returns <paramref name="node"/> unchanged when the XPath <c>//tagName</c> selects nothing on it;
    /// otherwise returns a copy of the node from which the nodes selected on the copy were removed.
    /// </summary>
    private static HtmlNode WithoutElements(HtmlNode node, string tagName)
    {
        var xpath = "//" + tagName;
        var hits = node.SelectNodes(xpath);
        if (hits == null || !hits.Any())
        {
            return node;
        }

        // Removal is done on a copy, not on the node that was passed in.
        var copy = HtmlNode.CreateNode($"<{node.Name}></{node.Name}>");
        copy.CopyFrom(node);
        hits = copy.SelectNodes(xpath);
        if (hits != null)
        {
            foreach (var e in hits) e.Remove();
        }
        return copy;
    }

    /// <summary>
    /// Normalizes text: every whitespace character becomes a space, everything that is
    /// neither letter, digit nor whitespace is dropped, the rest is lower-cased and trimmed.
    /// </summary>
    /// <param name="raw">Text to normalize.</param>
    /// <returns>The normalized text.</returns>
    public static string Striptease(string raw)
    {
        var r = string.Join(string.Empty,
            raw
                .Select(c => char.IsWhiteSpace(c) ? ' ' : c)
                .Where(c => char.IsLetterOrDigit(c) || char.IsWhiteSpace(c))
                .Select(c => char.ToLower(c))).Trim();
        return r;
    }

    /// <summary>
    /// Joins the scheme-and-authority part of <paramref name="url"/> with <paramref name="suffix"/>
    /// using exactly one slash between them.
    /// </summary>
    /// <param name="url">Absolute URL supplying scheme and authority.</param>
    /// <param name="suffix">Path to append; leading slashes are ignored.</param>
    /// <returns>The combined URL.</returns>
    public static string CombineAuthority(string url, string suffix)
    {
        var left = new Uri(url).GetLeftPart(UriPartial.Authority);
        if (!left.EndsWith('/')) left = left + "/";
        return left + suffix.TrimStart('/');
    }

    /// <summary>
    /// Replaces everything after the last slash of <paramref name="uri1"/> with <paramref name="uri2"/>,
    /// joined by a single slash.
    /// </summary>
    /// <param name="uri1">Base URI; the part from its last slash onwards is discarded.</param>
    /// <param name="uri2">Part to append; leading slashes are ignored.</param>
    /// <returns>The combined URI.</returns>
    public static string CombineUri(string uri1, string uri2)
    {
        int lastSlash = uri1.LastIndexOf('/');
        if (lastSlash >= 0) uri1 = uri1.Substring(0, lastSlash);
        uri1 = uri1.TrimEnd('/');
        uri2 = uri2.TrimStart('/');
        return string.Format("{0}/{1}", uri1, uri2);
    }
}

18
Scraper/Hyperlinq.cs Normal file
View File

@@ -0,0 +1,18 @@
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Pairs a callback with a caption. <see cref="Dump"/> only prints the caption;
/// the callback is stored but never invoked by this class.
/// </summary>
public class Hyperlinq
{
    private readonly Action _callback;
    private readonly string _caption;

    /// <summary>Stores the callback and the caption.</summary>
    /// <param name="action">Callback associated with the link.</param>
    /// <param name="title">Caption printed by <see cref="Dump"/>.</param>
    public Hyperlinq(Action action, string title)
    {
        _callback = action;
        _caption = title;
    }

    /// <summary>Writes the caption followed by a line terminator to standard output.</summary>
    public void Dump() => Console.Out.WriteLine(_caption);
}

7
Scraper/MainMode.cs Normal file
View File

@@ -0,0 +1,7 @@
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Operating modes selectable for a run.
/// </summary>
public enum MainMode
{
    /// <summary>Generate mode (value 0).</summary>
    Generate = 0,

    /// <summary>Verify mode (value 1).</summary>
    Verify = 1,
}

8
Scraper/ProcessResult.cs Normal file
View File

@@ -0,0 +1,8 @@
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Possible outcomes of a processing step.
/// </summary>
public enum ProcessResult
{
    /// <summary>Normal success (value 0).</summary>
    SuccessNormal = 0,

    /// <summary>The end was reached (value 1).</summary>
    ReachedEnd = 1,

    /// <summary>The chapter is to be skipped (value 2).</summary>
    SkipChapter = 2,
}

1351
Scraper/Scraper.cs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Plain data holder pairing a URL with a text payload stored as a <c>GZippedString</c>.
/// NOTE(review): by its name this is the serialized form of one cache entry
/// (presumably page content keyed by URL) - confirm against the code that reads and writes it.
/// </summary>
public class SerializableCacheEntry
{
/// <summary>URL of the entry.</summary>
public string URL;
/// <summary>Content of the entry.</summary>
public GZippedString Content;
}

12
Scraper/Site.cs Normal file
View File

@@ -0,0 +1,12 @@
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// Supported site types, each with a two-letter alias that has the same value.
/// </summary>
public enum Site
{
    /// <summary>Wordpress (value 0).</summary>
    Wordpress = 0,

    /// <summary>WuxiaWorld (value 1).</summary>
    WuxiaWorld = 1,

    /// <summary>Royalroad (value 2).</summary>
    Royalroad = 2,

    /// <summary>Alias of <see cref="Wordpress"/>.</summary>
    WP = Wordpress,

    /// <summary>Alias of <see cref="WuxiaWorld"/>.</summary>
    WW = WuxiaWorld,

    /// <summary>Alias of <see cref="Royalroad"/>.</summary>
    RR = Royalroad,
}

View File

@@ -0,0 +1,8 @@
using System.Text;
namespace WordpressEboobScraper2.Scraper;
/// <summary>
/// A <see cref="StringWriter"/> that reports UTF-8 as its encoding.
/// </summary>
public class Utf8StringWriter : StringWriter
{
    /// <summary>Always <see cref="System.Text.Encoding.UTF8"/>.</summary>
    public override Encoding Encoding => Encoding.UTF8;
}