diff --git a/Scraper/Extensions.cs b/Scraper/Extensions.cs
index 0ce2219..3ed6c1c 100644
--- a/Scraper/Extensions.cs
+++ b/Scraper/Extensions.cs
@@ -6,4 +6,14 @@ public static class Extensions
     {
         Console.Out.WriteLine(str);
     }
+
+    /// <summary>
+    /// Reads <paramref name="stream"/> from its current position to its end and returns the bytes read.
+    /// </summary>
+    public static byte[] ReadAllToByteArray(this Stream stream)
+    {
+        using var collected = new MemoryStream();
+        stream.CopyTo(collected);
+        return collected.ToArray();
+    }
 }
\ No newline at end of file
diff --git a/Scraper/GZippedString.cs b/Scraper/GZippedString.cs
index e17c35d..8d5e806 100644
--- a/Scraper/GZippedString.cs
+++ b/Scraper/GZippedString.cs
@@ -24,31 +24,43 @@ public class GZippedString : IXmlSerializable
     private string CompressString(string text)
     {
         byte[] buffer = Encoding.UTF8.GetBytes(text);
+
         var memoryStream = new MemoryStream();
         using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Compress, true))
             gZipStream.Write(buffer, 0, buffer.Length);
+
         memoryStream.Position = 0;
         var compressedData = new byte[memoryStream.Length];
         memoryStream.Read(compressedData, 0, compressedData.Length);
+
         var gZipBuffer = new byte[compressedData.Length + 4];
-        Buffer.BlockCopy(compressedData, 0, gZipBuffer, 4, compressedData.Length);
-        Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gZipBuffer, 0, 4);
+        Buffer.BlockCopy(compressedData, 0, gZipBuffer, 4, compressedData.Length); // bytes 4..end hold the gzip content
+
+        Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gZipBuffer, 0, 4); // bytes 0..3 hold the UTF-8 byte length of the uncompressed text
+
         return Convert.ToBase64String(gZipBuffer);
     }
 
     private string DecompressString(string compressedText)
     {
         byte[] gZipBuffer = Convert.FromBase64String(compressedText);
-        using (var memoryStream = new MemoryStream())
+
+        using var memoryStream = new MemoryStream();
+
+        int dataLength = BitConverter.ToInt32(gZipBuffer[..4], 0);
+
+        memoryStream.Write(gZipBuffer, 4, gZipBuffer.Length - 4);
+        memoryStream.Position = 0;
+
+        byte[] buffer = null;
+
+        using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
         {
-            int dataLength = BitConverter.ToInt32(gZipBuffer, 0);
-            memoryStream.Write(gZipBuffer, 4, gZipBuffer.Length - 4);
-            var buffer = new byte[dataLength];
-            memoryStream.Position = 0;
-            using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
-                gZipStream.Read(buffer, 0, buffer.Length);
-            return Encoding.UTF8.GetString(buffer);
+            buffer = gZipStream.ReadAllToByteArray();
+            if (dataLength != buffer.Length) throw new InvalidDataException($"gzip payload length mismatch: header says {dataLength} bytes, decompressed {buffer.Length}");
         }
+
+        return Encoding.UTF8.GetString(buffer);
     }
 
     public static implicit operator GZippedString(string v) => new GZippedString{Value = v};