Skip to content

Commit

Permalink
Deserialize chat json files without relying on the file extension (#858)
Browse files Browse the repository at this point in the history
* Deserialize chat json files without relying on the file extension
This fixes a huge issue with the chat updater that somehow went unnoticed
This also enables deserializing UTF16 BOM and UTF32 BOM files

* Thanks Rider
  • Loading branch information
ScrubN authored Oct 28, 2023
1 parent 2d64a7d commit c3db4a6
Showing 1 changed file with 69 additions and 20 deletions.
89 changes: 69 additions & 20 deletions TwitchDownloaderCore/Chat/ChatJson.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Runtime.Serialization;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Threading;
Expand Down Expand Up @@ -44,20 +45,9 @@ public static class ChatJson
AllowTrailingCommas = true
};

await using var fs = new FileStream(filePath, FileMode.Open, FileAccess.Read);
switch (Path.GetExtension(filePath).ToLower())
await using (var fs = new FileStream(filePath, FileMode.Open, FileAccess.Read))
{
case ".gz":
await using (var gs = new GZipStream(fs, CompressionMode.Decompress))
{
jsonDocument = await JsonDocument.ParseAsync(gs, deserializationOptions, cancellationToken);
}
break;
case ".json":
jsonDocument = await JsonDocument.ParseAsync(fs, deserializationOptions, cancellationToken);
break;
default:
throw new NotSupportedException(Path.GetFileName(filePath) + " is not a valid chat format");
jsonDocument = await GetJsonDocumentAsync(fs, filePath, deserializationOptions, cancellationToken);
}

if (jsonDocument.RootElement.TryGetProperty("FileInfo", out JsonElement fileInfoElement))
Expand Down Expand Up @@ -131,7 +121,66 @@ public static class ChatJson
return returnChatRoot;
}

private static async ValueTask UpgradeChatJson(ChatRoot chatRoot)
/// <summary>
/// Parses a chat file into a <see cref="JsonDocument"/>, choosing the decoding strategy from the
/// first four bytes of <paramref name="stream"/> instead of from the file extension.
/// </summary>
/// <param name="stream">
/// The stream to parse. When it is not seekable the signature cannot be inspected and rewound,
/// so the content is handed to the JSON parser as-is.
/// </param>
/// <param name="filePath">Only used to produce the file name embedded in exception messages.</param>
/// <param name="deserializationOptions">Options forwarded to the JSON parser.</param>
/// <param name="cancellationToken">Token observed by the stream reads and the asynchronous JSON parse.</param>
/// <returns>The parsed document. The caller owns it and is responsible for disposing it.</returns>
/// <exception cref="EndOfStreamException">The stream ends before four bytes could be read.</exception>
/// <exception cref="NotSupportedException">The leading bytes match none of the recognised signatures.</exception>
private static async Task<JsonDocument> GetJsonDocumentAsync(Stream stream, string filePath, JsonDocumentOptions deserializationOptions, CancellationToken cancellationToken = default)
{
    if (!stream.CanSeek)
    {
        // We aren't able to verify the file type. Pretend it's JSON.
        return await JsonDocument.ParseAsync(stream, deserializationOptions, cancellationToken);
    }

    const int RENT_LENGTH = 4;
    var rentedBuffer = ArrayPool<byte>.Shared.Rent(RENT_LENGTH);
    try
    {
        // A single ReadAsync call is allowed to return fewer bytes than requested even when more
        // data is available, so keep reading until the signature is complete or the stream ends.
        var bytesRead = 0;
        while (bytesRead < RENT_LENGTH)
        {
            var read = await stream.ReadAsync(rentedBuffer.AsMemory(bytesRead, RENT_LENGTH - bytesRead), cancellationToken);
            if (read == 0)
            {
                break; // End of stream.
            }

            bytesRead += read;
        }

        if (bytesRead != RENT_LENGTH)
        {
            throw new EndOfStreamException($"{Path.GetFileName(filePath)} is not a valid chat format.");
        }

        // Rewind so that whichever reader is chosen below sees the stream from where we started.
        stream.Seek(-RENT_LENGTH, SeekOrigin.Current);

        // TODO: use list patterns when .NET 7+
        // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
        // Case order matters: the UTF-32 LE mark begins with the UTF-16 LE mark, so UTF-32 is tested first.
        switch (rentedBuffer[0], rentedBuffer[1], rentedBuffer[2], rentedBuffer[3])
        {
            case (0x1F, 0x8B, _, _): // https://docs.fileformat.com/compression/gz/#gz-file-header
            {
                // The decompression stream is not seekable, so the recursive call takes the
                // "pretend it's JSON" path above.
                await using var gs = new GZipStream(stream, CompressionMode.Decompress);
                return await GetJsonDocumentAsync(gs, filePath, deserializationOptions, cancellationToken);
            }
            case (0x00, 0x00, 0xFE, 0xFF): // UTF-32 BE
            case (0xFF, 0xFE, 0x00, 0x00): // UTF-32 LE
            {
                // The stream-based JSON parser expects UTF-8 bytes, so decode to a string first.
                using var sr = new StreamReader(stream, Encoding.UTF32);
                var jsonString = await sr.ReadToEndAsync();
                return JsonDocument.Parse(jsonString.AsMemory(), deserializationOptions);
            }
            case (0xFE, 0xFF, _, _): // UTF-16 BE
            case (0xFF, 0xFE, _, _): // UTF-16 LE
            {
                // Same as above: decode to a string, then parse the text.
                using var sr = new StreamReader(stream, Encoding.Unicode);
                var jsonString = await sr.ReadToEndAsync();
                return JsonDocument.Parse(jsonString.AsMemory(), deserializationOptions);
            }
            case (0xEF, 0xBB, 0xBF, _): // UTF-8
            case ((byte)'{', _, _, _): // Starts with a '{', probably JSON
            {
                return await JsonDocument.ParseAsync(stream, deserializationOptions, cancellationToken);
            }
            default:
            {
                throw new NotSupportedException($"{Path.GetFileName(filePath)} is not a valid chat format.");
            }
        }
    }
    finally
    {
        // Runs on every exit path (return or throw) so the pooled buffer is always handed back.
        ArrayPool<byte>.Shared.Return(rentedBuffer);
    }
}

private static async Task UpgradeChatJson(ChatRoot chatRoot)
{
const int MAX_STREAM_LENGTH = 172_800; // 48 hours in seconds. https://help.twitch.tv/s/article/broadcast-guidelines
chatRoot.video ??= new Video
Expand Down Expand Up @@ -195,14 +244,14 @@ public static async Task SerializeAsync(string filePath, ChatRoot chatRoot, Chat
await JsonSerializer.SerializeAsync(fs, chatRoot, _jsonSerializerOptions, cancellationToken);
break;
case ChatCompression.Gzip:
await using (var gs = new GZipStream(fs, CompressionLevel.SmallestSize))
{
await JsonSerializer.SerializeAsync(gs, chatRoot, _jsonSerializerOptions, cancellationToken);
}
{
await using var gs = new GZipStream(fs, CompressionLevel.SmallestSize);
await JsonSerializer.SerializeAsync(gs, chatRoot, _jsonSerializerOptions, cancellationToken);
break;
}
default:
throw new NotSupportedException($"{compression} is not a supported chat compression.");
}
}
}
}
}

0 comments on commit c3db4a6

Please sign in to comment.