Skip to content

Commit

Permalink
Merge pull request #6 from roofman2008/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
roofman2008 authored Nov 23, 2021
2 parents 62448a8 + 688b2cc commit 9cc346d
Show file tree
Hide file tree
Showing 13 changed files with 167 additions and 28 deletions.
18 changes: 13 additions & 5 deletions PaheScrapper/App.config
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<configSections>
<sectionGroup name="userSettings" type="System.Configuration.UserSettingsGroup, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" >
<sectionGroup name="userSettings" type="System.Configuration.UserSettingsGroup, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089">
<section name="PaheScrapper.Properties.Configuration" type="System.Configuration.ClientSettingsSection, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" allowExeDefinition="MachineToLocalUser" requirePermission="false" />
</sectionGroup>
</configSections>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5"/>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
</startup>
<userSettings>
<PaheScrapper.Properties.Configuration>
Expand All @@ -26,10 +26,10 @@
<value>8</value>
</setting>
<setting name="HTMLSaveStateThershold" serializeAs="String">
<value>50</value>
<value>5</value>
</setting>
<setting name="WebDriveSaveStateThershold" serializeAs="String">
<value>10</value>
<value>1</value>
</setting>
<setting name="WebDriveRestartOnError" serializeAs="String">
<value>True</value>
Expand All @@ -51,4 +51,12 @@
</setting>
</PaheScrapper.Properties.Configuration>
</userSettings>
<runtime>
<assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
<dependentAssembly>
<assemblyIdentity name="System.Buffers" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-4.0.3.0" newVersion="4.0.3.0" />
</dependentAssembly>
</assemblyBinding>
</runtime>
</configuration>
8 changes: 8 additions & 0 deletions PaheScrapper/Helpers/ConsoleHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ public static void LogBranch(string text)
LogWriter lw = new LogWriter("Branch: " + text);
}

/// <summary>
/// Prints a timing line to the console (yellow text on a dark-magenta background)
/// and passes the same line, prefixed with "Time: ", to a new <c>LogWriter</c>.
/// </summary>
/// <param name="timeSpan">Duration reported as whole milliseconds (fraction truncated by the int cast).</param>
/// <param name="totalTimeSpan">Accumulated duration, rendered with the "G" TimeSpan format.</param>
public static void LogTime(TimeSpan timeSpan, TimeSpan totalTimeSpan)
{
    // Format once; the console and the log receive exactly the same text.
    string timingLine = $"{(int)timeSpan.TotalMilliseconds} ms - {totalTimeSpan:G}";

    Console.BackgroundColor = ConsoleColor.DarkMagenta;
    Console.ForegroundColor = ConsoleColor.Yellow;
    Console.WriteLine(timingLine);

    // NOTE(review): the instance is never used afterwards - presumably LogWriter
    // writes its message from the constructor; confirm against LogWriter.
    new LogWriter("Time: " + timingLine);
}

public static void LogStats(string text)
{
Console.BackgroundColor = ConsoleColor.DarkBlue;
Expand Down
60 changes: 60 additions & 0 deletions PaheScrapper/Helpers/StringCompressor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
using System;
using System.IO;
using System.IO.Compression;
using System.Text;

namespace PaheScrapper.Helpers
{
/// <summary>
/// Converts strings to and from a compact, text-safe representation.
/// </summary>
/// <remarks>
/// Wire format: Base64 of a 4-byte Int32 holding the UTF-8 byte count of the
/// original text, followed by the GZip-compressed UTF-8 bytes. The length prefix
/// is produced with <see cref="BitConverter"/>, so its byte order is that of the host.
/// </remarks>
internal static class StringCompressor
{
    /// <summary>Number of bytes occupied by the uncompressed-length prefix.</summary>
    private const int LengthPrefixSize = 4;

    /// <summary>
    /// Compresses the string.
    /// </summary>
    /// <param name="text">The text to compress. Must not be null.</param>
    /// <returns>Base64 text containing the length prefix followed by the GZip data.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public static string CompressString(string text)
    {
        byte[] rawBytes = Encoding.UTF8.GetBytes(text);

        byte[] compressedData;
        using (var memoryStream = new MemoryStream())
        {
            // leaveOpen: true keeps memoryStream usable after the GZip stream is
            // disposed; disposing the GZip stream is what flushes the final block.
            using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Compress, true))
            {
                gZipStream.Write(rawBytes, 0, rawBytes.Length);
            }

            // ToArray copies the whole content regardless of Position and avoids
            // depending on the return value of a single Read call.
            compressedData = memoryStream.ToArray();
        }

        var gZipBuffer = new byte[compressedData.Length + LengthPrefixSize];
        Buffer.BlockCopy(BitConverter.GetBytes(rawBytes.Length), 0, gZipBuffer, 0, LengthPrefixSize);
        Buffer.BlockCopy(compressedData, 0, gZipBuffer, LengthPrefixSize, compressedData.Length);
        return Convert.ToBase64String(gZipBuffer);
    }

    /// <summary>
    /// Decompresses the string.
    /// </summary>
    /// <param name="compressedText">Text previously produced by <see cref="CompressString"/>.</param>
    /// <returns>The original text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="compressedText"/> is null.</exception>
    /// <exception cref="FormatException"><paramref name="compressedText"/> is not valid Base64.</exception>
    /// <exception cref="InvalidDataException">The payload is not valid GZip data.</exception>
    public static string DecompressString(string compressedText)
    {
        byte[] gZipBuffer = Convert.FromBase64String(compressedText);
        int dataLength = BitConverter.ToInt32(gZipBuffer, 0);

        var buffer = new byte[dataLength];
        int totalRead = 0;

        // Read-only view over the payload that follows the length prefix; no copy needed.
        using (var memoryStream = new MemoryStream(gZipBuffer, LengthPrefixSize, gZipBuffer.Length - LengthPrefixSize, false))
        using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
        {
            // Stream.Read may legitimately return fewer bytes than requested (and
            // GZipStream does so on current runtimes), so keep reading until the
            // buffer is full or the stream reports end-of-data.
            while (totalRead < buffer.Length)
            {
                int read = gZipStream.Read(buffer, totalRead, buffer.Length - totalRead);
                if (read == 0)
                {
                    break;
                }

                totalRead += read;
            }
        }

        // Decode only what was actually produced, so a short payload does not
        // yield trailing NUL characters.
        return Encoding.UTF8.GetString(buffer, 0, totalRead);
    }
}
}
19 changes: 19 additions & 0 deletions PaheScrapper/Helpers/WebErrorReporter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
using System.Net;
using HtmlAgilityPack;
using Newtonsoft.Json;

namespace PaheScrapper.Helpers
{
/// <summary>
/// Writes diagnostic dumps of failed web interactions to the log through <c>LogWriter</c>.
/// </summary>
public static class WebErrorReporter
{
    /// <summary>
    /// Logs the given response, serialized to JSON, under an "Http Dump" heading.
    /// </summary>
    /// <param name="webResponse">The response to serialize and log.</param>
    public static void HttpError(WebResponse webResponse)
    {
        string responseJson = JsonConvert.SerializeObject(webResponse);

        // NOTE(review): the LogWriter instance is discarded - presumably the
        // constructor performs the write; confirm against LogWriter.
        new LogWriter("Http Dump: \n" + responseJson);
    }

    /// <summary>
    /// Logs the document's inner HTML under an "Html Dump" heading, after passing
    /// it through <c>StringCompressor.CompressString</c>.
    /// </summary>
    /// <param name="htmlDocument">The document whose root-node inner HTML is logged.</param>
    public static void HtmlError(HtmlDocument htmlDocument)
    {
        string markup = htmlDocument.DocumentNode.InnerHtml;
        string packedMarkup = StringCompressor.CompressString(markup);

        new LogWriter("Html Dump: \n" + packedMarkup);
    }
}
}
11 changes: 11 additions & 0 deletions PaheScrapper/PaheScrapper.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,18 @@
<HintPath>..\packages\DotNetSeleniumExtras.WaitHelpers.3.11.0\lib\net45\SeleniumExtras.WaitHelpers.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Buffers, Version=4.0.3.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>..\packages\System.Buffers.4.5.1\lib\netstandard1.1\System.Buffers.dll</HintPath>
</Reference>
<Reference Include="System.Core" />
<Reference Include="System.Drawing" />
<Reference Include="System.Management" />
<Reference Include="System.Memory, Version=4.0.1.1, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>..\packages\System.Memory.4.5.4\lib\netstandard1.1\System.Memory.dll</HintPath>
</Reference>
<Reference Include="System.Runtime.CompilerServices.Unsafe, Version=4.0.4.1, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<HintPath>..\packages\System.Runtime.CompilerServices.Unsafe.4.5.3\lib\netstandard1.0\System.Runtime.CompilerServices.Unsafe.dll</HintPath>
</Reference>
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
Expand All @@ -91,8 +100,10 @@
<Compile Include="Helpers\LogWriter.cs" />
<Compile Include="Helpers\MyContractResolver.cs" />
<Compile Include="Helpers\NetworkHelper.cs" />
<Compile Include="Helpers\WebErrorReporter.cs" />
<Compile Include="Helpers\ProcessHelper.cs" />
<Compile Include="Helpers\RutimeParser.cs" />
<Compile Include="Helpers\StringCompressor.cs" />
<Compile Include="Helpers\UriHelper.cs" />
<Compile Include="Helpers\VMDecoder.cs" />
<Compile Include="Helpers\WebDriverHelper.cs" />
Expand Down
16 changes: 8 additions & 8 deletions PaheScrapper/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,20 +84,20 @@ static void FullScrape(string command = null)
if (command == "continue")
{
f = File.OpenText(Path.Combine(_folderPath, Configuration.Default.ManagerStateFilename));
_manager = ScrapperManager.Deserialize(f.ReadToEnd());
_manager = ScrapperManager.Deserialize(StringCompressor.DecompressString(f.ReadToEnd()));
f.Close();
}
else if (command == "resync")
{
f = File.OpenText(Path.Combine(_folderPath, Configuration.Default.ManagerStateFilename));
_manager = ScrapperManager.Deserialize(f.ReadToEnd());
_manager = ScrapperManager.Deserialize(StringCompressor.DecompressString(f.ReadToEnd()));
_manager.ResetState();
f.Close();
}
else if (command == "loop")
{
f = File.OpenText(Path.Combine(_folderPath, Configuration.Default.ManagerStateFilename));
_manager = ScrapperManager.Deserialize(f.ReadToEnd());
_manager = ScrapperManager.Deserialize(StringCompressor.DecompressString(f.ReadToEnd()));
f.Close();
isLoop = true;
}
Expand All @@ -108,9 +108,9 @@ static void FullScrape(string command = null)
else if (command == "state")
{
f = File.OpenText(Path.Combine(_folderPath, Configuration.Default.ManagerStateFilename));
_manager = ScrapperManager.Deserialize(f.ReadToEnd());
_manager = ScrapperManager.Deserialize(StringCompressor.DecompressString(f.ReadToEnd()));
f.Close();
ConsoleHelper.LogCommandHandled($"Current State = {_manager.State.ToString()}");
ConsoleHelper.LogCommandHandled($"Current State = {_manager.State}");
_manager = null;
command = null;
goto recommand;
Expand Down Expand Up @@ -164,7 +164,7 @@ static void FullScrape(string command = null)

using (var file = File.CreateText(Path.Combine(_folderPath, Configuration.Default.ManagerStateFilename)))
{
file.WriteAsync(_manager.Serialize()).Wait();
file.WriteAsync(StringCompressor.CompressString(_manager.Serialize())).Wait();
file.Close();
}

Expand Down Expand Up @@ -209,7 +209,7 @@ static void FullScrape(string command = null)
using (var file =
File.CreateText(Path.Combine(_folderPath, Configuration.Default.ManagerStateFilename)))
{
file.WriteAsync(_manager.Serialize()).Wait();
file.WriteAsync(StringCompressor.CompressString(_manager.Serialize())).Wait();
file.Close();
}

Expand All @@ -232,7 +232,7 @@ static void FullScrape(string command = null)
ConsoleHelper.LogStorage("Final Save Scraper State");
using (var file = File.CreateText(Path.Combine(_folderPath, Configuration.Default.ManagerStateFilename)))
{
file.WriteAsync(_manager.Serialize()).Wait();
file.WriteAsync(StringCompressor.CompressString(_manager.Serialize())).Wait();
file.Close();
}

Expand Down
4 changes: 2 additions & 2 deletions PaheScrapper/Properties/Configuration.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions PaheScrapper/Properties/Configuration.settings
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
<Value Profile="(Default)">8</Value>
</Setting>
<Setting Name="HTMLSaveStateThershold" Type="System.Int32" Scope="User">
<Value Profile="(Default)">50</Value>
<Value Profile="(Default)">5</Value>
</Setting>
<Setting Name="WebDriveSaveStateThershold" Type="System.Int32" Scope="User">
<Value Profile="(Default)">10</Value>
<Value Profile="(Default)">1</Value>
</Setting>
<Setting Name="WebDriveRestartOnError" Type="System.Boolean" Scope="User">
<Value Profile="(Default)">True</Value>
Expand Down
2 changes: 1 addition & 1 deletion PaheScrapper/ScrapperConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
public static class ScrapperConstants
{
public static string WebsiteLanding() => "https://pahe.ph/";
public static string WebsiteLandingPaging(int page) => page <= 1 ? WebsiteLanding() : $"https://pahe.ph/page/{page}/";
public static string WebsiteLandingPaging(int page) => page < 1 ? WebsiteLanding() : $"https://pahe.ph/page/{page + 1}/";
public static int HttpRequestTimeout() => 15 * 1000;
}
}
Loading

0 comments on commit 9cc346d

Please sign in to comment.