From bcbc2b1ccd59398ea67321d5d6a83b4c16874034 Mon Sep 17 00:00:00 2001 From: Ahmed Mubarak Date: Wed, 24 Nov 2021 15:56:10 +0200 Subject: [PATCH 1/4] Support VM Array Case --- PaheScrapper/ScrapperMethods.cs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/PaheScrapper/ScrapperMethods.cs b/PaheScrapper/ScrapperMethods.cs index 960ee6c..dc79027 100644 --- a/PaheScrapper/ScrapperMethods.cs +++ b/PaheScrapper/ScrapperMethods.cs @@ -475,14 +475,36 @@ public static VMMovieLookup DecodeDetailsVM(HtmlDocument document) startIndex = documentHtml.IndexOf(startPattern, StringComparison.Ordinal); documentHtml = documentHtml.Substring(startIndex + startPattern.Length, documentHtml.Length - startPattern.Length - startIndex); endIndex = documentHtml.IndexOf(endPattern, StringComparison.Ordinal) + 1; + + /*Array Case*/ + bool arrayOfString = false; + if (endIndex == 0) + { + endPattern = "]; function"; + endIndex = documentHtml.IndexOf(endPattern, StringComparison.Ordinal) + 1; + arrayOfString = true; + } + documentHtml = documentHtml.Substring(0, endIndex); if (string.IsNullOrEmpty(documentHtml)) return new VMMovieLookup(); - JObject linksObject = JObject.Parse(documentHtml); - IEnumerable linksTokens = linksObject.Properties().Select(l => l.Value).ToArray(); - string[] linksArray = linksTokens.Select(l => l.Value()).ToArray(); + string[] linksArray = null; + + if (arrayOfString) + { + /*Array Case*/ + JArray linksInArrayStruct = new JArray(documentHtml); + linksArray = linksInArrayStruct.Children().Select(l=>l.Value()).ToArray(); + } + else + { + /*Object Case*/ + JObject linksObject = JObject.Parse(documentHtml); + IEnumerable linksTokens = linksObject.Properties().Select(l => l.Value).ToArray(); + linksArray = linksTokens.Select(l => l.Value()).ToArray(); + } //Movie Page Links Buttons documentHtml = decodedHtml; From 11b832bf19371812c710a690d3143268b5bc8076 Mon Sep 17 00:00:00 2001 From: Ahmed Mubarak Date: Wed, 24 Nov 2021 18:18:40 +0200 Subject: [PATCH 2/4] Fix Empty VM Array Case --- PaheScrapper/ScrapperMethods.cs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/PaheScrapper/ScrapperMethods.cs b/PaheScrapper/ScrapperMethods.cs index dc79027..752225b 100644 --- a/PaheScrapper/ScrapperMethods.cs +++ b/PaheScrapper/ScrapperMethods.cs @@ -495,8 +495,11 @@ public static VMMovieLookup DecodeDetailsVM(HtmlDocument document) if (arrayOfString) { /*Array Case*/ - JArray linksInArrayStruct = new JArray(documentHtml); - linksArray = linksInArrayStruct.Children().Select(l=>l.Value()).ToArray(); + if (documentHtml != "[]") + { + JArray linksInArrayStruct = new JArray(documentHtml); + linksArray = linksInArrayStruct.Children().Select(l => l.Value()).ToArray(); + } } else { @@ -506,6 +509,10 @@ public static VMMovieLookup DecodeDetailsVM(HtmlDocument document) linksArray = linksTokens.Select(l => l.Value()).ToArray(); } + /*Empty Array => Empty Lookup*/ + if (linksArray == null) + return new VMMovieLookup(); + //Movie Page Links Buttons documentHtml = decodedHtml; startPattern = "if (counter== 0){"; From 2fd874b6858bd35ec761cde48052cef538a41d87 Mon Sep 17 00:00:00 2001 From: Ahmed Mubarak Date: Wed, 24 Nov 2021 20:36:12 +0200 Subject: [PATCH 3/4] Fix Persist Function --- PaheScrapper/App.config | 6 +++--- .../Properties/Configuration.Designer.cs | 6 +++--- .../Properties/Configuration.settings | 6 +++--- PaheScrapper/ScrapperManager.cs | 21 +++++++++++++++---- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/PaheScrapper/App.config b/PaheScrapper/App.config index 07381d8..a01da10 100644 --- a/PaheScrapper/App.config +++ b/PaheScrapper/App.config @@ -26,10 +26,10 @@ 8 - 5 + 500 - 1 + 100 True @@ -44,7 +44,7 @@ 100 - 50 + 10 output.json diff --git a/PaheScrapper/Properties/Configuration.Designer.cs b/PaheScrapper/Properties/Configuration.Designer.cs index 1bb1de1..a0bb4b1 100644 --- a/PaheScrapper/Properties/Configuration.Designer.cs +++ b/PaheScrapper/Properties/Configuration.Designer.cs @@ -85,7 +85,7 @@ public int WebDriveInstances { [global::System.Configuration.UserScopedSettingAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] - [global::System.Configuration.DefaultSettingValueAttribute("5")] + [global::System.Configuration.DefaultSettingValueAttribute("500")] public int HTMLSaveStateThershold { get { return ((int)(this["HTMLSaveStateThershold"])); @@ -97,7 +97,7 @@ public int HTMLSaveStateThershold { [global::System.Configuration.UserScopedSettingAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] - [global::System.Configuration.DefaultSettingValueAttribute("1")] + [global::System.Configuration.DefaultSettingValueAttribute("100")] public int WebDriveSaveStateThershold { get { return ((int)(this["WebDriveSaveStateThershold"])); @@ -157,7 +157,7 @@ public int HtmlRetryMaxLimit { [global::System.Configuration.UserScopedSettingAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] - [global::System.Configuration.DefaultSettingValueAttribute("50")] + [global::System.Configuration.DefaultSettingValueAttribute("10")] public int FailsafeStateThershold { get { return ((int)(this["FailsafeStateThershold"])); diff --git a/PaheScrapper/Properties/Configuration.settings b/PaheScrapper/Properties/Configuration.settings index 8907769..e7cd426 100644 --- a/PaheScrapper/Properties/Configuration.settings +++ b/PaheScrapper/Properties/Configuration.settings @@ -18,10 +18,10 @@ 8 - 5 + 500 - 1 + 100 True @@ -36,7 +36,7 @@ 100 - 50 + 10 output.json diff --git a/PaheScrapper/ScrapperManager.cs b/PaheScrapper/ScrapperManager.cs index c6cbe5b..33dfb6c 100644 --- a/PaheScrapper/ScrapperManager.cs +++ b/PaheScrapper/ScrapperManager.cs @@ -51,22 +51,35 @@ void BypassSurcuriRoutine() void PersistHtmlState(bool transition) { - if ((_currentPage + 1) % Configuration.Default.HTMLSaveStateThershold == 0) + if (!transition) { - if (!transition) + if ((_currentPage + 1) % Configuration.Default.HTMLSaveStateThershold == 0) + { _currentPage++; + saveState(_scrapperState); + } + } + else + { saveState(_scrapperState); } } void PersistWebDriveState(bool transition) { - if ((_currentPage + 1) % Configuration.Default.WebDriveSaveStateThershold == 0) + if (!transition) { - if (!transition) + if ((_currentPage + 1) % Configuration.Default.WebDriveSaveStateThershold == 0) + { + _currentPage++; + saveState(_scrapperState); + } + } + else + { saveState(_scrapperState); } } From 7c54d9e41ba6bd344bbfa4abe0adcd72b576c2a3 Mon Sep 17 00:00:00 2001 From: Ahmed Mubarak Date: Fri, 26 Nov 2021 16:33:33 +0200 Subject: [PATCH 4/4] New Version --- PaheScrapper/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PaheScrapper/Program.cs b/PaheScrapper/Program.cs index a8fcd2a..d8ee4fb 100644 --- a/PaheScrapper/Program.cs +++ b/PaheScrapper/Program.cs @@ -12,7 +12,7 @@ class Program { static void Main(string[] args) { - ConsoleHelper.LogInfo("Pahe Scraper - Alpha 2.4"); + ConsoleHelper.LogInfo("Pahe Scraper - Alpha 2.5"); if (args.Length == 3 && args[0] == "-d") {