Skip to content

Commit

Permalink
If we can't pull the robots information, don't break completely (#37)
Browse files (browse the repository at this point in the history)
  • Loading branch information
g3rv4 authored Feb 4, 2023
1 parent 32280c3 commit 91ac0e2
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion src/Program.cs
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,7 +41,14 @@ await Parallel.ForEachAsync(Config.Instance.Sites,
new ParallelOptions { MaxDegreeOfParallelism = Config.Instance.Sites.Length },
async (site, _) =>
{
sitesRobotFile[site.Host] = await robotsFileParser.FromUriAsync(new Uri($"http://{site.Host}/robots.txt"));
try
{
sitesRobotFile[site.Host] = await robotsFileParser.FromUriAsync(new Uri($"http://{site.Host}/robots.txt"));
}
catch
{
Console.WriteLine($"Ignoring {site.Host} because had issues fetching its robots data (is the site down?)");
}
}
);

Expand Down Expand Up @@ -88,6 +95,11 @@ await Parallel.ForEachAsync(Config.Instance.Sites,
return;
}
}
else
{
Console.WriteLine($"Not scraping {url} because I couldn't fetch robots data.");
return;
}

HttpResponseMessage? response = null;
try
Expand Down

0 comments on commit 91ac0e2

Please sign in to comment.