-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
6 changed files
with
200 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<?xml version="1.0" encoding="utf-8" ?> | ||
<configuration> | ||
<startup> | ||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.7.2" /> | ||
</startup> | ||
</configuration> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Datalogics.PDFL; | ||
|
||
/* | ||
* Runs OCR on the document recognizing text found on its rasterized pages. | ||
* | ||
* Copyright (c) 2007-2025, Datalogics, Inc. All rights reserved. | ||
* | ||
*/ | ||
|
||
namespace OCRDocument
{
    /// <summary>
    /// Console sample that runs OCR over every page of a PDF document and
    /// saves the processed document to a new file.
    /// </summary>
    class OCRDocument
    {
        /// <summary>
        /// Entry point: opens the input PDF, calls OCRPageContents on each
        /// page in order, then saves the document with SaveFlags.Full.
        /// </summary>
        /// <param name="args">
        /// Optional overrides. args[0] replaces the default input path and
        /// args[1] replaces the default output path.
        /// </param>
        static void Main(string[] args)
        {
            Console.WriteLine("OCRDocument Sample:");

            using (Library library = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // Defaults that apply when no paths are given on the command line.
                string inputPath = Library.ResourceDirectory + "Sample_Input/scanned_images.pdf";
                string outputPath = "OCRDocument-out.pdf";

                if (args.Length > 0)
                {
                    inputPath = args[0];
                }

                if (args.Length > 1)
                {
                    outputPath = args[1];
                }

                Console.WriteLine($"Input file: {inputPath}");
                Console.WriteLine($"Writing output to: {outputPath}");

                OCRParams ocrSettings = new OCRParams();

                // OCRParams.Languages selects the languages the OCR engine tries
                // to detect. By default the engine searches for English.
                //
                // To detect further languages, append more LanguageSetting
                // entries to this list, for example:
                //     new LanguageSetting(Language.Japanese, false)
                List<LanguageSetting> languages = new List<LanguageSetting>
                {
                    new LanguageSetting(Language.English, false)
                };
                ocrSettings.Languages = languages;

                // If the images in your document are not 300 dpi, specify a
                // default resolution here. A correct resolution gives better OCR
                // results, especially with automatic image preprocessing, e.g.:
                //     ocrSettings.Resolution = 600;

                // The engine is created before the document is opened; both are
                // disposed when this scope exits (document first, then engine).
                using (OCREngine engine = new OCREngine(ocrSettings))
                using (Document document = new Document(inputPath))
                {
                    int pageIndex = 0;
                    while (pageIndex < document.NumPages)
                    {
                        // Dispose each page as soon as it has been processed.
                        using (Page currentPage = document.GetPage(pageIndex))
                        {
                            currentPage.OCRPageContents(document, engine);
                        }

                        pageIndex++;
                    }

                    document.Save(SaveFlags.Full, outputPath);
                }
            }
        }
    }
}
58 changes: 58 additions & 0 deletions
58
OpticalCharacterRecognition/OCRDocument/OCRDocument.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> | ||
<PropertyGroup> | ||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> | ||
<Platform Condition=" '$(Platform)' == '' ">x64</Platform> | ||
<ProjectGuid>{C9DD37F3-545F-4346-8EF2-FAE2DD20FDCF}</ProjectGuid> | ||
<OutputType>Exe</OutputType> | ||
<RootNamespace>OCRDocument</RootNamespace> | ||
<AssemblyName>OCRDocument</AssemblyName> | ||
<TargetFrameworkVersion>v4.7.2</TargetFrameworkVersion> | ||
<FileAlignment>512</FileAlignment> | ||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects> | ||
<Deterministic>true</Deterministic> | ||
<NuGetPackageImportStamp> | ||
</NuGetPackageImportStamp> | ||
</PropertyGroup> | ||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' "> | ||
<PlatformTarget>x64</PlatformTarget> | ||
<DebugSymbols>true</DebugSymbols> | ||
<DebugType>full</DebugType> | ||
<Optimize>false</Optimize> | ||
<OutputPath>..\..\..\dle\build\win-x86-64\Debug\</OutputPath> | ||
<DefineConstants>DEBUG;TRACE</DefineConstants> | ||
<ErrorReport>prompt</ErrorReport> | ||
<WarningLevel>4</WarningLevel> | ||
</PropertyGroup> | ||
<!-- Release|x64 configuration: pdb-only symbols, output written to bin\Release\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
  <PlatformTarget>x64</PlatformTarget>
  <DebugType>pdbonly</DebugType>
  <!-- Release builds enable compiler optimizations; the Debug configuration keeps them off. -->
  <Optimize>true</Optimize>
  <OutputPath>bin\Release\</OutputPath>
  <DefineConstants>TRACE</DefineConstants>
  <ErrorReport>prompt</ErrorReport>
  <WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup> | ||
<PackageReference Include="Adobe.PDF.Library.LM.NETFramework"> | ||
<Version>18.*</Version> | ||
</PackageReference> | ||
<Reference Include="System" /> | ||
<Reference Include="System.Core" /> | ||
<Reference Include="System.Xml.Linq" /> | ||
<Reference Include="System.Data.DataSetExtensions" /> | ||
<Reference Include="Microsoft.CSharp" /> | ||
<Reference Include="System.Data" /> | ||
<Reference Include="System.Net.Http" /> | ||
<Reference Include="System.Xml" /> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<Compile Include="OCRDocument.cs" /> | ||
<Compile Include="Properties\AssemblyInfo.cs" /> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<None Include="App.config" /> | ||
</ItemGroup> | ||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> | ||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio Version 16 | ||
VisualStudioVersion = 16.0.33328.57 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OCRDocument", "OCRDocument.csproj", "{C9DD37F3-545F-4346-8EF2-FAE2DD20FDCF}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|x64 = Debug|x64 | ||
Release|x64 = Release|x64 | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{C9DD37F3-545F-4346-8EF2-FAE2DD20FDCF}.Debug|x64.ActiveCfg = Debug|x64 | ||
{C9DD37F3-545F-4346-8EF2-FAE2DD20FDCF}.Debug|x64.Build.0 = Debug|x64 | ||
{C9DD37F3-545F-4346-8EF2-FAE2DD20FDCF}.Release|x64.ActiveCfg = Release|x64 | ||
{C9DD37F3-545F-4346-8EF2-FAE2DD20FDCF}.Release|x64.Build.0 = Release|x64 | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {CEA60573-4A7F-49A3-8EC5-6DCC54E2E30B} | ||
EndGlobalSection | ||
EndGlobal |
33 changes: 33 additions & 0 deletions
33
OpticalCharacterRecognition/OCRDocument/Properties/AssemblyInfo.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
using System.Reflection; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
|
||
// General Information about an assembly is controlled through the following | ||
// set of attributes. Change these attribute values to modify the information | ||
// associated with an assembly. | ||
[assembly: AssemblyTitle("OCRDocument")] | ||
[assembly: AssemblyDescription("")] | ||
[assembly: AssemblyConfiguration("")] | ||
[assembly: AssemblyCompany("Datalogics, Inc.")] | ||
[assembly: AssemblyProduct("OCRDocument")] | ||
[assembly: AssemblyCopyright("Copyright © Datalogics 2019-2025")] | ||
[assembly: AssemblyTrademark("")] | ||
[assembly: AssemblyCulture("")] | ||
|
||
// Setting ComVisible to false makes the types in this assembly not visible | ||
// to COM components. If you need to access a type in this assembly from | ||
// COM, set the ComVisible attribute to true on that type. | ||
[assembly: ComVisible(false)] | ||
|
||
// The following GUID is for the ID of the typelib if this project is exposed to COM | ||
[assembly: Guid("a1a2f184-6250-4843-8d6b-3a72776dd27d")] | ||
|
||
// Version information for an assembly consists of the following four values: | ||
// | ||
// Major Version | ||
// Minor Version | ||
// Build Number | ||
// Revision | ||
// | ||
[assembly: AssemblyVersion("1.0.0.0")] | ||
[assembly: AssemblyFileVersion("1.0.0.0")] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters