Sample Code: Simple Document Conversion (DirectText)
The sample function below takes the path to a (possibly multi-page) input image file and the path where the output document should be written. It iterates through each page of the input file, preprocesses and OCRs the page, and appends the recognized result to the output file in the output format configured in the settings.
References needed:
Nuance.OmniPage.CSDK.ArgTypes.dll
Nuance.OmniPage.CSDK.Objects.dll
/// <summary>
/// Converts every page of an input image file to a single output document
/// using the DirectText recognition path.
/// </summary>
/// <param name="inputFile">Path to the (possibly multi-page) image file to read.</param>
/// <param name="outputFile">Path of the output document; each page's result is appended to it.</param>
/// <remarks>
/// The engine is initialized at the start of the call and shut down again when
/// the call finishes, including when an exception is thrown part-way through.
/// </remarks>
private static void DirectText(string inputFile, string outputFile)
{
    //Initialize the engine
    Engine.Init("CompanyName", "ProductName");
    try
    {
        //Create a settings collection to manage OCR settings
        using (SettingCollection settings = new SettingCollection())
        {
            //Set the recognition module to our 3-way engine which is the most accurate
            settings.DefaultRecognitionModule = RECOGNITIONMODULE.RM_OMNIFONT_PLUS3W;

            //Set the output format to Simple Text
            settings.DTXTOutputformat = DTXTOUTPUTFORMATS.DTXT_TXTS;
            //Other common outputs:
            //Formatted Text - DTXT_TXTF
            //PDF - DTXT_IOTPDF
            //XML - DTXT_XMLCOORD

            //Load the (multipage) input file
            //NOTE(review): FF_SIZE is passed as the format for a read-only open;
            //presumably the actual format is detected from the file - confirm against the CSDK docs.
            using (ImageFile file = new ImageFile(inputFile, FILEOPENMODE.IMGF_READ, IMF_FORMAT.FF_SIZE, settings))
            {
                //Iterate through each page in the input file
                for (int iPage = 0; iPage < file.PageCount; iPage++)
                {
                    //Load the page into memory
                    using (Page page = new Page(file, iPage, settings))
                    {
                        //Perform image cleanup (deskew, auto-rotate, despeckle, etc.)
                        page.Preprocess();

                        //OCR the image to the output file (append).
                        page.Recognize(outputFile);
                    }
                }
            }
        }
    }
    finally
    {
        //Always release the engine, even if loading or recognition failed,
        //so the Init call above is never left without a matching shutdown.
        Engine.ForceQuit();
    }
}