Skip to main content
Kofax

How to Retrieve the Message Body Text in a KIC Script

3022189

Question / Problem: 

In the "IDocumentScript2 Interface Definition" section of the KIC Developer's Guide, it states:

"The messageBody parameter contains all selected representations of the message body (original, PDF, TIF)."

However, there are no good code samples showing how to retrieve the text of the messageBody. 

Answer / Solution: 

You can retrieve the original content, which is stored as HTML, then perform a conversion to text.

First, in the KIC Plugin in Kofax Capture Administration, edit the Destination; then, under the Message content options, enable "Include original content".

Destination.png

This makes the messageBody available in HTML format (through the content.html file) both in a Document script (the ManageMessageFiles/BeforeMessageImport functions) and in a Batch ReRoute script.

Here is a sample Batch ReRoute script that retrieves the HTML-formatted message, passes it to a function that converts the HTML to plain text, then assigns the text to an Index Field:

using System;
using System.Linq;
using System.Text;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using Kofax.KCS.ImportConnector.Messages;
using Kofax.KCS.ImportConnector.Scripting;


namespace Kofax.KCS.ImportConnector.ScriptingSample
{

    public class SampleReRouteScript : IReRouteScript
    {
        #region ReRoutingScript Members

        /// <summary>
        /// <param name="message">The complete message, can be modified.</param>
        /// <param name="extension">Reserved for future use</param>
        /// </summary>
        public eMessageScriptCode ReRoute(Message message, object extension)
        {
            eMessageScriptCode scriptStatus = eMessageScriptCode.Continue;

            if (message.BodyAttachments != null)
            {
                foreach (Attachment att in message.BodyAttachments)
                {
                    if (att.IsBody && att.FileName.ToLower() == "content.html")
                    {
                        string html = System.IO.File.ReadAllText(att.FilePath);
                        string output = ConvertHTMLToPlainText(html);
                        message.Fields.Add("KfxCustomIndexValue2", output);
                    }
                }
            }

            return scriptStatus;
        }

        // This function converts HTML code to plain text
        public string ConvertHTMLToPlainText(string HTMLCode)
        {
            HTMLCode = HTMLCode.Replace("\n", " ");
            HTMLCode = HTMLCode.Replace("\t", " ");
            HTMLCode = Regex.Replace(HTMLCode, "\\s+", " ");
            HTMLCode = Regex.Replace(HTMLCode, "<head.*?</head>", ""
                                , RegexOptions.IgnoreCase | RegexOptions.Singleline);

            HTMLCode = Regex.Replace(HTMLCode, "<script.*?</script>", ""
              , RegexOptions.IgnoreCase | RegexOptions.Singleline);
            StringBuilder sbHTML = new StringBuilder(HTMLCode);
            string[] OldWords = {"&nbsp;", "&amp;", "&quot;", "&lt;",
               "&gt;", "&reg;", "&copy;", "&bull;", "&trade;"};
            string[] NewWords = { " ", "&", "\"", "<", ">", "®", "©", "•", "â„¢" };
            for (int i = 0; i < OldWords.Length; i++)
            {
                sbHTML.Replace(OldWords[i], NewWords[i]);
            }

            sbHTML.Replace("<br>", "\n<br>");
            sbHTML.Replace("<br ", "\n<br ");
            sbHTML.Replace("<p ", "\n<p ");

            return System.Text.RegularExpressions.Regex.Replace(
              sbHTML.ToString(), "<[^>]*>", "");
        }

        #endregion
    }
}

 

 

Applies to:  

Product Version
   

 

 

  • Was this article helpful?