﻿using Cyotek.GhostScript.PdfConversion;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Tesseract;

namespace NilaOCR
{
    public class GutschriftDocumentModel
    {


        /// <summary>Path of the source PDF; set by <c>Process</c> or read from the "SourceFilePath" column.</summary>
        public string PDFFilePath { get; set; }
        /// <summary>OCR outcome; the visible code only ever assigns <c>OCRStatus.PageError</c> (in <c>Process</c>).</summary>
        public OCRStatus OCRRecognition { get; set; }
        /// <summary>Order number ("Bestellnummer").</summary>
        public string OrderNumber { get; set; }
        /// <summary>Invoice number ("InvNummer").</summary>
        public string InvoiceNumber { get; set; }
        /// <summary>Document date as text; <c>Validate</c> accepts "dd.MM.yyyy" and "yyyy-MM-dd".</summary>
        public string DocumentDate { get; set; }
        /// <summary>SAP number ("SAPNummer").</summary>
        public string SAPNumber { get; set; }
        /// <summary>Cost center ("Kostenstelle"); <c>Validate</c> may rewrite OCR look-alike letters in it.</summary>
        public string CostCenter { get; set; }
        /// <summary>Net total ("NettoSumme") as text.</summary>
        public string SummeNetto { get; set; }
        /// <summary>
        /// Set from the "ID" column by the DataRow constructor, or to the recognised invoice number by <c>Process</c>.
        /// </summary>
        public string InitInvNumber { get; set; }
        /// <summary>Result of the last <c>Validate</c> check of <see cref="OrderNumber"/>.</summary>
        public Boolean ValidOrderNumber { get; set; }
        /// <summary>Result of the last <c>Validate</c> check of <see cref="InvoiceNumber"/>.</summary>
        public Boolean ValidInvoiceNumber { get; set; }
        /// <summary>Result of the last <c>Validate</c> check of <see cref="DocumentDate"/>.</summary>
        public Boolean ValidDocumentDate { get; set; }
        /// <summary>Result of the last <c>Validate</c> check of <see cref="SAPNumber"/>.</summary>
        public Boolean ValidSAPNumber { get; set; }
        /// <summary>Result of the last <c>Validate</c> check of <see cref="CostCenter"/>.</summary>
        public Boolean ValidCostCenter { get; set; }
        /// <summary>Result of the last <c>Validate</c> check of <see cref="SummeNetto"/>.</summary>
        public Boolean ValidSummeNetto { get; set; }

        /// <summary>True only when <c>Validate</c> found every field valid and the page count equal to one.</summary>
        public Boolean ValidDocument { get; set; }

        /// <summary>Number of pages; taken from the PDF in <c>Process</c>, set to 1 by the DataRow constructor.</summary>
        public int pageCount { get; set; }


        

        /// <summary>
        /// Creates an empty model; all fields keep their default values until
        /// they are assigned directly or filled by <c>Process</c>.
        /// </summary>
        public GutschriftDocumentModel()
        {

        }

        /// <summary>
        /// Builds a model from a stored data row and validates it immediately.
        /// </summary>
        /// <param name="row">
        /// Row providing the columns "SourceFilePath", "InvNummer", "ID", "SAPNummer",
        /// "Datum", "Bestellnummer", "Kostenstelle" and "NettoSumme".
        /// </param>
        public GutschriftDocumentModel(System.Data.DataRow row)
        {
            // Every column is taken over as its string representation.
            Func<string, string> column = delegate(string name) { return row[name].ToString(); };

            this.PDFFilePath = column("SourceFilePath");
            this.InvoiceNumber = column("InvNummer");
            this.InitInvNumber = column("ID");
            this.SAPNumber = column("SAPNummer");
            this.DocumentDate = column("Datum");
            this.OrderNumber = column("Bestellnummer");
            this.CostCenter = column("Kostenstelle");
            this.SummeNetto = column("NettoSumme");

            // A stored row always counts as a single-page document.
            this.ValidDocument = true;
            this.pageCount = 1;

            this.Validate();
        }

        /// <summary>
        /// Re-evaluates every <c>Valid*</c> flag from the current field values and sets
        /// <see cref="ValidDocument"/> to true only when all fields are valid and the
        /// document has exactly one page.
        /// </summary>
        /// <remarks>
        /// Side effect: when <see cref="CostCenter"/> fails its first check, common OCR
        /// letter/digit confusions (I, l, i -> 1; o, O -> 0; z, Z -> 2) are replaced in
        /// place and the check is repeated once.
        /// </remarks>
        public void Validate()
        {
            this.ValidOrderNumber = HasExactDigits(this.OrderNumber, OCRSettings.settings.numDigitsOrder);
            this.ValidInvoiceNumber = HasExactDigits(this.InvoiceNumber, OCRSettings.settings.numDigitsInvoice);
            this.ValidSAPNumber = HasExactDigits(this.SAPNumber, OCRSettings.settings.numDigitsSAP);

            this.ValidCostCenter = IsKnownCostCenter(this.CostCenter);
            if (!this.ValidCostCenter && this.CostCenter != null)
            {
                // Second chance: undo typical OCR misreads, then check again.
                this.CostCenter = this.CostCenter
                    .Replace("I", "1")
                    .Replace("l", "1")
                    .Replace("i", "1")
                    .Replace("o", "0")
                    .Replace("O", "0")
                    .Replace("z", "2")
                    .Replace("Z", "2");
                this.ValidCostCenter = IsKnownCostCenter(this.CostCenter);
            }

            this.ValidDocumentDate = IsSupportedDate(this.DocumentDate);
            this.ValidSummeNetto = IsPositiveAmount(this.SummeNetto);

            this.ValidDocument = this.ValidOrderNumber
                && this.ValidInvoiceNumber
                && this.ValidSAPNumber
                && this.ValidCostCenter
                && this.ValidDocumentDate
                && this.pageCount == 1
                && this.ValidSummeNetto;
        }

        /// <summary>
        /// True when <paramref name="value"/> consists of exactly
        /// <paramref name="digitCount"/> ASCII digits and nothing else.
        /// </summary>
        private static bool HasExactDigits(string value, int digitCount)
        {
            if (value == null)
            {
                return false;
            }

            // Anchored on both ends: an unanchored "[0-9]{n}" would also accept longer
            // strings that merely contain n consecutive digits.
            string pattern = "^[0-9]{" + digitCount.ToString(CultureInfo.InvariantCulture) + @"}\z";
            return Regex.IsMatch(value, pattern);
        }

        /// <summary>True when the value has the configured digit count and is in the configured cost-center list.</summary>
        private static bool IsKnownCostCenter(string costCenter)
        {
            return HasExactDigits(costCenter, OCRSettings.settings.numDigitsCostCenter)
                && OCRSettings.settings.CostCenters.Contains(costCenter);
        }

        /// <summary>True when the text parses as "dd.MM.yyyy" or as "yyyy-MM-dd".</summary>
        private static bool IsSupportedDate(string dateText)
        {
            CultureInfo german = CultureInfo.CreateSpecificCulture("de-DE");
            DateTime parsed;
            return DateTime.TryParseExact(dateText, "dd.MM.yyyy", german, DateTimeStyles.AdjustToUniversal, out parsed)
                || DateTime.TryParseExact(dateText, "yyyy-MM-dd", german, DateTimeStyles.None, out parsed);
        }

        /// <summary>
        /// True when the text parses (invariant culture) to a number greater than zero.
        /// Text containing both ',' and '.' is rejected as ambiguous.
        /// </summary>
        private static bool IsPositiveAmount(string amountText)
        {
            if (amountText == null)
            {
                return false;
            }

            if (amountText.Contains(",") && amountText.Contains("."))
            {
                return false;
            }

            double amount;
            return Double.TryParse(amountText, NumberStyles.Number, CultureInfo.InvariantCulture, out amount)
                && amount > 0;
        }

        /// <summary>
        /// Copies the current field values into a new <c>InvoiceTempData</c>, substituting
        /// placeholder strings for fields that are null.
        /// </summary>
        /// <returns>
        /// The populated record. <c>DatumAccessFormat</c> is the date rewritten as
        /// "yyyy-MM-dd" when it splits into at least three '.'-separated numeric parts
        /// that form a date; otherwise it equals <c>Datum</c>.
        /// </returns>
        public InvoiceTempData GetInvoiceTempData()
        {
            InvoiceTempData draft = new InvoiceTempData();
            draft.InvNummer = this.InvoiceNumber ?? "null";
            draft.Bestellnummer = this.OrderNumber ?? "";
            draft.Datum = this.DocumentDate ?? "";
            draft.Kostenstelle = this.CostCenter ?? "";
            draft.SAPNummer = this.SAPNumber ?? "null";
            draft.NettoSumme = this.SummeNetto ?? "0";
            draft.SourcePath = this.PDFFilePath;

            string accessDate;
            string[] dateParts = draft.Datum.Split('.');
            if (dateParts.Length < 3)
            {
                accessDate = draft.Datum;
            }
            else
            {
                try
                {
                    // The letter "o" is a frequent OCR misread of the digit zero.
                    int day = Int32.Parse(dateParts[0].Replace("o", "0").Replace("O", "0"));
                    int month = Int32.Parse(dateParts[1].Replace("o", "0").Replace("O", "0"));
                    int year = Int32.Parse(dateParts[2].Replace("o", "0").Replace("O", "0"));
                    accessDate = new DateTime(year, month, day).ToString("yyyy-MM-dd");
                }
                catch (Exception)
                {
                    // Not a usable date: hand the raw text through.
                    accessDate = draft.Datum;
                }
            }
            draft.DatumAccessFormat = accessDate;

            // Discount comes from the configuration list, keyed by order number.
            draft.Discount = 0;
            if (OCRSettings.settings.discounts.ContainsKey(draft.Bestellnummer))
            {
                draft.Discount = OCRSettings.settings.discounts[draft.Bestellnummer];
            }

            return draft;
        }

        /// <summary>
        /// Copies the current field values into a new <c>InvoiceData</c> and applies the
        /// configured discount for the order number, if any.
        /// </summary>
        /// <remarks>
        /// Side effect: a <see cref="SummeNetto"/> that contains ',' but no '.' is
        /// rewritten in place with '.' as the decimal separator.
        /// </remarks>
        /// <returns>
        /// The populated record. <c>NettoSummeDoc</c> is the net sum as read from the
        /// document; <c>NettoSumme</c> is the same value, reduced by the discount
        /// percentage when one is configured. <c>DatumAccessFormat</c> is "yyyy-MM-dd"
        /// when the date is a valid day-first date, otherwise the raw date text.
        /// </returns>
        /// <exception cref="FormatException">
        /// A discount is configured but the net sum is not a number.
        /// </exception>
        public InvoiceData GetInvoiceData()
        {
            InvoiceData invoice = new InvoiceData();
            invoice.InvNummer = this.InvoiceNumber;
            invoice.Bestellnummer = this.OrderNumber;
            invoice.Datum = this.DocumentDate;
            invoice.Kostenstelle = this.CostCenter;
            invoice.SAPNummer = this.SAPNumber;

            // Null-safe: an unrecognised sum must not crash the export.
            if (this.SummeNetto != null && this.SummeNetto.Contains(",") && !this.SummeNetto.Contains("."))
            {
                this.SummeNetto = this.SummeNetto.Replace(",", ".");
            }

            invoice.NettoSumme = this.SummeNetto;
            invoice.NettoSummeDoc = this.SummeNetto;
            invoice.DatumAccessFormat = ToAccessDate(this.DocumentDate);

            /* adding discount from configuration list */
            invoice.Discount = 0;
            // Guard against a null key, which the dictionary lookup would reject.
            if (invoice.Bestellnummer != null && OCRSettings.settings.discounts.ContainsKey(invoice.Bestellnummer))
            {
                invoice.Discount = OCRSettings.settings.discounts[invoice.Bestellnummer];

                float documentNet = float.Parse(invoice.NettoSummeDoc, NumberStyles.Number, CultureInfo.InvariantCulture);

                // NOTE(review): if Discount is an integer type, "Discount / 100" truncates
                // to 0 for values below 100 - confirm the declared type of Discount.
                // Format with the invariant culture so the discounted value uses the same
                // '.' decimal separator as the undiscounted one.
                invoice.NettoSumme = (documentNet * (1 - invoice.Discount / 100)).ToString(CultureInfo.InvariantCulture);
            }

            return invoice;
        }

        /// <summary>
        /// Converts a day-first date split by '.' or '-' into "yyyy-MM-dd"; returns the
        /// input unchanged when it does not describe a valid day/month/year.
        /// </summary>
        private static string ToAccessDate(string documentDate)
        {
            if (documentDate == null)
            {
                return null;
            }

            string[] parts = documentDate.Split('.', '-');
            int day;
            int month;
            int year;
            if (parts.Length < 3
                || !int.TryParse(parts[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out day)
                || !int.TryParse(parts[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out month)
                || !int.TryParse(parts[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out year))
            {
                return documentDate;
            }

            // Range-check up front instead of relying on the DateTime constructor throwing.
            // A date already in "yyyy-MM-dd" order fails this check and is passed through.
            if (year < 1 || year > 9999 || month < 1 || month > 12
                || day < 1 || day > DateTime.DaysInMonth(year, month))
            {
                return documentDate;
            }

            return new DateTime(year, month, day).ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
        }


        /// <summary>
        /// Runs OCR on a single-page PDF, fills the document fields from the recognised
        /// text and validates the result.
        /// </summary>
        /// <remarks>
        /// When the PDF does not have exactly one page, <see cref="OCRRecognition"/> is
        /// set to <see cref="OCRStatus.PageError"/> and nothing else happens (no OCR, no
        /// validation). Otherwise the page is rendered to a temporary PNG in the
        /// configured output folder; that file is removed again even when OCR fails.
        /// </remarks>
        /// <param name="filePath">Path of the PDF to process.</param>
        public void Process(String filePath)
        {
            this.PDFFilePath = filePath;
            string pdfnamewithoutextension = Path.GetFileNameWithoutExtension(filePath);

            Pdf2Image pdfimage = new Pdf2Image(filePath);
            pageCount = pdfimage.PageCount;

            if (pageCount != 1)
            {
                OCRRecognition = OCRStatus.PageError;
                return;
            }

            string outputimagename = OCRSettings.settings.OutputDocPath + "\\" + pdfnamewithoutextension + ".png";
            try
            {
                pdfimage.ConvertPdfPageToImage(outputimagename, 1);

                if (File.Exists(outputimagename))
                {
                    using (var engine = new TesseractEngine(@"tessdata", "eng", EngineMode.TesseractAndCube))
                    using (var img = Pix.LoadFromFile(outputimagename))
                    using (var page = engine.Process(img))
                    {
                        ReadFieldsFromText(CleanText(page.GetText()));
                    }
                }
            }
            finally
            {
                // Cleanup is best effort: a temp image that cannot be deleted must not
                // hide the OCR result or the original exception.
                try
                {
                    File.Delete(outputimagename);
                }
                catch (Exception)
                {
                }
            }

            Validate();
        }

        /// <summary>Fills order number, invoice number, date, SAP number, cost center and net sum from OCR text.</summary>
        private void ReadFieldsFromText(string text)
        {
            string order = ValueAfterLabel(GetBestellNumber(text));
            if (order != null)
            {
                order = trimInner(order);
                if (order.Length == OCRSettings.settings.numDigitsOrder)
                {
                    this.OrderNumber = order;
                }
            }

            string invoice = ValueAfterLabel(GetRechnungsNumber(text));
            if (invoice != null)
            {
                invoice = trimInner(invoice);
                if (invoice.Length == OCRSettings.settings.numDigitsInvoice)
                {
                    this.InvoiceNumber = invoice;
                    this.InitInvNumber = invoice;
                }
            }

            string date = ValueAfterLabel(GetRechnungsDatum(text));
            if (date != null)
            {
                // Strip blanks and undo typical OCR misreads (',' for '.', 'o' for '0').
                this.DocumentDate = date
                    .Replace(" ", "")
                    .Replace(",", ".")
                    .Replace("o", "0")
                    .Replace("O", "0");
            }

            string sap = GetSAPNumber(text);
            if (sap != null)
            {
                ProcessSAPNumber(sap);
            }

            // Fall back to the second anchor when the first one produced no SAP number.
            if (this.SAPNumber == null || this.SAPNumber.Equals(""))
            {
                sap = GetSAPNumber2(text);
                if (sap != null)
                {
                    ProcessSAPNumber(sap);
                }
            }

            string sum = GetSummeNetto(text);
            if (sum != null)
            {
                string[] tokens = sum.Split(' ');
                this.SummeNetto = tokens[tokens.Length - 1]
                    .Replace(".", "")    // remove the German thousands separator
                    .Replace(",", ".");  // switch to '.' as decimal separator
            }
        }

        /// <summary>
        /// Returns the part after the single ':' or ';' of a "label: value" line, or
        /// null when the line is null or does not split into exactly two parts.
        /// </summary>
        private static string ValueAfterLabel(string labelledLine)
        {
            if (labelledLine == null)
            {
                return null;
            }

            string[] tokens = labelledLine.Trim().Split(':', ';');
            return tokens.Length == 2 ? tokens[1] : null;
        }

        /// <summary>
        /// Extracts the SAP number and, if still unset, the cost center from the block of
        /// lines that follows the SAP anchor in the OCR text.
        /// </summary>
        /// <remarks>
        /// Line 0 is the anchor line. The SAP number is taken from line 1. The cost center
        /// is taken from the first of lines 2 to 4 whose text before the first dash has the
        /// configured length. Blocks with fewer than three lines are ignored.
        /// </remarks>
        /// <param name="sap">Newline-separated block starting at the anchor line.</param>
        private void ProcessSAPNumber(string sap)
        {
            string[] saplines = sap.Split('\n');
            if (saplines.Length < 3)
            {
                return;
            }

            int sapDigits = OCRSettings.settings.numDigitsSAP;
            string firstToken = saplines[1].Split(' ')[0].Trim();
            if (firstToken.Length == sapDigits)
            {
                this.SAPNumber = firstToken.Replace("o", "0").Replace("O", "0");
            }
            else
            {
                // The number may have been split by stray blanks: join the line and take
                // the leading characters, provided the first one is a digit.
                string compact = saplines[1].Replace(" ", "").Trim();
                if (compact.Length > sapDigits)
                {
                    string saps = compact.Substring(0, sapDigits);
                    int ignored;
                    if (saps.Length > 0 && int.TryParse(saps.Substring(0, 1), out ignored))
                    {
                        this.SAPNumber = saps;
                    }
                }
            }

            // Only look at lines that actually exist; the block can be shorter than five
            // lines when the anchor sits near the end of the text.
            int lastCandidate = Math.Min(4, saplines.Length - 1);
            for (int i = 2; i <= lastCandidate; i++)
            {
                string candidate = trimInner(saplines[i].Split('-', '—')[0]);
                if (candidate.Length != OCRSettings.settings.numDigitsCostCenter)
                {
                    continue;
                }

                // The first line of matching length ends the search, even when an
                // already-set cost center is kept.
                if (this.CostCenter == null || this.CostCenter.Equals(""))
                {
                    this.CostCenter = candidate.Replace("5s", "56").Replace("s6", "56").Replace("o", "0").Replace("s3", "53");
                }
                break;
            }
        }

        /// <summary>
        /// Finds the first known spelling of the "Beleg-Nr" label in the lower-cased text
        /// and returns the rest of that line, starting at the label.
        /// </summary>
        /// <param name="text">OCR text to search.</param>
        /// <returns>The lower-cased line from the label up to (not including) the next '\n', or null when no label is found.</returns>
        private string GetBelegNumber(string text)
        {
            string lowered = text.ToLowerInvariant();
            string[] labels = { "beleg—nr", "beleg-nr", "be1eg—nr", "be1eg-nr" };

            int start = findIndexOf(labels, lowered);
            if (start < 0)
            {
                return null;
            }

            int lineEnd = lowered.IndexOf('\n', start);
            if (lineEnd < 0)
            {
                return lowered.Substring(start);
            }
            return lowered.Substring(start, lineEnd - start);
        }

        /// <summary>
        /// Finds the first known spelling of the "Summe netto" label in the lower-cased,
        /// cleaned text and returns the rest of that line with blanks around commas removed.
        /// </summary>
        /// <param name="text">OCR text to search.</param>
        /// <returns>The line from the label up to (not including) the next '\n', or null when no label is found.</returns>
        private string GetSummeNetto(string text)
        {
            string[] labels = { "summe netto", "summe nett0", "sumrne netto", "surr1rne netto", "summe nelto", "snmme netto", "netto", "nctto" };
            string lowered = CleanText(text.ToLowerInvariant());

            int start = findIndexOf(labels, lowered);
            if (start < 0)
            {
                return null;
            }

            int lineEnd = lowered.IndexOf('\n', start);
            string line = lineEnd < 0
                ? lowered.Substring(start)
                : lowered.Substring(start, lineEnd - start);

            // OCR tends to put blanks next to the decimal comma.
            return line.Replace(" ,", ",").Replace(", ", ",");
        }

        /// <summary>
        /// Collapses every run of '\n' to a single '\n' and every run of blanks to a
        /// single blank, then trims leading and trailing whitespace.
        /// </summary>
        /// <param name="text">Text to normalise.</param>
        /// <returns>The normalised text.</returns>
        private string CleanText(string text)
        {
            StringBuilder collapsed = new StringBuilder(text.Length);
            foreach (char current in text)
            {
                bool collapsible = current == '\n' || current == ' ';
                bool repeatsPrevious = collapsed.Length > 0 && collapsed[collapsed.Length - 1] == current;
                if (collapsible && repeatsPrevious)
                {
                    // Same separator as the one just written: drop it.
                    continue;
                }
                collapsed.Append(current);
            }
            return collapsed.ToString().Trim();
        }

        /// <summary>
        /// Trims surrounding whitespace and removes every blank (' ') inside the text.
        /// Other whitespace characters inside the text are kept.
        /// </summary>
        /// <param name="text">Text to compact.</param>
        /// <returns>The text without surrounding whitespace and without inner blanks.</returns>
        private string trimInner(string text)
        {
            return text.Trim().Replace(" ", "");
        }

        /// <summary>
        /// Fallback SAP anchor: locates a word starting with "zei" and ending in "t"
        /// (or, failing that, the literal "zeitpunkt") in the lower-cased text and returns
        /// the block of up to five lines beginning there.
        /// </summary>
        /// <param name="text">OCR text to search.</param>
        /// <returns>
        /// The text from the anchor up to (not including) the fifth following '\n', or to
        /// the end of the text when fewer newlines follow; null when no anchor is found.
        /// </returns>
        private string GetSAPNumber2(string text)
        {
            string lowered = text.ToLowerInvariant();

            Match anchor = Regex.Match(lowered, @"\bzei\w+t");
            int start = anchor.Success ? anchor.Index : -1;
            if (start == -1)
            {
                string[] labels = { "zeitpunkt" };
                start = findIndexOf(labels, lowered);
            }
            if (start < 0)
            {
                return null;
            }

            int searchFrom = start;
            int newlinesSeen = 0;
            while (true)
            {
                int newline = lowered.IndexOf('\n', searchFrom);
                if (newline < 0)
                {
                    return lowered.Substring(start);
                }
                newlinesSeen++;
                if (newlinesSeen == 5)
                {
                    return lowered.Substring(start, newline - start);
                }
                searchFrom = newline + 1;
            }
        }

        /// <summary>
        /// Primary SAP anchor: locates the first known spelling of "Blatt" in the
        /// lower-cased text and returns the block of up to five lines beginning there.
        /// </summary>
        /// <param name="text">OCR text to search.</param>
        /// <returns>
        /// The text from the anchor up to (not including) the fifth following '\n', or to
        /// the end of the text when fewer newlines follow; null when no anchor is found.
        /// </returns>
        private string GetSAPNumber(string text)
        {
            string lowered = text.ToLowerInvariant();
            string[] labels = { "blatt", "b1att", "biatt", "b1a1:t" , "hlatt","b1at:","bl.att","bl a.tt","b1a.tt","blat1","—blat"};

            int start = findIndexOf(labels, lowered);
            if (start < 0)
            {
                return null;
            }

            int searchFrom = start;
            int newlinesSeen = 0;
            while (true)
            {
                int newline = lowered.IndexOf('\n', searchFrom);
                if (newline < 0)
                {
                    return lowered.Substring(start);
                }
                newlinesSeen++;
                if (newlinesSeen == 5)
                {
                    return lowered.Substring(start, newline - start);
                }
                searchFrom = newline + 1;
            }
        }

        /// <summary>
        /// Locates the invoice-number label ("Rechnungs-Nr" and its OCR variants) in the
        /// lower-cased text and returns the rest of that line, starting at the label.
        /// </summary>
        /// <param name="text">OCR text to search.</param>
        /// <returns>The lower-cased line from the label up to (not including) the next '\n', or null when no label is found.</returns>
        private string GetRechnungsNumber(string text)
        {
            string lowered = text.ToLowerInvariant();

            // Try the tolerant pattern first; fall back to the list of known misreads.
            Match label = Regex.Match(lowered, @"\br\w+[^\n]\Wnr\W:");
            int start = label.Success ? label.Index : -1;
            if (start == -1)
            {
                string[] labels = { "rechnungs-", "rechnungs—", "rechnungsn", "rcchnungs—n", "rcchnungs-n", "rechnungs~nr", "rcch11ungs—n", "rec1mungs—nr", "rechnur1gs—nr", "rech.r1ungs—n", "rcc11nungs—nr", "rcchnungs—n", "rech11ungs—n", "rechnut1gs—n", "rec11nungs—n", "rechr1ungs—n", "rechnu11gs—n", "re-chnungs—n", "r0chnungs—n", "r‘chnungs~n" };
                start = findIndexOf(labels, lowered);
            }
            if (start < 0)
            {
                return null;
            }

            int lineEnd = lowered.IndexOf('\n', start);
            if (lineEnd < 0)
            {
                return lowered.Substring(start);
            }
            return lowered.Substring(start, lineEnd - start);
        }

        /// <summary>
        /// Returns the position in <paramref name="text"/> of the first entry of
        /// <paramref name="parray"/> that occurs in it. Entries are tried in array order,
        /// so an earlier entry wins even when a later one occurs further left in the text.
        /// </summary>
        /// <param name="parray">Candidate substrings, in priority order.</param>
        /// <param name="text">Text to search.</param>
        /// <returns>Zero-based index of the first matching entry, or -1 when none occurs.</returns>
        private int findIndexOf(string[] parray, string text)
        {
            foreach (string word in parray)
            {
                // One ordinal search per word. The previous Contains (ordinal) followed by
                // IndexOf(string) (culture-sensitive) could disagree and scanned twice.
                int position = text.IndexOf(word, StringComparison.Ordinal);
                if (position >= 0)
                {
                    return position;
                }
            }
            return -1;
        }

        /// <summary>
        /// Locates the invoice-date label ("Rechnungsdatum" and its OCR variants) in the
        /// lower-cased text and returns the rest of that line, starting at the label.
        /// </summary>
        /// <param name="text">OCR text to search.</param>
        /// <returns>The lower-cased line from the label up to (not including) the next '\n', or null when no label is found.</returns>
        private string GetRechnungsDatum(string text)
        {
            string lowered = text.ToLowerInvariant();

            // Try the tolerant pattern first; fall back to the list of known misreads.
            Match label = Regex.Match(lowered, @"\br\w+m:");
            int start = label.Success ? label.Index : -1;
            if (start == -1)
            {
                string[] labels = { "rechnungsda", "rechnung sdatum", "rechnung sda", "reclmungsdatum", "rcchnungsdatum", "rcchnungsda", "rec11nu11gsda" };
                start = findIndexOf(labels, lowered);
            }
            if (start < 0)
            {
                return null;
            }

            int lineEnd = lowered.IndexOf('\n', start);
            if (lineEnd < 0)
            {
                return lowered.Substring(start);
            }
            return lowered.Substring(start, lineEnd - start);
        }

        /// <summary>
        /// Locates the order-number label ("Bestell-Nr" and its OCR variants) in the
        /// lower-cased text and returns the rest of that line, starting at the label.
        /// </summary>
        /// <param name="text">OCR text to search.</param>
        /// <returns>
        /// Null when no label is found, or when the label line ends in '\n' and contains
        /// neither ':' nor ';'. Otherwise the trimmed label line. When the label is on the
        /// last line of the text (no '\n' follows), that remainder is returned as is.
        /// </returns>
        private string GetBestellNumber(string text)
        {
            string lcase = text.ToLowerInvariant();

            // Try the tolerant pattern first; fall back to the list of known misreads.
            Match label = Regex.Match(lcase, @"\bb\w+[^\n]\Wnr");
            int index = label.Success ? label.Index : -1;
            if (index == -1)
            {
                string[] parray = { "besten", "besteii", "besteli", "beste11—n", "bcs1e11-n", "bcste11—nr", "beste1l—n", "bcstell-n", "bcste1l—n", "bestel1—n", "bes1e11~n", "be8te11—n", "beste114n", "bcste11~n", "bcste11«n", "besie11—n", "be.s‘te]1—n", "bes1e114nr", "bestc11—n", "bes1e11—n", "besrc11—n", "bcstcll-n", "besle11—nr", "besi;e11vnr", "bc.ste11—nr", "bcs1e11—nr", "beste-11-n", "be:ste11", "b0sle11—n", "bes1e11anr", "bestell", "best.e11", "besteil", "beste1l", "beste11", "bestel1", "bestei1", "beste1i" };
                index = findIndexOf(parray, lcase);
            }

            // No label at all: previously this fell through with index == -1 and
            // Substring(-1, 1) threw ArgumentOutOfRangeException.
            if (index < 0)
            {
                return null;
            }

            int lineEnd = lcase.IndexOf('\n', index);
            if (lineEnd < 0)
            {
                return lcase.Substring(index);
            }

            string line = lcase.Substring(index, lineEnd - index);
            if (line.Contains(":") || line.Contains(";"))
            {
                return line.Trim();
            }

            // A label line without a separator carries no usable value.
            return null;
        }

        /// <summary>
        /// Numeric identifier of this document. It is not read or written by any code in
        /// this class. NOTE(review): presumably a database key assigned by callers - confirm.
        /// </summary>
        public int ID { get; set; }
    }

    /// <summary>
    /// Outcome of OCR processing for a document.
    /// </summary>
    /// <remarks>
    /// In this file only <see cref="PageError"/> is assigned (by
    /// <c>GutschriftDocumentModel.Process</c> when the PDF does not have exactly one page).
    /// NOTE(review): the meaning of the other members is inferred from their names only -
    /// confirm against the callers that set them.
    /// </remarks>
    public enum OCRStatus
    {
        Good = 1,
        FieldError = 2,
        FatalError = 3,
        // The PDF does not consist of exactly one page.
        PageError = 4
    }
}
