From 88ced93bb33502fec2f050aaec17d23f6c33796d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joe=20K=C3=BCng?= Date: Thu, 7 Nov 2024 11:25:17 +0100 Subject: [PATCH] Added Elaborate class for logic --- capzlog-ExtractDataFromPDF/Elaborate.cs | 41 ++++++++ capzlog-ExtractDataFromPDF/Program.cs | 50 ++++++---- .../SinglePageReader.cs | 24 ----- capzlog-ExtractDataFromPDF/models/Crew.cs | 7 ++ capzlog-ExtractDataFromPDF/models/Flight.cs | 2 + .../util/BriefingExtractor.cs | 57 ++++++----- .../util/ExtractText.cs | 2 +- .../util/FlightPlanExtractor.cs | 98 ++++++++++--------- capzlog-ExtractDataFromPDF/util/indexing.cs | 52 ++++++++-- 9 files changed, 212 insertions(+), 121 deletions(-) create mode 100644 capzlog-ExtractDataFromPDF/Elaborate.cs delete mode 100644 capzlog-ExtractDataFromPDF/SinglePageReader.cs diff --git a/capzlog-ExtractDataFromPDF/Elaborate.cs b/capzlog-ExtractDataFromPDF/Elaborate.cs new file mode 100644 index 0000000..5ad47a2 --- /dev/null +++ b/capzlog-ExtractDataFromPDF/Elaborate.cs @@ -0,0 +1,41 @@ +using capzlog_ExtractDataFromPDF.models; +using UglyToad.PdfPig; + +namespace capzlog_ExtractDataFromPDF; + +public class Elaborate +{ + private PdfDocument _pdfDocument; + public Elaborate(PdfDocument pdfDocument) + { + _pdfDocument = pdfDocument; + + } + + public Flight GetAllFlightInfo(int pageOfFlightPlan) + { + ExtractText reader = new ExtractText(); + string content = reader.GetTextFormSinglePage(_pdfDocument, pageOfFlightPlan); + + FlightPlanExtractor flightPlanExtractor = new FlightPlanExtractor(content); + Flight flight = flightPlanExtractor.ExtractFlightPlan(); + + + Indexing indexing = new Indexing(_pdfDocument); + + int indexCrewBriefing = indexing.getIndexCrewBriefing(flight.Info.FlightNumber); + if (indexCrewBriefing < 0) + { + return flight; + } + + string crewContent = reader.GetTextFormSinglePage(_pdfDocument, indexCrewBriefing); + + BriefingExtractor briefingExtractor = new BriefingExtractor(crewContent); + CrewBriefing crewBriefing = briefingExtractor.ExtractCrewBriefing(); + + flight.CrewBriefing = crewBriefing; + + return flight; + } +} \ No newline at end of file diff --git a/capzlog-ExtractDataFromPDF/Program.cs b/capzlog-ExtractDataFromPDF/Program.cs index eb73db9..2c709da 100644 --- a/capzlog-ExtractDataFromPDF/Program.cs +++ b/capzlog-ExtractDataFromPDF/Program.cs @@ -1,6 +1,6 @@  using System.Text.RegularExpressions; - +using capzlog_ExtractDataFromPDF.models; using UglyToad.PdfPig; @@ -13,27 +13,41 @@ namespace capzlog_ExtractDataFromPDF { using (var pdfDoc = PdfDocument.Open(SRC)) { - - SinglePageReader reader = new SinglePageReader(); - string content = reader.GetCrewAndFlightAssignment(pdfDoc, 89); - BriefingExtractor briefingExtractor = new BriefingExtractor(content); + Indexing indexing = new Indexing(pdfDoc); + List indexFlightPlans = indexing.GetIndexFlightPlans(); + + Console.WriteLine("Found "+indexFlightPlans.Count+" flight plans"); + + List flights = new List(); - Console.WriteLine("Crew Function: " + briefingExtractor.ExtractCrew()[0].Function); - Console.WriteLine("Business Passengers: " + briefingExtractor.ExtractPassengers().Business); - Console.WriteLine("Flight Assignment DOI: " + briefingExtractor.ExtractFlightAssignment().DOI); + Elaborate elaborate = new Elaborate(pdfDoc); + + for (int i = 0; i < indexFlightPlans.Count; i++) + { + flights.Add(elaborate.GetAllFlightInfo(indexFlightPlans[i])); + + } + + flights.ForEach(flight => + { + Console.WriteLine("Flight Number: " + flight.Info.FlightNumber); + Console.WriteLine("Aircraft Type: " + flight.Info.AircraftType); + Console.WriteLine("Departure: " + flight.Info.Departure); + Console.WriteLine("Arrival: " + flight.Info.Destination); + Console.WriteLine("Date: " + flight.Info.Date); + Console.WriteLine("Zero Fuel Mass: " + flight.MassLoad.ZeroFuelMass); + Console.WriteLine("Scheduled Departure Time: " + flight.Schedule.ScheduledDepartureTime); + flight.CrewBriefing.Crews.ForEach(crew => + { + Console.WriteLine(crew.Function + ": " + crew.Name); + }); + + Console.WriteLine(); - string content2 = reader.GetCrewAndFlightAssignment(pdfDoc, 12); - FlightPlanExtractor flightPlanExtractor = new FlightPlanExtractor(content2); - - Console.WriteLine("Fuel Data Limc: " + flightPlanExtractor.ExtractFlightPlan().FuelData.Limc); - Console.WriteLine("Zero Fuel Mass: " + flightPlanExtractor.ExtractFlightPlan().MassLoad.ZeroFuelMass); - Console.WriteLine("Scheduled Arrival Time: " + flightPlanExtractor.ExtractFlightPlan().Schedule.ScheduledArrivalTime); - Console.WriteLine("GainLoss: " + flightPlanExtractor.ExtractFlightPlan().Correction.GainOrLoss); - Console.WriteLine("Aircraft Type: " + flightPlanExtractor.ExtractFlightPlan().Info.AircraftType); - Console.WriteLine("Flight Date: " + flightPlanExtractor.ExtractFlightPlan().Info.Date); - Console.WriteLine("Departure: " + flightPlanExtractor.ExtractFlightPlan().Info.Departure); + }); + } diff --git a/capzlog-ExtractDataFromPDF/SinglePageReader.cs b/capzlog-ExtractDataFromPDF/SinglePageReader.cs deleted file mode 100644 index d8ad912..0000000 --- a/capzlog-ExtractDataFromPDF/SinglePageReader.cs +++ /dev/null @@ -1,24 +0,0 @@ -using capzlog_ExtractDataFromPDF.models; -using UglyToad.PdfPig; -using UglyToad.PdfPig.DocumentLayoutAnalysis; - - -namespace capzlog_ExtractDataFromPDF; - -//GET flightAssigment and flight crew -public class SinglePageReader -{ - - - - public string GetCrewAndFlightAssignment(PdfDocument pdfDocument, int pageNumber) - { - ExtractText extractText = new ExtractText(); - var textBlocks = extractText.ExtractTextBlocks(pdfDocument, pageNumber); - - return textBlocks; - - } - - -} \ No newline at end of file diff --git a/capzlog-ExtractDataFromPDF/models/Crew.cs b/capzlog-ExtractDataFromPDF/models/Crew.cs index 6f99d7b..b90825e 100644 --- a/capzlog-ExtractDataFromPDF/models/Crew.cs +++ b/capzlog-ExtractDataFromPDF/models/Crew.cs @@ -1,5 +1,12 @@ namespace capzlog_ExtractDataFromPDF.models { + public class CrewBriefing + { + public List Crews { get; set; }= new List(); + public FlightAssigment FlightAssignment { get; set; } = new FlightAssigment(); + public Passegers Passengers { get; set; } = new Passegers(); + } + public class Crew { public string Function { get; set; } = ""; diff --git a/capzlog-ExtractDataFromPDF/models/Flight.cs b/capzlog-ExtractDataFromPDF/models/Flight.cs index fd632bd..5ae0c40 100644 --- a/capzlog-ExtractDataFromPDF/models/Flight.cs +++ b/capzlog-ExtractDataFromPDF/models/Flight.cs @@ -8,6 +8,8 @@ namespace capzlog_ExtractDataFromPDF.models public LoadMass MassLoad { get; set; } = new LoadMass(); public Fuel FuelData { get; set; } = new Fuel(); public Corrections Correction { get; set; } = new Corrections(); + + public CrewBriefing CrewBriefing { get; set; } = new CrewBriefing(); } public class FlightInfo diff --git a/capzlog-ExtractDataFromPDF/util/BriefingExtractor.cs b/capzlog-ExtractDataFromPDF/util/BriefingExtractor.cs index e485f58..7dce9e3 100644 --- a/capzlog-ExtractDataFromPDF/util/BriefingExtractor.cs +++ b/capzlog-ExtractDataFromPDF/util/BriefingExtractor.cs @@ -11,42 +11,50 @@ public class BriefingExtractor { this.briefingText = briefingText; } + + public CrewBriefing ExtractCrewBriefing() + { + CrewBriefing crewBriefing = new CrewBriefing(); + crewBriefing.Crews = ExtractCrew(); + crewBriefing.FlightAssignment = ExtractFlightAssignment(); + crewBriefing.Passengers = ExtractPassengers(); + return crewBriefing; + } - public FlightAssigment ExtractFlightAssignment() + private FlightAssigment ExtractFlightAssignment() { FlightAssigment flightAssignment = new FlightAssigment(); var dowMatch = Regex.Match(briefingText, @"DOW:\s*(\d+(?:\.\d+)?)kg"); - if (dowMatch.Success) - { - flightAssignment.DOW = double.Parse(dowMatch.Groups[1].Value); - } + flightAssignment.DOW = dowMatch.Success ? double.Parse(dowMatch.Groups[1].Value) : 0.0; var doiMatch = Regex.Match(briefingText, @"DOI:\s*(\d+(?:\.\d+)?)"); - if (doiMatch.Success) - { - flightAssignment.DOI = double.Parse(doiMatch.Groups[1].Value); - } + flightAssignment.DOI = doiMatch.Success ? double.Parse(doiMatch.Groups[1].Value) : 0.0; return flightAssignment; } - public Passegers ExtractPassengers() + private Passegers ExtractPassengers() { Passegers passengers = new Passegers(); var paxMatch = Regex.Match(briefingText, @"\d+\/(\d+)"); if (paxMatch.Success) { - passengers.Business = int.Parse(paxMatch.Groups[0].Value.Split("/")[0]);; + passengers.Business = int.Parse(paxMatch.Groups[0].Value.Split("/")[0]); passengers.Economy = int.Parse(paxMatch.Groups[1].Value); } + else + { + passengers.Business = 0; + passengers.Economy = 0; + } return passengers; } - //TODO: check for multiple crew members in the same function - public List ExtractCrew() + // TODO: check for multiple crew members in the same function + private List ExtractCrew() { List crewList = new List(); @@ -73,15 +81,17 @@ public class BriefingExtractor } } - if (!string.IsNullOrEmpty(currentLine)) { combinedLines.Add(currentLine.Trim()); } - combinedLines.RemoveAt(0); + if (combinedLines.Count > 0) + { + combinedLines.RemoveAt(0); + } - int index = combinedLines[combinedLines.Count - 1].IndexOf("X:"); + int index = combinedLines.Count > 0 ? combinedLines[combinedLines.Count - 1].IndexOf("X:") : -1; if (index >= 0) { @@ -90,20 +100,23 @@ public class BriefingExtractor foreach (var line in combinedLines) { - var match = Regex.Match(line, - @"(CMD|COP|CAB|SEN)\s+(\w+)\s+([A-Za-zÀ-ÿ\s\-]+)"); + var match = Regex.Match(line, @"(CMD|COP|CAB|SEN)\s+(\w+)\s+([A-Za-zÀ-ÿ\s\-]+)"); if (match.Success) { Crew crewMember = new Crew { - Function = match.Groups[1].Value, - Lc = match.Groups[2].Value, - Name = match.Groups[3].Value, + Function = match.Groups[1].Success ? match.Groups[1].Value : "N/A", + Lc = match.Groups[2].Success ? match.Groups[2].Value : "N/A", + Name = match.Groups[3].Success ? match.Groups[3].Value : "N/A", }; crewList.Add(crewMember); } + else + { + crewList.Add(new Crew { Function = "N/A", Lc = "N/A", Name = "N/A" }); + } } return crewList; } -} \ No newline at end of file +} diff --git a/capzlog-ExtractDataFromPDF/util/ExtractText.cs b/capzlog-ExtractDataFromPDF/util/ExtractText.cs index 5f4a700..79e44f3 100644 --- a/capzlog-ExtractDataFromPDF/util/ExtractText.cs +++ b/capzlog-ExtractDataFromPDF/util/ExtractText.cs @@ -6,7 +6,7 @@ namespace capzlog_ExtractDataFromPDF; public class ExtractText { - public string ExtractTextBlocks(PdfDocument document, int pageNumber) + public string GetTextFormSinglePage(PdfDocument document, int pageNumber) { if (pageNumber < 1 || pageNumber > document.NumberOfPages) { diff --git a/capzlog-ExtractDataFromPDF/util/FlightPlanExtractor.cs b/capzlog-ExtractDataFromPDF/util/FlightPlanExtractor.cs index 8e50410..f14b420 100644 --- a/capzlog-ExtractDataFromPDF/util/FlightPlanExtractor.cs +++ b/capzlog-ExtractDataFromPDF/util/FlightPlanExtractor.cs @@ -12,6 +12,7 @@ public class FlightPlanExtractor { OperationText = operationText; } + public Flight ExtractFlightPlan() { @@ -35,40 +36,59 @@ public class FlightPlanExtractor var flightNumberPattern = @"FltNr:\s([A-Z0-9]+)"; var atcCodePattern = @"ATC:\s([A-Z0-9]+)"; - FlightInfo flightInfo = new FlightInfo(); + var dateMatch = Regex.Match(OperationText, datePattern); if (dateMatch.Success) { - // Converti la data nel formato desiderato - string originalDate = dateMatch.Groups[1].Value; // e.g., "19MAR24" + string originalDate = dateMatch.Groups[1].Value; DateTime parsedDate = DateTime.ParseExact(originalDate, "ddMMMyy", CultureInfo.InvariantCulture); - flightInfo.Date = parsedDate.ToString("dd.MM.yyyy"); // e.g., "19.03.2024" + flightInfo.Date = parsedDate.ToString("dd.MM.yyyy"); + } + else + { + flightInfo.Date = "N/A"; } - flightInfo.Registration = Regex.Match(OperationText, registrationPattern).Groups[1].Value; - flightInfo.AircraftType = Regex.Match(OperationText, aircraftTypePattern).Groups[1].Value; - flightInfo.Departure = Regex.Match(OperationText, departurePattern).Groups[1].Value; - flightInfo.Destination = Regex.Match(OperationText, destinationPattern).Groups[1].Value; - flightInfo.Alternate1 = Regex.Match(OperationText, alternate1Pattern).Groups[1].Value; - flightInfo.FlightNumber = Regex.Match(OperationText, flightNumberPattern).Groups[1].Value; - flightInfo.ATCCode = Regex.Match(OperationText, atcCodePattern).Groups[1].Value; + flightInfo.Registration = Regex.Match(OperationText, registrationPattern).Groups[1].Success ? + Regex.Match(OperationText, registrationPattern).Groups[1].Value : "N/A"; + + flightInfo.AircraftType = Regex.Match(OperationText, aircraftTypePattern).Groups[1].Success ? + Regex.Match(OperationText, aircraftTypePattern).Groups[1].Value : "N/A"; + + flightInfo.Departure = Regex.Match(OperationText, departurePattern).Groups[1].Success ? + Regex.Match(OperationText, departurePattern).Groups[1].Value : "N/A"; + + flightInfo.Destination = Regex.Match(OperationText, destinationPattern).Groups[1].Success ? + Regex.Match(OperationText, destinationPattern).Groups[1].Value : "N/A"; + + flightInfo.Alternate1 = Regex.Match(OperationText, alternate1Pattern).Groups[1].Success ? + Regex.Match(OperationText, alternate1Pattern).Groups[1].Value : "N/A"; + + flightInfo.FlightNumber = Regex.Match(OperationText, flightNumberPattern).Groups[1].Success ? + Regex.Match(OperationText, flightNumberPattern).Groups[1].Value : "N/A"; + + flightInfo.ATCCode = Regex.Match(OperationText, atcCodePattern).Groups[1].Success ? + Regex.Match(OperationText, atcCodePattern).Groups[1].Value : "N/A"; + return flightInfo; } private Times ExtractTimes() { - // Pattern per catturare i tempi STD e STA var timesPattern = @"STD:\s(\d{2}:\d{2})\sSTA:\s(\d{2}:\d{2})"; - var times = new Times(); - - // Esegui il match per estrarre i tempi var match = Regex.Match(OperationText, timesPattern); + if (match.Success) { - times.ScheduledDepartureTime = match.Groups[1].Value; // Estrae STD - times.ScheduledArrivalTime = match.Groups[2].Value; // Estrae STA + times.ScheduledDepartureTime = match.Groups[1].Value; + times.ScheduledArrivalTime = match.Groups[2].Value; + } + else + { + times.ScheduledDepartureTime = "N/A"; + times.ScheduledArrivalTime = "N/A"; } return times; @@ -77,47 +97,29 @@ public class FlightPlanExtractor private LoadMass ExtractLoadMass() { var zeroFuelMassPattern = @"ZFM:\s(\d+)"; - LoadMass loadMass = new LoadMass(); - + var limcMatch = Regex.Match(OperationText, zeroFuelMassPattern); - if (limcMatch.Success) - { - loadMass.ZeroFuelMass = limcMatch.Groups[1].Value; // Fuel quantity for LIMC - } + loadMass.ZeroFuelMass = limcMatch.Success ? limcMatch.Groups[1].Value : "N/A"; return loadMass; } private Fuel ExtractFuel() { - // Regular expressions to capture the values for LIMC, LIML, and MIN var limcPattern = @"LIMC:\s([^\s]+ [^\s])"; var limlPattern = @"LIML:\s([^\s]+ [^\s])"; var minPattern = @"MIN:\s([^\s]+ [^\s])"; - var fuelData = new Fuel(); - // Match for LIMC var limcMatch = Regex.Match(OperationText, limcPattern); - if (limcMatch.Success) - { - fuelData.Limc = limcMatch.Groups[1].Value; // Fuel quantity for LIMC - } + fuelData.Limc = limcMatch.Success ? limcMatch.Groups[1].Value : "N/A"; - // Match for LIML var limlMatch = Regex.Match(OperationText, limlPattern); - if (limlMatch.Success) - { - fuelData.Liml = limlMatch.Groups[1].Value; // Fuel quantity for LIML - } + fuelData.Liml = limlMatch.Success ? limlMatch.Groups[1].Value : "N/A"; - // Match for MIN var minMatch = Regex.Match(OperationText, minPattern); - if (minMatch.Success) - { - fuelData.MinimumRequired = minMatch.Groups[1].Value; // Fuel quantity for MIN - } + fuelData.MinimumRequired = minMatch.Success ? minMatch.Groups[1].Value : "N/A"; return fuelData; } @@ -125,20 +127,20 @@ public class FlightPlanExtractor private Corrections ExtractCorrections() { var gainLossPattern = @"Gain\s*/\s*Loss:\s*(GAIN|LOSS)\s*(\d+)\$/TON"; - var corrections = new Corrections(); - - // Esegui il match per estrarre il tipo di guadagno/perdita e l'importo var match = Regex.Match(OperationText, gainLossPattern); + if (match.Success) { - string type = match.Groups[1].Value; // "GAIN" o "LOSS" - double amount = double.Parse(match.Groups[2].Value); // Importo numerico - - // Imposta il valore in positivo per GAIN e in negativo per LOSS + string type = match.Groups[1].Value; + double amount = double.Parse(match.Groups[2].Value); corrections.GainOrLoss = type == "GAIN" ? amount : -amount; } + else + { + corrections.GainOrLoss = 0; // Assuming no gain or loss found + } return corrections; } -} \ No newline at end of file +} diff --git a/capzlog-ExtractDataFromPDF/util/indexing.cs b/capzlog-ExtractDataFromPDF/util/indexing.cs index eea469f..8835131 100644 --- a/capzlog-ExtractDataFromPDF/util/indexing.cs +++ b/capzlog-ExtractDataFromPDF/util/indexing.cs @@ -1,5 +1,4 @@ - - +using System.Text.RegularExpressions; using UglyToad.PdfPig; namespace capzlog_ExtractDataFromPDF; @@ -9,26 +8,62 @@ public class Indexing private List<(int pageNumber, string firstLine)> _pageIndex; private readonly PdfDocument _pdfDoc; - public Indexing(PdfDocument pdfDoc) + public Indexing(PdfDocument pdfDoc) { _pdfDoc = pdfDoc; _pageIndex = GetFirstLines(); } - + private List<(int pageNumber, string firstLine)> GetFirstLines() { List<(int pageNumber, string firstLine)> index = new List<(int pageNumber, string firstLine)>(); ExtractText extractText = new ExtractText(); var firstLines = extractText.ExtractFirstLines(_pdfDoc); - for (int i = 1; i <= _pdfDoc.NumberOfPages; i++) + for (int i = 0; i < _pdfDoc.NumberOfPages; i++) { - - index.Add((i, firstLines[i])); } + return index; } - + + + public List GetIndexFlightPlans() + { + List flightPlans = new List(); + + string pattern = @"FMS\sIDENT=\S+\s+Log\sNr\.\:\s+\S+"; + + foreach (var (pageNumber, line) in _pageIndex) + { + if (Regex.IsMatch(line, @"FMS\sIDENT=\S+\s+Log\sNr\.\:\s+\S+\s+Page\s1")) + { + flightPlans.Add(pageNumber+1); + } + } + + return flightPlans; + } + + + //TODO: check for FlrNr, first line, and page 1 of + public int getIndexCrewBriefing(string flightNumber) + { + foreach (var (pageNumber, line) in _pageIndex) + { + if (line.Equals("Flight Assignment / Flight Crew Briefing")) + { + ExtractText extractText = new ExtractText(); + string content = extractText.GetTextFormSinglePage(_pdfDoc, pageNumber); + if (content.Contains(flightNumber) && Regex.IsMatch(content, @"Page\s1\sof")) + { + return pageNumber; + } + } + } + return -1; + } + public int GetPageNumber(string firstLine) { foreach (var (pageNumber, line) in _pageIndex) @@ -38,6 +73,7 @@ public class Indexing return pageNumber; } } + return -1; } } \ No newline at end of file