Added Elaborate class for logic

This commit is contained in:
Joe Küng
2024-11-07 11:25:17 +01:00
parent b100811fcd
commit 88ced93bb3
9 changed files with 212 additions and 121 deletions

View File

@@ -0,0 +1,41 @@
using capzlog_ExtractDataFromPDF.models;
using UglyToad.PdfPig;
namespace capzlog_ExtractDataFromPDF;
/// <summary>
/// Builds a complete <see cref="Flight"/> from a PDF: the flight plan found on
/// a given page plus the crew briefing page that mentions the same flight number.
/// </summary>
public class Elaborate
{
    private readonly PdfDocument _pdfDocument;

    // Created on first use and then reused. The Indexing constructor reads the
    // first line of every page, so building one per flight plan repeats that
    // whole-document pass for each call to GetAllFlightInfo.
    private readonly Lazy<Indexing> _indexing;

    /// <summary>Creates an instance bound to an already opened PDF document.</summary>
    /// <param name="pdfDocument">Document to read from; the caller keeps ownership.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pdfDocument"/> is null.</exception>
    public Elaborate(PdfDocument pdfDocument)
    {
        _pdfDocument = pdfDocument ?? throw new ArgumentNullException(nameof(pdfDocument));
        _indexing = new Lazy<Indexing>(() => new Indexing(_pdfDocument));
    }

    /// <summary>
    /// Extracts the flight plan from <paramref name="pageOfFlightPlan"/> and, when a
    /// matching crew briefing page is found, attaches the crew briefing to it.
    /// </summary>
    /// <param name="pageOfFlightPlan">Page number passed to <c>ExtractText.GetTextFormSinglePage</c>.</param>
    /// <returns>
    /// The extracted flight. When no crew briefing page is found the flight keeps
    /// the <c>CrewBriefing</c> value it was created with.
    /// </returns>
    public Flight GetAllFlightInfo(int pageOfFlightPlan)
    {
        ExtractText reader = new ExtractText();

        string flightPlanText = reader.GetTextFormSinglePage(_pdfDocument, pageOfFlightPlan);
        Flight flight = new FlightPlanExtractor(flightPlanText).ExtractFlightPlan();

        // A negative result is the "not found" sentinel of getIndexCrewBriefing.
        int indexCrewBriefing = _indexing.Value.getIndexCrewBriefing(flight.Info.FlightNumber);
        if (indexCrewBriefing < 0)
        {
            return flight;
        }

        string crewContent = reader.GetTextFormSinglePage(_pdfDocument, indexCrewBriefing);
        flight.CrewBriefing = new BriefingExtractor(crewContent).ExtractCrewBriefing();
        return flight;
    }
}

View File

@@ -1,6 +1,6 @@
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using capzlog_ExtractDataFromPDF.models;
using UglyToad.PdfPig; using UglyToad.PdfPig;
@@ -14,25 +14,39 @@ namespace capzlog_ExtractDataFromPDF
using (var pdfDoc = PdfDocument.Open(SRC)) using (var pdfDoc = PdfDocument.Open(SRC))
{ {
Indexing indexing = new Indexing(pdfDoc);
List<int> indexFlightPlans = indexing.GetIndexFlightPlans();
SinglePageReader reader = new SinglePageReader(); Console.WriteLine("Found "+indexFlightPlans.Count+" flight plans");
string content = reader.GetCrewAndFlightAssignment(pdfDoc, 89);
BriefingExtractor briefingExtractor = new BriefingExtractor(content);
Console.WriteLine("Crew Function: " + briefingExtractor.ExtractCrew()[0].Function); List<Flight> flights = new List<Flight>();
Console.WriteLine("Business Passengers: " + briefingExtractor.ExtractPassengers().Business);
Console.WriteLine("Flight Assignment DOI: " + briefingExtractor.ExtractFlightAssignment().DOI);
string content2 = reader.GetCrewAndFlightAssignment(pdfDoc, 12); Elaborate elaborate = new Elaborate(pdfDoc);
FlightPlanExtractor flightPlanExtractor = new FlightPlanExtractor(content2);
for (int i = 0; i < indexFlightPlans.Count; i++)
{
flights.Add(elaborate.GetAllFlightInfo(indexFlightPlans[i]));
}
flights.ForEach(flight =>
{
Console.WriteLine("Flight Number: " + flight.Info.FlightNumber);
Console.WriteLine("Aircraft Type: " + flight.Info.AircraftType);
Console.WriteLine("Departure: " + flight.Info.Departure);
Console.WriteLine("Arrival: " + flight.Info.Destination);
Console.WriteLine("Date: " + flight.Info.Date);
Console.WriteLine("Zero Fuel Mass: " + flight.MassLoad.ZeroFuelMass);
Console.WriteLine("Scheduled Departure Time: " + flight.Schedule.ScheduledDepartureTime);
flight.CrewBriefing.Crews.ForEach(crew =>
{
Console.WriteLine(crew.Function + ": " + crew.Name);
});
Console.WriteLine();
});
Console.WriteLine("Fuel Data Limc: " + flightPlanExtractor.ExtractFlightPlan().FuelData.Limc);
Console.WriteLine("Zero Fuel Mass: " + flightPlanExtractor.ExtractFlightPlan().MassLoad.ZeroFuelMass);
Console.WriteLine("Scheduled Arrival Time: " + flightPlanExtractor.ExtractFlightPlan().Schedule.ScheduledArrivalTime);
Console.WriteLine("GainLoss: " + flightPlanExtractor.ExtractFlightPlan().Correction.GainOrLoss);
Console.WriteLine("Aircraft Type: " + flightPlanExtractor.ExtractFlightPlan().Info.AircraftType);
Console.WriteLine("Flight Date: " + flightPlanExtractor.ExtractFlightPlan().Info.Date);
Console.WriteLine("Departure: " + flightPlanExtractor.ExtractFlightPlan().Info.Departure);
} }

View File

@@ -1,24 +0,0 @@
using capzlog_ExtractDataFromPDF.models;
using UglyToad.PdfPig;
using UglyToad.PdfPig.DocumentLayoutAnalysis;
namespace capzlog_ExtractDataFromPDF;
/// <summary>
/// Reads the text of one PDF page; used to fetch the flight assignment and
/// flight crew pages.
/// </summary>
public class SinglePageReader
{
    /// <summary>Returns the text that <c>ExtractText.ExtractTextBlocks</c> produces for one page.</summary>
    /// <param name="pdfDocument">Document to read from.</param>
    /// <param name="pageNumber">Page number forwarded unchanged to <c>ExtractTextBlocks</c>.</param>
    /// <returns>The extracted page text.</returns>
    public string GetCrewAndFlightAssignment(PdfDocument pdfDocument, int pageNumber)
    {
        return new ExtractText().ExtractTextBlocks(pdfDocument, pageNumber);
    }
}

View File

@@ -1,5 +1,12 @@
namespace capzlog_ExtractDataFromPDF.models namespace capzlog_ExtractDataFromPDF.models
{ {
/// <summary>
/// Data taken from a crew briefing page: the crew list, the flight assignment
/// values and the passenger counts.
/// </summary>
public class CrewBriefing
{
    /// <summary>Crew members listed on the briefing; starts as an empty list.</summary>
    public List<Crew> Crews { get; set; } = new();

    /// <summary>Flight assignment values; starts as a default instance.</summary>
    public FlightAssigment FlightAssignment { get; set; } = new();

    /// <summary>Passenger counts; starts as a default instance.</summary>
    public Passegers Passengers { get; set; } = new();
}
public class Crew public class Crew
{ {
public string Function { get; set; } = ""; public string Function { get; set; } = "";

View File

@@ -8,6 +8,8 @@ namespace capzlog_ExtractDataFromPDF.models
public LoadMass MassLoad { get; set; } = new LoadMass(); public LoadMass MassLoad { get; set; } = new LoadMass();
public Fuel FuelData { get; set; } = new Fuel(); public Fuel FuelData { get; set; } = new Fuel();
public Corrections Correction { get; set; } = new Corrections(); public Corrections Correction { get; set; } = new Corrections();
public CrewBriefing CrewBriefing { get; set; } = new CrewBriefing();
} }
public class FlightInfo public class FlightInfo

View File

@@ -12,41 +12,49 @@ public class BriefingExtractor
this.briefingText = briefingText; this.briefingText = briefingText;
} }
public FlightAssigment ExtractFlightAssignment() public CrewBriefing ExtractCrewBriefing()
{
CrewBriefing crewBriefing = new CrewBriefing();
crewBriefing.Crews = ExtractCrew();
crewBriefing.FlightAssignment = ExtractFlightAssignment();
crewBriefing.Passengers = ExtractPassengers();
return crewBriefing;
}
private FlightAssigment ExtractFlightAssignment()
{ {
FlightAssigment flightAssignment = new FlightAssigment(); FlightAssigment flightAssignment = new FlightAssigment();
var dowMatch = Regex.Match(briefingText, @"DOW:\s*(\d+(?:\.\d+)?)kg"); var dowMatch = Regex.Match(briefingText, @"DOW:\s*(\d+(?:\.\d+)?)kg");
if (dowMatch.Success) flightAssignment.DOW = dowMatch.Success ? double.Parse(dowMatch.Groups[1].Value) : 0.0;
{
flightAssignment.DOW = double.Parse(dowMatch.Groups[1].Value);
}
var doiMatch = Regex.Match(briefingText, @"DOI:\s*(\d+(?:\.\d+)?)"); var doiMatch = Regex.Match(briefingText, @"DOI:\s*(\d+(?:\.\d+)?)");
if (doiMatch.Success) flightAssignment.DOI = doiMatch.Success ? double.Parse(doiMatch.Groups[1].Value) : 0.0;
{
flightAssignment.DOI = double.Parse(doiMatch.Groups[1].Value);
}
return flightAssignment; return flightAssignment;
} }
public Passegers ExtractPassengers() private Passegers ExtractPassengers()
{ {
Passegers passengers = new Passegers(); Passegers passengers = new Passegers();
var paxMatch = Regex.Match(briefingText, @"\d+\/(\d+)"); var paxMatch = Regex.Match(briefingText, @"\d+\/(\d+)");
if (paxMatch.Success) if (paxMatch.Success)
{ {
passengers.Business = int.Parse(paxMatch.Groups[0].Value.Split("/")[0]);; passengers.Business = int.Parse(paxMatch.Groups[0].Value.Split("/")[0]);
passengers.Economy = int.Parse(paxMatch.Groups[1].Value); passengers.Economy = int.Parse(paxMatch.Groups[1].Value);
} }
else
{
passengers.Business = 0;
passengers.Economy = 0;
}
return passengers; return passengers;
} }
//TODO: check for multiple crew members in the same function // TODO: check for multiple crew members in the same function
public List<Crew> ExtractCrew() private List<Crew> ExtractCrew()
{ {
List<Crew> crewList = new List<Crew>(); List<Crew> crewList = new List<Crew>();
@@ -73,15 +81,17 @@ public class BriefingExtractor
} }
} }
if (!string.IsNullOrEmpty(currentLine)) if (!string.IsNullOrEmpty(currentLine))
{ {
combinedLines.Add(currentLine.Trim()); combinedLines.Add(currentLine.Trim());
} }
combinedLines.RemoveAt(0); if (combinedLines.Count > 0)
{
combinedLines.RemoveAt(0);
}
int index = combinedLines[combinedLines.Count - 1].IndexOf("X:"); int index = combinedLines.Count > 0 ? combinedLines[combinedLines.Count - 1].IndexOf("X:") : -1;
if (index >= 0) if (index >= 0)
{ {
@@ -90,18 +100,21 @@ public class BriefingExtractor
foreach (var line in combinedLines) foreach (var line in combinedLines)
{ {
var match = Regex.Match(line, var match = Regex.Match(line, @"(CMD|COP|CAB|SEN)\s+(\w+)\s+([A-Za-zÀ-ÿ\s\-]+)");
@"(CMD|COP|CAB|SEN)\s+(\w+)\s+([A-Za-zÀ-ÿ\s\-]+)");
if (match.Success) if (match.Success)
{ {
Crew crewMember = new Crew Crew crewMember = new Crew
{ {
Function = match.Groups[1].Value, Function = match.Groups[1].Success ? match.Groups[1].Value : "N/A",
Lc = match.Groups[2].Value, Lc = match.Groups[2].Success ? match.Groups[2].Value : "N/A",
Name = match.Groups[3].Value, Name = match.Groups[3].Success ? match.Groups[3].Value : "N/A",
}; };
crewList.Add(crewMember); crewList.Add(crewMember);
} }
else
{
crewList.Add(new Crew { Function = "N/A", Lc = "N/A", Name = "N/A" });
}
} }
return crewList; return crewList;

View File

@@ -6,7 +6,7 @@ namespace capzlog_ExtractDataFromPDF;
public class ExtractText public class ExtractText
{ {
public string ExtractTextBlocks(PdfDocument document, int pageNumber) public string GetTextFormSinglePage(PdfDocument document, int pageNumber)
{ {
if (pageNumber < 1 || pageNumber > document.NumberOfPages) if (pageNumber < 1 || pageNumber > document.NumberOfPages)
{ {

View File

@@ -13,6 +13,7 @@ public class FlightPlanExtractor
OperationText = operationText; OperationText = operationText;
} }
public Flight ExtractFlightPlan() public Flight ExtractFlightPlan()
{ {
Flight flight = new Flight(); Flight flight = new Flight();
@@ -35,40 +36,59 @@ public class FlightPlanExtractor
var flightNumberPattern = @"FltNr:\s([A-Z0-9]+)"; var flightNumberPattern = @"FltNr:\s([A-Z0-9]+)";
var atcCodePattern = @"ATC:\s([A-Z0-9]+)"; var atcCodePattern = @"ATC:\s([A-Z0-9]+)";
FlightInfo flightInfo = new FlightInfo(); FlightInfo flightInfo = new FlightInfo();
var dateMatch = Regex.Match(OperationText, datePattern); var dateMatch = Regex.Match(OperationText, datePattern);
if (dateMatch.Success) if (dateMatch.Success)
{ {
// Converti la data nel formato desiderato string originalDate = dateMatch.Groups[1].Value;
string originalDate = dateMatch.Groups[1].Value; // e.g., "19MAR24"
DateTime parsedDate = DateTime.ParseExact(originalDate, "ddMMMyy", CultureInfo.InvariantCulture); DateTime parsedDate = DateTime.ParseExact(originalDate, "ddMMMyy", CultureInfo.InvariantCulture);
flightInfo.Date = parsedDate.ToString("dd.MM.yyyy"); // e.g., "19.03.2024" flightInfo.Date = parsedDate.ToString("dd.MM.yyyy");
}
else
{
flightInfo.Date = "N/A";
} }
flightInfo.Registration = Regex.Match(OperationText, registrationPattern).Groups[1].Value; flightInfo.Registration = Regex.Match(OperationText, registrationPattern).Groups[1].Success ?
flightInfo.AircraftType = Regex.Match(OperationText, aircraftTypePattern).Groups[1].Value; Regex.Match(OperationText, registrationPattern).Groups[1].Value : "N/A";
flightInfo.Departure = Regex.Match(OperationText, departurePattern).Groups[1].Value;
flightInfo.Destination = Regex.Match(OperationText, destinationPattern).Groups[1].Value; flightInfo.AircraftType = Regex.Match(OperationText, aircraftTypePattern).Groups[1].Success ?
flightInfo.Alternate1 = Regex.Match(OperationText, alternate1Pattern).Groups[1].Value; Regex.Match(OperationText, aircraftTypePattern).Groups[1].Value : "N/A";
flightInfo.FlightNumber = Regex.Match(OperationText, flightNumberPattern).Groups[1].Value;
flightInfo.ATCCode = Regex.Match(OperationText, atcCodePattern).Groups[1].Value; flightInfo.Departure = Regex.Match(OperationText, departurePattern).Groups[1].Success ?
Regex.Match(OperationText, departurePattern).Groups[1].Value : "N/A";
flightInfo.Destination = Regex.Match(OperationText, destinationPattern).Groups[1].Success ?
Regex.Match(OperationText, destinationPattern).Groups[1].Value : "N/A";
flightInfo.Alternate1 = Regex.Match(OperationText, alternate1Pattern).Groups[1].Success ?
Regex.Match(OperationText, alternate1Pattern).Groups[1].Value : "N/A";
flightInfo.FlightNumber = Regex.Match(OperationText, flightNumberPattern).Groups[1].Success ?
Regex.Match(OperationText, flightNumberPattern).Groups[1].Value : "N/A";
flightInfo.ATCCode = Regex.Match(OperationText, atcCodePattern).Groups[1].Success ?
Regex.Match(OperationText, atcCodePattern).Groups[1].Value : "N/A";
return flightInfo; return flightInfo;
} }
private Times ExtractTimes() private Times ExtractTimes()
{ {
// Pattern per catturare i tempi STD e STA
var timesPattern = @"STD:\s(\d{2}:\d{2})\sSTA:\s(\d{2}:\d{2})"; var timesPattern = @"STD:\s(\d{2}:\d{2})\sSTA:\s(\d{2}:\d{2})";
var times = new Times(); var times = new Times();
// Esegui il match per estrarre i tempi
var match = Regex.Match(OperationText, timesPattern); var match = Regex.Match(OperationText, timesPattern);
if (match.Success) if (match.Success)
{ {
times.ScheduledDepartureTime = match.Groups[1].Value; // Estrae STD times.ScheduledDepartureTime = match.Groups[1].Value;
times.ScheduledArrivalTime = match.Groups[2].Value; // Estrae STA times.ScheduledArrivalTime = match.Groups[2].Value;
}
else
{
times.ScheduledDepartureTime = "N/A";
times.ScheduledArrivalTime = "N/A";
} }
return times; return times;
@@ -77,47 +97,29 @@ public class FlightPlanExtractor
private LoadMass ExtractLoadMass() private LoadMass ExtractLoadMass()
{ {
var zeroFuelMassPattern = @"ZFM:\s(\d+)"; var zeroFuelMassPattern = @"ZFM:\s(\d+)";
LoadMass loadMass = new LoadMass(); LoadMass loadMass = new LoadMass();
var limcMatch = Regex.Match(OperationText, zeroFuelMassPattern); var limcMatch = Regex.Match(OperationText, zeroFuelMassPattern);
if (limcMatch.Success) loadMass.ZeroFuelMass = limcMatch.Success ? limcMatch.Groups[1].Value : "N/A";
{
loadMass.ZeroFuelMass = limcMatch.Groups[1].Value; // Fuel quantity for LIMC
}
return loadMass; return loadMass;
} }
private Fuel ExtractFuel() private Fuel ExtractFuel()
{ {
// Regular expressions to capture the values for LIMC, LIML, and MIN
var limcPattern = @"LIMC:\s([^\s]+ [^\s])"; var limcPattern = @"LIMC:\s([^\s]+ [^\s])";
var limlPattern = @"LIML:\s([^\s]+ [^\s])"; var limlPattern = @"LIML:\s([^\s]+ [^\s])";
var minPattern = @"MIN:\s([^\s]+ [^\s])"; var minPattern = @"MIN:\s([^\s]+ [^\s])";
var fuelData = new Fuel(); var fuelData = new Fuel();
// Match for LIMC
var limcMatch = Regex.Match(OperationText, limcPattern); var limcMatch = Regex.Match(OperationText, limcPattern);
if (limcMatch.Success) fuelData.Limc = limcMatch.Success ? limcMatch.Groups[1].Value : "N/A";
{
fuelData.Limc = limcMatch.Groups[1].Value; // Fuel quantity for LIMC
}
// Match for LIML
var limlMatch = Regex.Match(OperationText, limlPattern); var limlMatch = Regex.Match(OperationText, limlPattern);
if (limlMatch.Success) fuelData.Liml = limlMatch.Success ? limlMatch.Groups[1].Value : "N/A";
{
fuelData.Liml = limlMatch.Groups[1].Value; // Fuel quantity for LIML
}
// Match for MIN
var minMatch = Regex.Match(OperationText, minPattern); var minMatch = Regex.Match(OperationText, minPattern);
if (minMatch.Success) fuelData.MinimumRequired = minMatch.Success ? minMatch.Groups[1].Value : "N/A";
{
fuelData.MinimumRequired = minMatch.Groups[1].Value; // Fuel quantity for MIN
}
return fuelData; return fuelData;
} }
@@ -125,19 +127,19 @@ public class FlightPlanExtractor
private Corrections ExtractCorrections() private Corrections ExtractCorrections()
{ {
var gainLossPattern = @"Gain\s*/\s*Loss:\s*(GAIN|LOSS)\s*(\d+)\$/TON"; var gainLossPattern = @"Gain\s*/\s*Loss:\s*(GAIN|LOSS)\s*(\d+)\$/TON";
var corrections = new Corrections(); var corrections = new Corrections();
// Esegui il match per estrarre il tipo di guadagno/perdita e l'importo
var match = Regex.Match(OperationText, gainLossPattern); var match = Regex.Match(OperationText, gainLossPattern);
if (match.Success) if (match.Success)
{ {
string type = match.Groups[1].Value; // "GAIN" o "LOSS" string type = match.Groups[1].Value;
double amount = double.Parse(match.Groups[2].Value); // Importo numerico double amount = double.Parse(match.Groups[2].Value);
// Imposta il valore in positivo per GAIN e in negativo per LOSS
corrections.GainOrLoss = type == "GAIN" ? amount : -amount; corrections.GainOrLoss = type == "GAIN" ? amount : -amount;
} }
else
{
corrections.GainOrLoss = 0; // Assuming no gain or loss found
}
return corrections; return corrections;
} }

View File

@@ -1,5 +1,4 @@
using System.Text.RegularExpressions;
using UglyToad.PdfPig; using UglyToad.PdfPig;
namespace capzlog_ExtractDataFromPDF; namespace capzlog_ExtractDataFromPDF;
@@ -9,7 +8,7 @@ public class Indexing
private List<(int pageNumber, string firstLine)> _pageIndex; private List<(int pageNumber, string firstLine)> _pageIndex;
private readonly PdfDocument _pdfDoc; private readonly PdfDocument _pdfDoc;
public Indexing(PdfDocument pdfDoc) public Indexing(PdfDocument pdfDoc)
{ {
_pdfDoc = pdfDoc; _pdfDoc = pdfDoc;
_pageIndex = GetFirstLines(); _pageIndex = GetFirstLines();
@@ -20,15 +19,51 @@ public class Indexing
List<(int pageNumber, string firstLine)> index = new List<(int pageNumber, string firstLine)>(); List<(int pageNumber, string firstLine)> index = new List<(int pageNumber, string firstLine)>();
ExtractText extractText = new ExtractText(); ExtractText extractText = new ExtractText();
var firstLines = extractText.ExtractFirstLines(_pdfDoc); var firstLines = extractText.ExtractFirstLines(_pdfDoc);
for (int i = 1; i <= _pdfDoc.NumberOfPages; i++) for (int i = 0; i < _pdfDoc.NumberOfPages; i++)
{ {
index.Add((i, firstLines[i])); index.Add((i, firstLines[i]));
} }
return index; return index;
} }
/// <summary>
/// Finds every page whose first line is a flight-plan header for "Page 1".
/// </summary>
/// <returns>
/// The matching page numbers, converted from the zero-based positions stored in
/// the page index to one-based page numbers; empty when nothing matches.
/// </returns>
public List<int> GetIndexFlightPlans()
{
    // NOTE(review): relies on System.Text.RegularExpressions being in scope for
    // this file - confirm the using directive (or a global using) is present.
    //
    // The single pattern actually used for matching. The trailing (?!\d) stops
    // "Page 1" from also matching "Page 10" .. "Page 19", which would otherwise
    // report later pages of a long flight plan as new flight plans.
    const string flightPlanHeaderPattern = @"FMS\sIDENT=\S+\s+Log\sNr\.\:\s+\S+\s+Page\s1(?!\d)";

    List<int> flightPlans = new List<int>();
    foreach (var (pageNumber, line) in _pageIndex)
    {
        if (Regex.IsMatch(line, flightPlanHeaderPattern))
        {
            // Index entries are zero-based; callers work with one-based page numbers.
            flightPlans.Add(pageNumber + 1);
        }
    }

    return flightPlans;
}
/// <summary>
/// Finds the first page of the crew briefing that belongs to a flight.
/// A page qualifies when its first line is exactly
/// "Flight Assignment / Flight Crew Briefing" and its text contains both the
/// flight number and "Page 1 of".
/// </summary>
/// <param name="flightNumber">Flight number (FltNr) to look for in the page text.</param>
/// <returns>
/// The one-based page number of the matching page, suitable for
/// <c>ExtractText.GetTextFormSinglePage</c>, or -1 when no page qualifies.
/// </returns>
public int getIndexCrewBriefing(string flightNumber)
{
    // An empty flight number would make Contains() succeed on every briefing
    // page, and null would make it throw; neither can identify a flight.
    if (string.IsNullOrEmpty(flightNumber))
    {
        return -1;
    }

    ExtractText extractText = new ExtractText();
    foreach (var (pageNumber, line) in _pageIndex)
    {
        if (!line.Equals("Flight Assignment / Flight Crew Briefing"))
        {
            continue;
        }

        // The page index stores zero-based positions, while GetTextFormSinglePage
        // accepts pages 1..NumberOfPages. Convert before reading and before
        // returning, the same way GetIndexFlightPlans does.
        int pdfPage = pageNumber + 1;
        string content = extractText.GetTextFormSinglePage(_pdfDoc, pdfPage);
        if (content.Contains(flightNumber) && Regex.IsMatch(content, @"Page\s1\sof"))
        {
            return pdfPage;
        }
    }

    return -1;
}
public int GetPageNumber(string firstLine) public int GetPageNumber(string firstLine)
{ {
foreach (var (pageNumber, line) in _pageIndex) foreach (var (pageNumber, line) in _pageIndex)
@@ -38,6 +73,7 @@ public class Indexing
return pageNumber; return pageNumber;
} }
} }
return -1; return -1;
} }
} }