using UglyToad.PdfPig;

namespace capzlog_ExtractDataFromPDF;

/// <summary>
/// Builds an in-memory index that maps each page number of a PDF document to
/// the first line reported for that page, and allows looking a page number up
/// by that first line.
/// </summary>
public class Indexing
{
    // (page number, first line) pairs, built once in the constructor.
    private readonly List<(int pageNumber, string firstLine)> _pageIndex;
    private readonly PdfDocument _pdfDoc;

    /// <summary>
    /// Creates the index for <paramref name="pdfDoc"/>. The index is built eagerly,
    /// so the document must be usable at construction time.
    /// </summary>
    /// <param name="pdfDoc">The PDF document to index.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="pdfDoc"/> is <c>null</c>.
    /// </exception>
    public Indexing(PdfDocument pdfDoc)
    {
        // Fail fast with a descriptive exception instead of a NullReferenceException
        // raised later from inside GetFirstLines.
        _pdfDoc = pdfDoc ?? throw new ArgumentNullException(nameof(pdfDoc));
        _pageIndex = GetFirstLines();
    }

    /// <summary>
    /// Collects one (page number, first line) entry for every page number from
    /// 1 through <c>NumberOfPages</c> of the document.
    /// </summary>
    /// <returns>The list of entries, ordered by ascending page number.</returns>
    private List<(int pageNumber, string firstLine)> GetFirstLines()
    {
        int pageCount = _pdfDoc.NumberOfPages;
        List<(int pageNumber, string firstLine)> index = new(pageCount);

        ExtractText extractText = new ExtractText();
        var firstLines = extractText.ExtractFirstLines(_pdfDoc);

        // NOTE(review): firstLines is indexed with the 1-based page number. This is
        // only correct if ExtractFirstLines returns a collection keyed by page number
        // (or otherwise 1-based). If it returns a 0-based list, this should be
        // firstLines[pageNumber - 1] - confirm against ExtractText.ExtractFirstLines.
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
        {
            index.Add((pageNumber, firstLines[pageNumber]));
        }

        return index;
    }

    /// <summary>
    /// Finds the page whose indexed first line equals <paramref name="firstLine"/>.
    /// </summary>
    /// <param name="firstLine">The first line to search for (exact, ordinal match).</param>
    /// <returns>
    /// The page number of the first matching entry, or <c>-1</c> when no entry matches.
    /// </returns>
    public int GetPageNumber(string firstLine)
    {
        foreach (var (pageNumber, line) in _pageIndex)
        {
            // Ordinal comparison: same semantics as the == operator on strings.
            if (string.Equals(line, firstLine, StringComparison.Ordinal))
            {
                return pageNumber;
            }
        }

        // Sentinel for "not found".
        return -1;
    }
}