first approach indexing

This commit is contained in:
Joe Küng
2024-11-06 10:29:04 +01:00
parent 9c6c61ada9
commit 728718ffe5

View File

@@ -0,0 +1,43 @@
using UglyToad.PdfPig;
namespace capzlog_ExtractDataFromPDF;
public class Indexing
{
private List<(int pageNumber, string firstLine)> _pageIndex;
private readonly PdfDocument _pdfDoc;
public Indexing(PdfDocument pdfDoc)
{
_pdfDoc = pdfDoc;
_pageIndex = GetFirstLines();
}
private List<(int pageNumber, string firstLine)> GetFirstLines()
{
List<(int pageNumber, string firstLine)> index = new List<(int pageNumber, string firstLine)>();
ExtractText extractText = new ExtractText();
var firstLines = extractText.ExtractFirstLines(_pdfDoc);
for (int i = 1; i <= _pdfDoc.NumberOfPages; i++)
{
index.Add((i, firstLines[i]));
}
return index;
}
public int GetPageNumber(string firstLine)
{
foreach (var (pageNumber, line) in _pageIndex)
{
if (line == firstLine)
{
return pageNumber;
}
}
return -1;
}
}