using System;
using System.Text;
using System.IO;
using System.Threading.Tasks;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Kernel.Pdf.Canvas.Parser.Listener;
namespace Tools.PdfProvider
{
/// <summary>
/// Extracts the plain text of a PDF document using iText.
/// </summary>
public class SimplePdfReader
{
    /// <summary>
    /// Reads every page of the PDF contained in <paramref name="stream"/> and
    /// returns the concatenated text.
    /// </summary>
    /// <remarks>
    /// Despite the <c>Async</c> suffix, extraction runs synchronously on the
    /// calling thread and the returned task is already completed. The text of
    /// consecutive pages is appended back to back; no separator is inserted
    /// between pages.
    /// </remarks>
    /// <param name="stream">Stream positioned at the PDF document to read.</param>
    /// <returns>
    /// A completed task whose result is the text of all pages, in page order.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="stream"/> is <c>null</c>.
    /// </exception>
    public Task<string> ReadPdfAsync(Stream stream)
    {
        // Fail fast with a clear message instead of letting a null reach iText.
        if (stream == null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        StringBuilder text = new StringBuilder();

        // NOTE(review): disposing the reader may also close the caller's
        // stream — confirm against the iText PdfReader documentation.
        using (PdfReader reader = new PdfReader(stream))
        using (PdfDocument document = new PdfDocument(reader))
        {
            int pageCount = document.GetNumberOfPages();

            // Pages are addressed from 1 through pageCount inclusive.
            for (int page = 1; page <= pageCount; page++)
            {
                // A new strategy instance is created for each page.
                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                string pageText = PdfTextExtractor.GetTextFromPage(
                    document.GetPage(page),
                    strategy);

                text.Append(pageText);
            }
        }

        // The work above is synchronous, so wrap the result in a completed task.
        return Task.FromResult(text.ToString());
    }
}
}