// SimplePdfReader.cs — Home / src / Model / Tools.PdfProvider / SimplePdfReader.cs
using System;
using System.Text;
using System.IO;
using System.Threading.Tasks;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Kernel.Pdf.Canvas.Parser.Listener;
namespace Tools.PdfProvider
{
/// <summary>
/// Extracts the plain text of a PDF document using iText.
/// </summary>
public class SimplePdfReader
{
    /// <summary>
    /// Reads every page of the PDF contained in <paramref name="stream"/> and
    /// returns the text of all pages concatenated in page order.
    /// </summary>
    /// <param name="stream">Stream positioned at the PDF content to read.</param>
    /// <param name="ignoreError">
    /// When <c>true</c>, any exception raised while reading is swallowed and its
    /// <see cref="Exception.Message"/> is returned as the result instead.
    /// When <c>false</c>, the exception is rethrown to the caller.
    /// </param>
    /// <returns>
    /// An already-completed task holding the extracted text, or the exception
    /// message when <paramref name="ignoreError"/> is <c>true</c> and reading failed.
    /// </returns>
    /// <remarks>
    /// The extraction runs synchronously on the calling thread; the task is only a
    /// wrapper around the finished result. Because the method is not <c>async</c>,
    /// a rethrown exception surfaces at the call site rather than through the task.
    /// </remarks>
    public Task<string> ReadPdfAsync(
        Stream stream,
        bool ignoreError
    )
    {
        string result;

        try
        {
            result = ExtractAllPages(stream);
        }
        catch (Exception ex)
        {
            if (!ignoreError)
            {
                throw;
            }

            // Best-effort mode: the caller receives the error text as the result.
            result = ex.Message;
        }

        return Task.FromResult(result);
    }

    /// <summary>
    /// Opens the PDF in <paramref name="stream"/> and appends the text of each page,
    /// first to last, with no separator inserted between pages.
    /// </summary>
    private static string ExtractAllPages(Stream stream)
    {
        var buffer = new StringBuilder();

        using (var reader = new PdfReader(stream))
        using (var document = new PdfDocument(reader))
        {
            int pageCount = document.GetNumberOfPages();

            // iText page numbers are 1-based.
            int pageNumber = 1;
            while (pageNumber <= pageCount)
            {
                // A fresh strategy instance is created for every page.
                ITextExtractionStrategy extractor = new SimpleTextExtractionStrategy();
                buffer.Append(
                    PdfTextExtractor.GetTextFromPage(document.GetPage(pageNumber), extractor)
                );
                pageNumber++;
            }
        }

        return buffer.ToString();
    }
}
}