Skip to content
Guides

Extract Text in PDF

This sample shows how to extract all of the text from a PDF document and save it to a plain-text file.

C#
using ComPDFKit.PDFDocument;
using System;
using System.IO;

namespace TextExtractTest
{
    /// <summary>
    /// Console sample that opens "CommonFivePage.pdf" and writes the text of every
    /// page to "PDFToText.txt" inside the sample's output directory.
    /// </summary>
    internal class TextExtractTest
    {
        /// <summary>
        /// Directory that receives the generated text file:
        /// "Output\TextExtract" three levels above the current working directory.
        /// </summary>
        private static readonly string outputPath = ResolveOutputPath();

        /// <summary>
        /// Builds the output directory path by walking up to three levels above the
        /// current working directory and appending "Output\TextExtract".
        /// </summary>
        /// <returns>The output directory path; the directory itself is not created here.</returns>
        private static string ResolveOutputPath()
        {
            string workingDirectory = Directory.GetCurrentDirectory();
            string root = workingDirectory;

            // Path.GetDirectoryName returns null once the root of the drive is reached,
            // so stop climbing as soon as there is no parent left.
            for (int level = 0; level < 3 && root != null; level++)
            {
                root = Path.GetDirectoryName(root);
            }

            // Fall back to the working directory when it is too shallow to climb three
            // levels, instead of silently producing a drive-root-relative path.
            return Path.Combine(root ?? workingDirectory, "Output", "TextExtract");
        }

        /// <summary>
        /// Entry point: verifies the license, opens the sample document, makes sure the
        /// output directory exists, runs the text extraction and reports the result.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            #region Preparation work
            Console.WriteLine("Running TextExtract test sample…\r\n");

            SDKLicenseHelper.LicenseVerify();
            // NOTE(review): presumably null when the file cannot be opened — confirm
            // against the SDK; PDFToText guards against null either way.
            CPDFDocument document = CPDFDocument.InitWithFilePath("CommonFivePage.pdf");

            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(outputPath);
            #endregion

            if (PDFToText(document))
            {
                Console.WriteLine("PDF to text done.");
            }
            else
            {
                Console.WriteLine("PDF to text failed.");
            }
            Console.WriteLine("--------------------");
            Console.WriteLine("Done!");
            Console.WriteLine("--------------------");
            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Extracts the text of all pages of <paramref name="document"/> into
        /// "PDFToText.txt" in the output directory.
        /// </summary>
        /// <param name="document">The document to extract text from; may be null if opening failed.</param>
        /// <returns>
        /// <c>true</c> when the SDK reports success; <c>false</c> when the document is
        /// null, has no pages, or the SDK call fails.
        /// </returns>
        private static bool PDFToText(CPDFDocument document)
        {
            if (document == null)
            {
                Console.WriteLine("The PDF document could not be opened.");
                return false;
            }

            int pageCount = document.PageCount;
            if (pageCount <= 0)
            {
                // Avoid requesting the meaningless page range "1-0".
                Console.WriteLine("The PDF document has no pages.");
                return false;
            }

            string path = Path.Combine(outputPath, "PDFToText.txt");

            // Page ranges are counted from 1, so "1-<PageCount>" covers the whole document.
            string pageRange = "1-" + pageCount.ToString();
            if (!document.PdfToText(pageRange, path))
            {
                return false;
            }

            Console.WriteLine("Browse the generated file in " + path);
            return true;
        }
    }
}