Skip to content
Guides

Extract Text in PDF

This sample shows how to extract the text from every page of a PDF document and save it to a plain-text file.

C#
using ComPDFKit.PDFDocument;
using System;
using System.IO;

namespace TextExtractTest
{
    /// <summary>
    /// Console sample that exports the text of every page of a PDF document
    /// to a plain-text file in the sample output folder.
    /// </summary>
    internal class TextExtractTest
    {
        // Base folder for sample output: three levels above the current working
        // directory when that many levels exist (see ResolveParentPath).
        private static readonly string parentPath = ResolveParentPath();

        // Every file produced by this sample is written below <parentPath>/Output/CS.
        private static readonly string outputPath = Path.Combine(parentPath, "Output", "CS");

        /// <summary>
        /// Entry point: verifies the SDK license, opens the sample document,
        /// makes sure the output folder exists, runs the text export and reports
        /// the outcome on the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            #region Preparation work
            Console.WriteLine("Running TextExtract test sample…\r\n");

            SDKLicenseHelper.LicenseVerify();
            CPDFDocument document = CPDFDocument.InitWithFilePath("CommonFivePage.pdf");

            // CreateDirectory does nothing when the directory already exists,
            // so a separate Directory.Exists check is not needed.
            Directory.CreateDirectory(outputPath);
            #endregion

            if (PDFToText(document))
            {
                Console.WriteLine("PDF to text done.");
            }
            else
            {
                Console.WriteLine("PDF to text failed.");
            }
            Console.WriteLine("--------------------");
            Console.WriteLine("Done!");
            Console.WriteLine("--------------------");

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Walks three directory levels up from the current working directory.
        /// </summary>
        /// <returns>
        /// The directory three levels above the current working directory, or the
        /// current working directory itself when fewer than three parent levels
        /// exist (so that the output path can always be built).
        /// </returns>
        private static string ResolveParentPath()
        {
            string workingDirectory = Directory.GetCurrentDirectory();
            string candidate = workingDirectory;

            for (int level = 0; level < 3; level++)
            {
                candidate = Path.GetDirectoryName(candidate);
                if (string.IsNullOrEmpty(candidate))
                {
                    // Ran out of parent directories: fall back instead of letting
                    // Path.Combine throw on a null path during type initialization.
                    return workingDirectory;
                }
            }

            return candidate;
        }

        /// <summary>
        /// Exports the text of all pages of <paramref name="document"/> to
        /// "PDFToText.txt" in the output folder.
        /// </summary>
        /// <param name="document">The document to export; may be null.</param>
        /// <returns>
        /// <c>true</c> when the export call reports success; <c>false</c> when the
        /// document is null, has no pages, or the export call reports failure.
        /// </returns>
        private static bool PDFToText(CPDFDocument document)
        {
            // Guard against a missing document (e.g. the file could not be opened)
            // and against an empty one, which would produce the invalid range "1-0".
            if (document == null || document.PageCount < 1)
            {
                return false;
            }

            string path = Path.Combine(outputPath, "PDFToText.txt");

            // Page ranges are counted from 1, so "1-<PageCount>" covers every page.
            string allPages = "1-" + document.PageCount.ToString();
            if (!document.PdfToText(allPages, path))
            {
                return false;
            }

            Console.WriteLine("Browse the generated file in " + path);
            return true;
        }
    }
}