C# 提取PDF中的表格

 时间:2026-04-21 17:03:14

1、在解决方案资源管理器中,鼠标右键点击“引用”,选择“管理NuGet程序包”。

C# 提取PDF中的表格

2、点击“浏览”,在搜索框中输入“Spire.PDF”,选中搜索结果后点击“安装”。

C# 提取PDF中的表格

3、或者 使用PM控制台安装:

PM>Install-Package Spire.PDF -Version 7.10.4

1、C# 代码示例:

using Spire.Pdf;

using Spire.Pdf.Utilities;

using System.IO;

using System.Text;

namespace ExtractTable
{
    /// <summary>
    /// Console sample that extracts the text of every table found in a PDF
    /// document and saves it to a plain-text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample.pdf", walks every page, extracts each table's cell text
        /// (cells separated by a space, rows terminated by CRLF) and writes the
        /// result to "ExtractedTable.txt".
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            StringBuilder builder = new StringBuilder();

            // Load the PDF document. The using statement guarantees the document
            // (and the file handle it holds) is released even if extraction throws.
            using (PdfDocument pdf = new PdfDocument())
            {
                pdf.LoadFromFile("sample.pdf");

                // Extract the tables page by page.
                PdfTableExtractor extractor = new PdfTableExtractor(pdf);

                for (int pageIndex = 0; pageIndex < pdf.Pages.Count; pageIndex++)
                {
                    PdfTable[] tableLists = extractor.ExtractTable(pageIndex);

                    // The original code guards against null here, so keep the guard;
                    // an empty array simply yields no iterations below.
                    if (tableLists == null)
                    {
                        continue;
                    }

                    foreach (PdfTable table in tableLists)
                    {
                        int row = table.GetRowCount();
                        int column = table.GetColumnCount();

                        for (int i = 0; i < row; i++)
                        {
                            for (int j = 0; j < column; j++)
                            {
                                // Chained Append avoids allocating a temporary
                                // concatenated string for every cell.
                                builder.Append(table.GetText(i, j)).Append(" ");
                            }

                            builder.Append("\r\n");
                        }
                    }
                }
            }

            // Save the extracted table content to a txt document.
            File.WriteAllText("ExtractedTable.txt", builder.ToString());
        }
    }
}

2、完成代码后,执行程序,生成txt文档。表格提取效果如图:

C# 提取PDF中的表格

1、VB.NET 代码示例:

Imports Spire.Pdf

Imports Spire.Pdf.Utilities

Imports System.IO

Imports System.Text

Namespace ExtractTable

    ''' <summary>
    ''' Console sample that extracts the text of every table found in a PDF
    ''' document and saves it to a plain-text file.
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Loads "sample.pdf", walks every page, extracts each table's cell text
        ''' (cells separated by a space, rows terminated by CRLF) and writes the
        ''' result to "ExtractedTable.txt".
        ''' </summary>
        ''' <param name="args">Command-line arguments; not used.</param>
        Private Shared Sub Main(args As String())
            Dim builder As New StringBuilder()

            ' Load the PDF document. The Using block guarantees the document
            ' (and the file handle it holds) is released even if extraction throws.
            Using pdf As New PdfDocument()
                pdf.LoadFromFile("sample.pdf")

                ' Extract the tables page by page.
                Dim extractor As New PdfTableExtractor(pdf)

                For pageIndex As Integer = 0 To pdf.Pages.Count - 1
                    Dim tableLists As PdfTable() = extractor.ExtractTable(pageIndex)

                    ' The original code guards against Nothing here, so keep the guard;
                    ' an empty array simply yields no iterations below.
                    If tableLists Is Nothing Then
                        Continue For
                    End If

                    For Each table As PdfTable In tableLists
                        Dim row As Integer = table.GetRowCount()
                        Dim column As Integer = table.GetColumnCount()

                        For i As Integer = 0 To row - 1
                            For j As Integer = 0 To column - 1
                                ' Chained Append avoids allocating a temporary
                                ' concatenated string for every cell.
                                builder.Append(table.GetText(i, j)).Append(" ")
                            Next

                            builder.Append(vbCrLf)
                        Next
                    Next
                Next
            End Using

            ' Save the extracted table content to a txt document.
            File.WriteAllText("ExtractedTable.txt", builder.ToString())
        End Sub

    End Class

End Namespace

  • 怎样测试电脑某个端口是否打开?
  • thinkpad屏幕分辨率怎么修改?
  • OSPF中的NBMA网络简介
  • U盘启动项 BIOS找不到U盘启动项怎么办
  • win7系统uac通知功能要怎么关闭?
  • 热门搜索
    花生米怎么炸又酥又脆 怎么测试网速 人参怎么保存 怎么消除法令纹 小孩流鼻血是怎么回事 胡萝卜怎么炒好吃 怎么去法令纹 北京服装学院怎么样 我认为用英语怎么说 怎么变魔术