Class PdfExtractor

Info

Mewakili plugin Documentize.PdfExtractor. Digunakan untuk mengekstrak teks, gambar, data formulir, dan properti (metadata) dari dokumen PDF.

public static class PdfExtractor

Pewarisan

object
PdfExtractor

Anggota yang Diwariskan

Metode

Extract(ExtractTextOptions)

Mengekstrak Teks dari dokumen PDF.

public static string Extract(ExtractTextOptions options)

Parameter

options ExtractTextOptions : Objek opsi yang berisi instruksi untuk operasi ekstraksi teks.

Mengembalikan

string : Teks yang diekstrak.

Contoh

Contoh ini menunjukkan cara Mengekstrak konten Teks dari file PDF.

// Create ExtractTextOptions object to set input file path
var options = new ExtractTextOptions("path_to_your_pdf_file.pdf");
// Perform the process and get the extracted text
var textExtracted = PdfExtractor.Extract(options);

Contoh ini menunjukkan cara mengekstrak konten teks dari Stream PDF.

// Create ExtractTextOptions object to set input stream
var stream = File.OpenRead("path_to_your_pdf_file.pdf");
var options = new ExtractTextOptions(stream);
// Perform the process and get the extracted text
var textExtracted = PdfExtractor.Extract(options);

Contoh ini menunjukkan cara Mengekstrak konten Teks dokumen PDF dengan TextFormattingMode.

// Create ExtractTextOptions object to set input file path and TextFormattingMode
var options = new ExtractTextOptions("path_to_your_pdf_file.pdf", TextFormattingMode.Pure);
// Perform the process and get the extracted text
var textExtracted = PdfExtractor.Extract(options);

Contoh ini menunjukkan cara mengekstrak teks dari file PDF dengan cara paling ringkas.

// Perform the process and get the extracted text
var textExtracted = PdfExtractor.Extract(new ExtractTextOptions("path_to_your_pdf_file.pdf", TextFormattingMode.Pure));

Pengecualian

ArgumentException

Jika opsi tidak diatur.

Extract(ExtractImagesOptions)

Mengekstrak gambar dari dokumen PDF.

public static ResultContainer Extract(ExtractImagesOptions options)

Parameter

options ExtractImagesOptions : Objek opsi yang berisi instruksi untuk operasi ekstraksi gambar.

Mengembalikan

ResultContainer : Objek yang berisi hasil operasi.

Contoh

Contoh ini menunjukkan cara Mengekstrak Gambar dari dokumen PDF.

// Create ExtractImagesOptions to set instructions
var options = new ExtractImagesOptions();
// Add input file path
options.AddInput(new FileData("path_to_your_pdf_file.pdf"));
// Set output Directory path
options.AddOutput(new DirectoryData("path_to_results_directory"));
// Perform the process
var results = PdfExtractor.Extract(options);
// Get path to image result
var imageExtracted = results.ResultCollection[0].ToFile();

Contoh ini menunjukkan cara mengekstrak gambar dari dokumen PDF ke Stream tanpa menentukan folder keluaran.

// Create ExtractImagesOptions to set instructions
var options = new ExtractImagesOptions();
// Add input file path
options.AddInput(new FileData("path_to_your_pdf_file.pdf"));
// Output is not set - results will be written to streams
// Perform the process
var results = PdfExtractor.Extract(options);
// Get Stream
var ms = results.ResultCollection[0].ToStream();
// Copy data to file for demo
ms.Seek(0, SeekOrigin.Begin);
using (var fs = File.Create("test_file.png"))
{
    ms.CopyTo(fs);
}

Pengecualian

ArgumentException

Jika opsi tidak diatur.

Extract(ExtractFormDataToDsvOptions)

Mengekstrak Data Formulir dari dokumen PDF.

public static ResultContainer Extract(ExtractFormDataToDsvOptions options)

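Parameter

options ExtractFormDataToDsvOptions : Objek opsi yang berisi instruksi untuk operasi ekstraksi data formulir.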

Mengembalikan

ResultContainer : Objek yang berisi hasil operasi.

Contoh

Contoh ini menunjukkan cara Mengekspor nilai Formulir ke file CSV.

// Create ExtractFormDataToDsvOptions object to set instructions
var options = new ExtractFormDataToDsvOptions(',', true);
// Add input file path
options.AddInput(new FileData("path_to_your_pdf_file.pdf"));
// Set output file path
options.AddOutput(new FileData("path_to_result_csv_file.csv"));
// Perform the process
PdfExtractor.Extract(options);

Contoh ini menunjukkan cara Mengekspor nilai Formulir ke file TSV dan mengatur Properti.

// Create ExtractFormDataToDsvOptions object to set instructions
var options = new ExtractFormDataToDsvOptions();
//Set Delimiter
options.Delimiter = '\t';
//Add Field Names to result
options.AddFieldName = true;
// Add input file path
options.AddInput(new FileData("path_to_your_pdf_file.pdf"));
// Set output file path
options.AddOutput(new FileData("path_to_result_csv_file.tsv"));
// Perform the process
PdfExtractor.Extract(options);

Pengecualian

ArgumentException

Jika opsi tidak diatur.

Extract(ExtractPropertiesOptions)

Mengekstrak Properti dari dokumen PDF.

public static PdfProperties Extract(ExtractPropertiesOptions options)

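Parameter

options ExtractPropertiesOptions : Objek opsi yang berisi instruksi untuk operasi ekstraksi properti.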

Mengembalikan

PdfProperties : Objek yang berisi hasil operasi.

Contoh

Contoh ini menunjukkan cara Mengekstrak Properti (FileName, Title, Author, Subject, Keywords, Created, Modified, Application, PDF Producer, Number of Pages) dari file PDF.

// Create ExtractPropertiesOptions object to set input file
var options = new ExtractPropertiesOptions("path_to_your_pdf_file.pdf");
// Perform the process and get Properties
var pdfProperties = PdfExtractor.Extract(options);
var filename = pdfProperties.FileName;
var title = pdfProperties.Title;
var author = pdfProperties.Author;
var subject = pdfProperties.Subject;
var keywords = pdfProperties.Keywords;
var created = pdfProperties.Created;
var modified = pdfProperties.Modified;
var application = pdfProperties.Application;
var pdfProducer = pdfProperties.PdfProducer;
var numberOfPages = pdfProperties.NumberOfPages;

Contoh ini menunjukkan cara mengekstrak properti (Title, Author, Subject, Keywords, Created, Modified, Application, PDF Producer, Number of Pages) dari Stream PDF.

// Create ExtractPropertiesOptions object to set input stream
var stream = File.OpenRead("path_to_your_pdf_file.pdf");
var options = new ExtractPropertiesOptions(stream);
// Perform the process and get Properties
var pdfProperties = PdfExtractor.Extract(options);
var title = pdfProperties.Title;
var author = pdfProperties.Author;
var subject = pdfProperties.Subject;
var keywords = pdfProperties.Keywords;
var created = pdfProperties.Created;
var modified = pdfProperties.Modified;
var application = pdfProperties.Application;
var pdfProducer = pdfProperties.PdfProducer;
var numberOfPages = pdfProperties.NumberOfPages;

Contoh ini menunjukkan cara mengekstrak properti dari file PDF dengan cara paling ringkas.

// Perform the process and get Properties
var pdfProperties = PdfExtractor.Extract(new ExtractPropertiesOptions("path_to_your_pdf_file.pdf"));

Pengecualian

ArgumentException

Jika opsi tidak diatur.

Namespace: Documentize
Assembly: Documentize.dll

 Indonesia