AI-powered document analysis scans your document for tables and returns an array of the tables found on each page, together with their coordinates and information about the columns detected in them.
This function finds tables in documents using an AI-powered table detection engine.
This endpoint locates tables in an input PDF document and returns JSON with:
An array of table objects.
X, Y, Width, and Height coordinates for every table found.
Rect param for every table, which you can reuse with pdf/convert/to/json, pdf/convert/to/csv, and other endpoints to extract only the selected table.
PageIndex: the index of the page that contains the table. The very first page is 0.
Columns array with the set of X coordinates for every column inside the table that was found.
To extract the table into CSV, JSON, or XML, use the pdf/convert/to/csv, pdf/convert/to/json2, or pdf/convert/to/xml endpoint respectively, passing the rect value returned for this table as the rect parameter.
The callback URL (or Webhook) used to receive the POST data. See Webhooks & Callbacks. This is only applicable when async is set to true.
httpusername
string
No
-
HTTP auth user name if required to access source URL.
httppassword
string
No
-
HTTP auth password if required to access source URL.
pages
string
No
all pages
Specify page indices as comma-separated values or ranges to process (e.g. “0, 1, 2-” or “1, 2, 3-7”). The index of the first page is 0. Use “!” before a number for inverted page numbers (e.g. “!0” for the last page). If not specified, the default configuration processes all pages. The input must be in string format.
inline
boolean
No
false
Set to true to return results inside the response. Otherwise, the endpoint will return a URL to the output file generated.
Controls the encryption algorithm used for data encryption. See User-Controlled Encryption for more information. The available algorithms are: AES128, AES192, AES256.
DataEncryptionKey
string
No
-
Controls the encryption key used for data encryption. See User-Controlled Encryption for more information.
DataEncryptionIV
string
No
-
Controls the encryption IV used for data encryption. See User-Controlled Encryption for more information.
DataDecryptionAlgorithm
string
No
-
Controls the decryption algorithm used for data decryption. See User-Controlled Encryption for more information. The available algorithms are: AES128, AES192, AES256.
DataDecryptionKey
string
No
-
Controls the decryption key used for data decryption. See User-Controlled Encryption for more information.
DataDecryptionIV
string
No
-
Controls the decryption IV used for data decryption. See User-Controlled Encryption for more information.
const https = require("https");
const path = require("path");
const fs = require("fs");

// `request` module is required for file upload.
// Use "npm install request" command to install.
const request = require("request");

// The authentication key (API Key).
// Get your own by registering at https://app.pdf.co
const API_KEY = "***********************************";

// Direct URL of source PDF file.
// You can also upload your own file into PDF.co and use it as url. Check "Upload File" samples for code snippets: https://github.com/bytescout/pdf-co-api-samples/tree/master/File%20Upload/
const SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";

// Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
const Pages = "";

// PDF document password. Leave empty for unprotected documents.
const Password = "";

// Prepare URL for PDF Table Search API call.
// See documentation: https://apidocs.pdf.co
const query = `https://api.pdf.co/v1/pdf/find/table`;
const reqOptions = {
  uri: query,
  headers: { "x-api-key": API_KEY },
  formData: {
    password: Password,
    pages: Pages,
    url: SourceFileUrl
  }
};

// Send request: find the tables, then export each one to its own JSON file.
request.post(reqOptions, function (error, resp, body) {
  if (error) {
    return console.error("Error: ", error);
  }

  const jsonBody = parseApiResponse(body);
  if (!jsonBody) {
    return;
  }

  // `body.tables` is absent when nothing was detected; guard before indexing.
  const tables = jsonBody.body && jsonBody.body.tables;
  if (!Array.isArray(tables) || tables.length === 0) {
    return console.log("No tables found.");
  }

  // Loop through all found tables, and get json data
  tables.forEach(function (table, i) {
    getJSONFromCoordinates(SourceFileUrl, table.PageIndex, table.rect, `table_${i + 1}.json`);
  });
});

/**
 * Parse a raw API response body and report failures.
 *
 * @param {string} body - Raw response text returned by the API.
 * @returns {object|null} The parsed response, or null when the text is not
 *   valid JSON or the service flagged an error (both are logged).
 */
function parseApiResponse(body) {
  let parsed;
  try {
    parsed = JSON.parse(body);
  } catch (parseError) {
    console.error("Error: unable to parse API response.", parseError);
    return null;
  }

  if (!parsed || parsed.error) {
    console.error("Error: ", parsed && parsed.message ? parsed.message : body);
    return null;
  }
  return parsed;
}

/**
 * Get JSON from specific co-ordinates
 *
 * Calls the `PDF To JSON` endpoint restricted to one rectangle of one page
 * and downloads the generated JSON into a local file.
 *
 * @param {string} fileUrl - URL of the source PDF document.
 * @param {number|string} pageIndex - Page index sent as the `pages` parameter.
 * @param {string} rect - Rectangle sent as the `rect` parameter.
 * @param {string} outputFileName - Local path the JSON is written to.
 */
function getJSONFromCoordinates(fileUrl, pageIndex, rect, outputFileName) {
  // Prepare request to `PDF To JSON` API endpoint
  const jsonQueryPath = `https://api.pdf.co/v1/pdf/convert/to/json`;

  // Json Request
  const jsonReqOptions = {
    uri: jsonQueryPath,
    headers: { "x-api-key": API_KEY },
    formData: {
      pages: pageIndex,
      url: fileUrl,
      rect: rect
    }
  };

  // Send request
  request.post(jsonReqOptions, function (error, resp, body) {
    if (error) {
      return console.error("Error: ", error);
    }

    const result = parseApiResponse(body);
    if (!result) {
      return;
    }
    if (!result.url) {
      return console.error("Error: API response does not contain an output URL.");
    }

    // Download JSON file
    https
      .get(result.url, (response2) => {
        if (response2.statusCode !== 200) {
          // Drain the response so the socket is released.
          response2.resume();
          return console.error(`Error: download failed with HTTP status ${response2.statusCode}.`);
        }

        // Create the file only once the download is known to be good, so a
        // failed request does not leave an empty file behind.
        const file = fs.createWriteStream(outputFileName);
        file.on("error", (fileError) => console.error("Error: ", fileError));
        response2.pipe(file).on("close", () => {
          console.log(`Generated JSON file saved as "${outputFileName}" file.`);
        });
      })
      .on("error", (downloadError) => console.error("Error: ", downloadError));
  });
}
import requests
import os

# The authentication key (API Key).
# Get your own by registering at https://app.pdf.co
API_KEY = "***************************************"

# Direct URL of source PDF file.
SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf"

# Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
Pages = ""

# PDF document password. Leave empty for unprotected documents.
Password = ""

# Prepare URL for PDF Table Search API call.
query = "https://api.pdf.co/v1/pdf/find/table"

reqOptions = {
    'password': Password,
    'pages': Pages,
    'url': SourceFileUrl
}

headers = {
    'x-api-key': API_KEY
}


def getJSONFromCoordinates(fileUrl, pageIndex, rect, outputFileName):
    """Export one rectangle of one page to a local JSON file.

    Calls the `PDF To JSON` endpoint with the given page index and rectangle,
    then downloads the generated JSON into ``outputFileName``. Failures are
    printed; nothing is raised for HTTP or service-reported errors.

    Args:
        fileUrl: URL of the source PDF document.
        pageIndex: Page index sent as the ``pages`` parameter.
        rect: Rectangle sent as the ``rect`` parameter.
        outputFileName: Local path the JSON is written to.
    """
    # Prepare request to `PDF To JSON` API endpoint
    jsonQueryPath = "https://api.pdf.co/v1/pdf/convert/to/json"

    # Json Request
    jsonReqOptions = {
        'pages': pageIndex,
        'url': fileUrl,
        'rect': rect
    }

    # Send request
    response = requests.post(jsonQueryPath, headers=headers, data=jsonReqOptions)
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return

    result = response.json()
    # A 200 response can still carry a service-level error; without this check
    # the 'url' lookup below would fail with KeyError.
    if result.get('error') or 'url' not in result:
        print(f"API error: {result.get('message', 'no output URL in response')}")
        return

    # Download JSON file
    res = requests.get(result['url'])
    if res.status_code != 200:
        # Do not write an error page to disk as if it were the result.
        print(f"Download error: {res.status_code} {res.reason}")
        return

    with open(outputFileName, 'wb') as outfile:
        outfile.write(res.content)
    print(f'Generated JSON file saved as "{outputFileName}" file.')


# Send request
response = requests.post(query, headers=headers, data=reqOptions)
if response.status_code == 200:
    jsonBody = response.json()
    if jsonBody.get('error'):
        print(f"API error: {jsonBody.get('message')}")
    else:
        # 'body' may be missing or not a dict; treat that as "no tables".
        body = jsonBody.get('body')
        tables = body.get('tables') if isinstance(body, dict) else None
        # Loop through all found tables, and get json data
        for i, table in enumerate(tables or []):
            getJSONFromCoordinates(SourceFileUrl, table['PageIndex'], table['rect'], f"table_{i + 1}.json")
else:
    print(f"Request error: {response.status_code} {response.reason}")
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Net;

namespace PDFcoApiExample
{
    /// <summary>
    /// Sample that calls the PDF Table Search endpoint and prints the raw
    /// JSON response, or the service-reported error message.
    /// </summary>
    class Program
    {
        // The authentication key (API Key).
        // Get your own by registering at https://app.pdf.co
        const String API_KEY = "*****************************************";

        // Direct URL of source PDF file.
        // You can also upload your own file into PDF.co and use it as url. Check "Upload File" samples for code snippets: https://github.com/bytescout/pdf-co-api-samples/tree/master/File%20Upload/
        const string SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";

        // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
        const string Pages = "";

        // PDF document password. Leave empty for unprotected documents.
        const string Password = "";

        static void Main(string[] args)
        {
            // URL for PDF Table Search API call.
            // See documentation: https://apidocs.pdf.co
            string url = "https://api.pdf.co/v1/pdf/find/table";

            // Prepare requests params as JSON
            Dictionary<string, object> parameters = new Dictionary<string, object>();
            parameters.Add("password", Password);
            parameters.Add("pages", Pages);
            parameters.Add("url", SourceFileUrl);

            // Convert dictionary of params to JSON
            string jsonPayload = JsonConvert.SerializeObject(parameters);

            // `using` guarantees the client is disposed even when an exception
            // other than WebException escapes the request.
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);
                // The payload is JSON, so declare it as such.
                webClient.Headers[HttpRequestHeader.ContentType] = "application/json";

                try
                {
                    // Execute POST request with JSON payload
                    string response = webClient.UploadString(url, jsonPayload);

                    // Parse JSON response
                    JObject json = JObject.Parse(response);

                    // The service reports failures through a boolean "error"
                    // field with a "message"; a missing field means success.
                    bool hasError = json.Value<bool?>("error") ?? false;
                    if (!hasError)
                    {
                        Console.WriteLine(response);
                    }
                    else
                    {
                        Console.WriteLine(json.Value<string>("message") ?? response);
                    }
                }
                catch (WebException e)
                {
                    Console.WriteLine(e.ToString());
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }
    }
}
package com.company; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonParser; import okhttp3.*; import java.io.*; import java.net.*; public class Main { // The authentication key (API Key). // Get your own by registering at https://app.pdf.co final static String API_KEY = "***********************************"; // Direct URL of source PDF file. // You can also upload your own file into PDF.co and use it as url. Check "Upload File" samples for code snippets: https://github.com/bytescout/pdf-co-api-samples/tree/master/File%20Upload/ final static String SourceFileURL = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf"; // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. final static String Pages = ""; // PDF document password. Leave empty for unprotected documents. final static String Password = ""; public static void main(String[] args) throws IOException { // Create HTTP client instance OkHttpClient webClient = new OkHttpClient(); // Prepare URL for PDF Table Search API call. // See documentation: https://apidocs.pdf.co String query = "https://api.pdf.co/v1/pdf/find/table"; // Make correctly escaped (encoded) URL URL url = null; try { url = new URI(null, query, null).toURL(); } catch (URISyntaxException e) { e.printStackTrace(); } // Create JSON payload String jsonPayload = String.format("{\"password\": \"%s\", \"pages\": \"%s\", \"url\": \"%s\"}", Password, Pages, SourceFileURL); // Prepare request body RequestBody body = RequestBody.create(MediaType.parse("application/json"), jsonPayload); // Prepare request Request request = new Request.Builder() .url(url) .addHeader("x-api-key", API_KEY) // (!) 
Set API Key .addHeader("Content-Type", "application/json") .post(body) .build(); // Execute request Response response = webClient.newCall(request).execute(); if (response.code() == 200) { // Parse JSON response JsonObject json = new JsonParser().parse(response.body().string()).getAsJsonObject(); boolean error = json.get("error").getAsBoolean(); if (!error) { System.out.println(response.body().string()); } else { // Display service reported error System.out.println(json.get("message").getAsString()); } } else { // Display request error System.out.println(response.code() + " " + response.message()); } } }