PDF Find#
Available Methods#
/pdf-find#
Find text in PDF and get coordinates. Supports regular expressions.
Method: POST
Endpoint: /v1/pdf/find
Attributes#
Note
Attributes are case-sensitive and should be inside JSON for POST request, for example:
{
"url": "https://example.com/file1.pdf"
}
Attribute |
Description |
Required |
---|---|---|
|
URL to the source file. 1 |
yes |
|
HTTP auth user name if required to access source |
no |
|
HTTP auth password if required to access source |
no |
|
Text to search can support regular expressions if you set the |
yes |
|
Specify page indices as comma-separated values or ranges to process (e.g. |
no |
|
Set to |
no |
|
Values can be either |
no |
|
Password of PDF file, the input must be in string format. |
no |
|
Must be one of: |
no |
|
Set |
no |
|
Use this parameter to set additional configurations for fine-tuning and extra options. Explore the Profiles section for more. |
no |
Query parameters#
No query parameters accepted.
Payload#
{
"async": "false",
"url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-to-text/sample.pdf",
"searchString": "Invoice Date \\d+/\\d+/\\d+",
"regexSearch": "true",
"name": "output",
"pages": "0-",
"inline": "true",
"wordMatchingMode": "",
"password": ""
}
Response 2#
{
"body": [
{
"text": "Invoice Date 01/01/2016",
"left": 436.5400085449219,
"top": 130.4599995137751,
"width": 122.85311957550027,
"height": 11.040000486224898,
"pageIndex": 0,
"bounds": {
"location": {
"isEmpty": false,
"x": 436.54,
"y": 130.46
},
"size": "122.853119, 11.0400009",
"x": 436.54,
"y": 130.46,
"width": 122.853119,
"height": 11.0400009,
"left": 436.54,
"top": 130.46,
"right": 559.3931,
"bottom": 141.5,
"isEmpty": false
},
"elementCount": 1,
"elements": [
{
"index": 0,
"left": 436.5400085449219,
"top": 130.4599995137751,
"width": 122.85311957550027,
"height": 11.040000486224898,
"angle": 0,
"text": "Invoice Date 01/01/2016",
"isNewLine": true,
"fontIsBold": true,
"fontIsItalic": false,
"fontName": "Helvetica-Bold",
"fontSize": 11,
"fontColor": "0, 0, 0",
"fontColorAsOleColor": 0,
"fontColorAsHtmlColor": "#000000",
"bounds": {
"location": {
"isEmpty": false,
"x": 436.54,
"y": 130.46
},
"size": "122.853119, 11.0400009",
"x": 436.54,
"y": 130.46,
"width": 122.853119,
"height": 11.0400009,
"left": 436.54,
"top": 130.46,
"right": 559.3931,
"bottom": 141.5,
"isEmpty": false
}
}
]
}
],
"pageCount": 1,
"error": false,
"status": 200,
"name": "output",
"remainingCredits": 59970
}
CURL#
curl --location --request POST 'https://api.pdf.co/v1/pdf/find' \
--header 'x-api-key: ' \
--header 'Content-Type: application/json' \
--data-raw '{
"async": "false",
"url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-to-text/sample.pdf",
"searchString": "Invoice Date \\d+/\\d+/\\d+",
"regexSearch": "true",
"name": "output",
"pages": "0-",
"inline": "true",
"wordMatchingMode": "",
"password": ""
}'
Code samples#
// Sample: search a PDF for text (optionally by regular expression) through
// the `pdf/find` endpoint and print every match with its coordinates.

// `request` module is required for file upload.
// Use "npm install request" command to install.
const request = require("request");

// The authentication key (API Key).
// Get your own by registering at https://app.pdf.co
const API_KEY = "***********************************";

// Direct URL of source PDF file.
const SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";
// Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
const Pages = "";
// PDF document password. Leave empty for unprotected documents.
const Password = "";
// Search string: regular expression to find numbers in format dd.dd and between 40.00 to 99.99.
// The dot is escaped so it matches a literal "." instead of any character.
const SearchString = "[4-9][0-9]\\.[0-9][0-9]";
// Enable regular expressions (Regex)
const RegexSearch = 'True';

// Prepare URL for PDF text search API call.
// See documentation: https://developer.pdf.co
const query = "https://api.pdf.co/v1/pdf/find";
const reqOptions = {
    uri: query,
    headers: { "x-api-key": API_KEY },
    formData: {
        password: Password,
        pages: Pages,
        url: SourceFileUrl,
        searchString: SearchString,
        regexSearch: RegexSearch
    }
};

// Send request
request.post(reqOptions, (error, response, body) => {
    if (error) {
        return console.error("Error: ", error);
    }

    // Parse JSON response; a non-JSON body (e.g. a gateway error page) is reported, not thrown.
    let data;
    try {
        data = JSON.parse(body);
    } catch (parseError) {
        return console.error("Error: ", parseError);
    }

    // The service reports failures through the `error` flag and a `message` field.
    if (data.error) {
        return console.error("Error: ", data.message);
    }

    // `body` holds the list of matches; treat a missing list as "nothing found".
    const matches = Array.isArray(data.body) ? data.body : [];
    for (const element of matches) {
        console.log(`Found text ${element.text} at coordinates ${element.left}, ${element.top}`);
    }
});
import os
import requests # pip install requests
# The authentication key (API Key).
# Get your own by registering at https://app.pdf.co
API_KEY = "******************************************"
# Base URL for PDF.co Web API requests
BASE_URL = "https://api.pdf.co/v1"
# Source PDF file
SourceFile = ".\\sample.pdf"
# Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
Pages = ""
# PDF document password. Leave empty for unprotected documents.
Password = ""
# Search string.
# Raw string literal: `\d` and `\.` are regex escapes, not Python string escapes.
SearchString = r"\d{1,}\.\d\d" # Regular expression to find numbers like '100.00'
# Note: do not use `+` char in regex, but use `{1,}` instead.
# `+` char is valid for URL and will not be escaped, and it will become a space char on the server side.
# Enable regular expressions (Regex)
RegexSearch = True
def main(args = None):
    """Upload the source PDF and, if the upload succeeded, search it for text.

    `args` is accepted for compatibility but is not used.
    """
    uploadedFileUrl = uploadFile(SourceFile)
    # uploadFile returns None when any step of the upload fails.
    if uploadedFileUrl is not None:
        searchTextInPDF(uploadedFileUrl)
def searchTextInPDF(uploadedFileUrl):
    """Search Text using PDF.co Web API

    Posts the search parameters to the `pdf/find` endpoint and prints every
    match with its coordinates, or the error reported by the service.
    """
    # Prepare request params (sent as form fields via `data=`)
    # See documentation: https://developer.pdf.co
    parameters = {
        "password": Password,
        "pages": Pages,
        "url": uploadedFileUrl,
        "searchString": SearchString,
        "regexSearch": RegexSearch,
    }

    # Prepare URL for 'PDF Text Search' API request
    url = "{}/pdf/find".format(BASE_URL)

    # Execute request and get response as JSON
    response = requests.post(url, data=parameters, headers={ "x-api-key": API_KEY })
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return

    result = response.json()
    if result["error"]:
        # Show service reported error
        print(result["message"])
        return

    # Display found information
    for item in result["body"]:
        print(f"Found text {item['text']} at coordinates {item['left']}, {item['top']}")
def uploadFile(fileName):
    """Uploads file to the cloud

    Requests a presigned upload URL, PUTs the file to it, and returns the URL
    under which the uploaded file can be referenced. Returns None (after
    printing the reason) if any step fails.
    """
    # 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.
    # Prepare URL for 'Get Presigned URL' API request
    url = "{}/file/upload/get-presigned-url?contenttype=application/octet-stream&name={}".format(
        BASE_URL, os.path.basename(fileName))

    # Execute request and get response as JSON
    response = requests.get(url, headers={ "x-api-key": API_KEY })
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return None

    result = response.json()
    if result["error"]:
        # Show service reported error
        print(result["message"])
        return None

    # URL to use for file upload
    uploadUrl = result["presignedUrl"]
    # URL for future reference
    uploadedFileUrl = result["url"]

    # 2. UPLOAD FILE TO CLOUD.
    with open(fileName, 'rb') as file:
        uploadResponse = requests.put(uploadUrl, data=file, headers={ "x-api-key": API_KEY, "content-type": "application/octet-stream" })

    # Do not hand back a URL for a file that never reached the storage.
    if not uploadResponse.ok:
        print(f"Upload error: {uploadResponse.status_code} {uploadResponse.reason}")
        return None

    return uploadedFileUrl
# Run the sample only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace ByteScoutWebApiExample
{
    /// <summary>
    /// Sample: uploads a local PDF file and searches it for text through the
    /// `pdf/find` endpoint, printing every match with its coordinates.
    /// </summary>
    class Program
    {
        // The authentication key (API Key).
        // Get your own by registering at https://app.pdf.co
        const String API_KEY = "*********************************";
        // Source PDF file
        const string SourceFile = @".\sample.pdf";
        // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
        const string Pages = "";
        // PDF document password. Leave empty for unprotected documents.
        const string Password = "";
        // Search string.
        const string SearchString = @"\d{1,}\.\d\d"; // Regular expression to find numbers like '100.00'
        // Note: do not use `+` char in regex, but use `{1,}` instead.
        // `+` char is valid for URL and will not be escaped, and it will become a space char on the server side.
        // Enable regular expressions (Regex)
        const bool RegexSearch = true;

        static void Main(string[] args)
        {
            // Create standard .NET web client instance.
            // `using` guarantees disposal even if an unexpected exception escapes.
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
                // * If you already have a direct file URL, skip to the step 3.
                // Prepare URL for `Get Presigned URL` API call
                string query = Uri.EscapeUriString(string.Format(
                    "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name={0}",
                    Path.GetFileName(SourceFile)));

                try
                {
                    // Execute request
                    string response = webClient.DownloadString(query);
                    // Parse JSON response
                    JObject json = JObject.Parse(response);

                    if (json["error"].ToObject<bool>() == false)
                    {
                        // Get URL to use for the file upload
                        string uploadUrl = json["presignedUrl"].ToString();
                        string uploadedFileUrl = json["url"].ToString();

                        // 2. UPLOAD THE FILE TO CLOUD.
                        webClient.Headers.Add("content-type", "application/octet-stream");
                        webClient.UploadFile(uploadUrl, "PUT", SourceFile); // You can use UploadData() instead if your file is byte[] or Stream

                        // 3. SEARCH TEXT IN THE UPLOADED PDF FILE
                        // URL for `PDF Text Search` API call
                        // See documentation: https://developer.pdf.co
                        string url = "https://api.pdf.co/v1/pdf/find";

                        // Prepare requests params as JSON
                        Dictionary<string, object> parameters = new Dictionary<string, object>();
                        parameters.Add("password", Password);
                        parameters.Add("pages", Pages);
                        parameters.Add("url", uploadedFileUrl);
                        parameters.Add("searchString", SearchString);
                        parameters.Add("regexSearch", RegexSearch);

                        // Convert dictionary of params to JSON
                        string jsonPayload = JsonConvert.SerializeObject(parameters);

                        // The upload step left "application/octet-stream" in the headers;
                        // declare the JSON payload with its real content type.
                        webClient.Headers[HttpRequestHeader.ContentType] = "application/json";

                        // Execute POST request with JSON payload
                        response = webClient.UploadString(url, jsonPayload);
                        // Parse JSON response
                        json = JObject.Parse(response);

                        if (json["error"].ToObject<bool>() == false)
                        {
                            foreach (JToken item in json["body"])
                            {
                                Console.WriteLine($"Found text \"{item["text"]}\" at coordinates {item["left"]}, {item["top"]}");
                            }
                        }
                        else
                        {
                            // Show service reported error
                            Console.WriteLine(json["message"].ToString());
                        }
                    }
                    else
                    {
                        // Show service reported error
                        Console.WriteLine(json["message"].ToString());
                    }
                }
                catch (WebException ex)
                {
                    Console.WriteLine(ex.ToString());
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }
    }
}
package com.company;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import okhttp3.*;
import java.io.*;
import java.net.*;
/**
 * Sample: searches a PDF (given by direct URL) for text through the
 * `pdf/find` endpoint and prints every match with its coordinates.
 */
public class Main
{
    // The authentication key (API Key).
    // Get your own by registering at https://app.pdf.co
    final static String API_KEY = "***********************************";
    // Direct URL of source PDF file.
    final static String SourceFileURL = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";
    // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
    final static String Pages = "";
    // PDF document password. Leave empty for unprotected documents.
    final static String Password = "";
    // Search string.
    final static String SearchString = "\\d{1,}\\.\\d\\d"; // Regular expression to find numbers like '100.00'
    // Note: do not use `+` char in regex, but use `{1,}` instead.
    // `+` char is valid for URL and will not be escaped, and it will become a space char on the server side.
    // Enable regular expressions (Regex)
    final static boolean RegexSearch = true;

    public static void main(String[] args) throws IOException
    {
        // Create HTTP client instance
        OkHttpClient webClient = new OkHttpClient();

        // URL for PDF text search API call.
        // See documentation: https://developer.pdf.co
        String query = "https://api.pdf.co/v1/pdf/find";

        // Create JSON payload.
        // Built with Gson so that backslashes and quotes in the values (the regex
        // contains `\d` and `\.`) are escaped into valid JSON.
        JsonObject payload = new JsonObject();
        payload.addProperty("password", Password);
        payload.addProperty("pages", Pages);
        payload.addProperty("url", SourceFileURL);
        payload.addProperty("searchString", SearchString);
        // Sent as the string "true"/"false", as in the documented payload.
        payload.addProperty("regexSearch", String.valueOf(RegexSearch));
        String jsonPayload = payload.toString();

        // Prepare request body
        RequestBody body = RequestBody.create(MediaType.parse("application/json"), jsonPayload);

        // Prepare request
        Request request = new Request.Builder()
                .url(query)
                .addHeader("x-api-key", API_KEY) // (!) Set API Key
                .addHeader("Content-Type", "application/json")
                .post(body)
                .build();

        // Execute request; try-with-resources releases the connection in every case.
        try (Response response = webClient.newCall(request).execute())
        {
            if (response.code() != 200)
            {
                // Display request error
                System.out.println(response.code() + " " + response.message());
                return;
            }

            // Parse JSON response (the body can be read only once)
            JsonObject json = new JsonParser().parse(response.body().string()).getAsJsonObject();

            if (json.get("error").getAsBoolean())
            {
                // Display service reported error
                System.out.println(json.get("message").getAsString());
                return;
            }

            // Display found items in console
            for (JsonElement element : json.get("body").getAsJsonArray())
            {
                JsonObject item = (JsonObject) element;
                System.out.println("Found text " + item.get("text") + " at coordinates " + item.get("left") + ", " + item.get("top"));
            }
        }
    }
}
/pdf-find-table#
AI powered document analysis can scan your document for tables and return the array of tables on pages with coordinates and information about columns detected in these tables.
This function finds tables in documents using an AI-powered table detection engine.
This endpoint locates tables in an input PDF document and returns JSON with:
- The array of `tables` objects.
- `X`, `Y`, `Width`, and `Height` coordinates for every table found.
- `Rect` param for every table that you can re-use with `pdf/convert/to/json`, `pdf/convert/to/csv`, `pdf/convert/to/xml`, and other endpoints to extract a selected table only.
- `PageIndex` page index for a page with a table. The very first page is `0`.
- `Columns` array with the set of `X` coordinates for every column inside the table that was found.
To extract the table into CSV, JSON, or XML, please use the `pdf/convert/to/csv`, `pdf/convert/to/json2`, and `pdf/convert/to/xml` endpoints respectively, passing as the `rect` parameter the value of the `rect` output param returned for this table.
Method: POST
Endpoint: /v1/pdf/find/table
Attributes#
Note
Attributes are case-sensitive and should be inside JSON for POST request, for example:
{
"url": "https://example.com/file1.pdf"
}
Attribute |
Description |
Required |
---|---|---|
|
URL to the source file. 1 |
yes |
|
HTTP auth user name if required to access source |
no |
|
HTTP auth password if required to access source |
no |
|
Specify page indices as comma-separated values or ranges to process (e.g. |
no |
|
Set to |
no |
|
Password of PDF file, the input must be in string format. |
no |
|
Set |
no |
|
File name for the generated output, the input must be in string format. |
no |
|
Set the expiration time for the output link in minutes (default is |
no |
|
Use this parameter to set additional configurations for fine-tuning and extra options. Explore the Profiles section for more. |
no |
Note
There is also a “legacy find tables” mode which can be used. Legacy mode can be enabled by setting an object on your profiles
attribute like this:
"profiles": "{ 'Mode': 'Legacy'}"
With a more detailed config with minimum rows, minimum columns, and column detection mode:
"profiles": { 'Mode': 'Legacy',
'ColumnDetectionMode': 'BorderedTables',
'DetectionMinNumberOfRows': 1,
'DetectionMinNumberOfColumns': 1,
'DetectionMaxNumberOfInvalidSubsequentRowsAllowed': 0,
'DetectionMinNumberOfLineBreaksBetweenTables': 0,
'EnhanceTableBorders': false
}
Query parameters#
No query parameters accepted.
Payload#
{
"url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-to-text/sample.pdf",
"async": "false",
"inline": "true",
"password": ""
}
Response 2#
{
"body": {
"tables": [
{
"PageIndex": 0,
"X": 36,
"Y": 34.4400024,
"Width": 523.44,
"Height": 160.82,
"Columns": [
357.675
],
"rect": "36, 34.4400024, 523.44, 160.82"
},
{
"PageIndex": 0,
"X": 36,
"Y": 316.249969,
"Width": 523.44,
"Height": 120.620026,
"Columns": [
157.117,
340.68,
475.84
],
"rect": "36, 316.249969, 523.44, 120.620026"
}
]
},
"pageCount": 1,
"error": false,
"status": 200,
"name": "sample.json",
"remainingCredits": 98892697,
"credits": 21
}
CURL#
curl --location --request POST 'https://api.pdf.co/v1/pdf/find/table' \
--header 'x-api-key: ' \
--header 'Content-Type: application/json' \
--data-raw '{
"url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-to-text/sample.pdf",
"async": "false",
"inline": "true",
"password": ""
}'
Code samples#
var https = require("https");
var path = require("path");
var fs = require("fs");
// `request` module is required for file upload.
// Use "npm install request" command to install.
var request = require("request");
// Sample: detect tables in a PDF through the `pdf/find/table` endpoint and
// export every detected table region to its own JSON file.

// The authentication key (API Key).
// Get your own by registering at https://app.pdf.co
const API_KEY = "***********************************";
// Direct URL of source PDF file.
// You can also upload your own file into PDF.co and use it as url. Check "Upload File" samples for code snippets: https://github.com/bytescout/pdf-co-api-samples/tree/master/File%20Upload/
const SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";
// Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
const Pages = "";
// PDF document password. Leave empty for unprotected documents.
const Password = "";

// Prepare URL for PDF Table Search API call.
// See documentation: https://apidocs.pdf.co
const query = "https://api.pdf.co/v1/pdf/find/table";
const reqOptions = {
    uri: query,
    headers: { "x-api-key": API_KEY },
    formData: {
        password: Password,
        pages: Pages,
        url: SourceFileUrl
    }
};

// Send request
request.post(reqOptions, (error, resp, body) => {
    if (error) {
        return console.error("Error: ", error);
    }

    // A non-JSON body (e.g. a gateway error page) is reported, not thrown.
    let jsonBody;
    try {
        jsonBody = JSON.parse(body);
    } catch (parseError) {
        return console.error("Error: ", parseError);
    }

    // On failure the service sets `error` and may omit `body` entirely.
    if (jsonBody.error || !jsonBody.body) {
        return console.error("Error: ", jsonBody.message);
    }

    // Loop through all found tables, and get json data
    const tables = Array.isArray(jsonBody.body.tables) ? jsonBody.body.tables : [];
    tables.forEach((table, i) => {
        getJSONFromCoordinates(SourceFileUrl, table.PageIndex, table.rect, `table_${i + 1}.json`);
    });
});
/**
 * Get JSON from specific co-ordinates.
 *
 * Sends the given page and rectangle to the `PDF To JSON` endpoint and
 * downloads the generated JSON into a local file. Failures are logged to
 * the console; nothing is thrown to the caller.
 *
 * @param {string} fileUrl - URL of the source PDF file.
 * @param {number|string} pageIndex - Page to process (sent as `pages`).
 * @param {string} rect - Region to extract (sent as `rect`).
 * @param {string} outputFileName - Local path for the downloaded JSON.
 */
function getJSONFromCoordinates(fileUrl, pageIndex, rect, outputFileName) {
    // Prepare request to `PDF To JSON` API endpoint
    const jsonQueryPath = "https://api.pdf.co/v1/pdf/convert/to/json";

    // Json Request
    const jsonReqOptions = {
        uri: jsonQueryPath,
        headers: { "x-api-key": API_KEY },
        formData: {
            pages: pageIndex,
            url: fileUrl,
            rect: rect
        }
    };

    // Send request
    request.post(jsonReqOptions, (error, resp, body) => {
        if (error) {
            return console.error("Error: ", error);
        }

        let result;
        try {
            result = JSON.parse(body);
        } catch (parseError) {
            return console.error("Error: ", parseError);
        }

        // Without an output URL there is nothing to download.
        if (result.error || !result.url) {
            return console.error("Error: ", result.message);
        }

        // Download JSON file
        https.get(result.url, (response2) => {
            // Create the output file only once the download has actually started.
            const file = fs.createWriteStream(outputFileName);
            response2.pipe(file)
                .on("close", () => {
                    console.log(`Generated JSON file saved as "${outputFileName}" file.`);
                });
        }).on("error", (downloadError) => {
            console.error("Error: ", downloadError);
        });
    });
}
import requests
import os
# API key used to authenticate every request.
# Register at https://app.pdf.co to obtain one.
API_KEY = "***************************************"

# Publicly reachable URL of the PDF to analyse.
SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf"

# Pages to process: comma-separated indices and/or ranges, e.g. '0,2-5,7-'. Empty means all pages.
Pages = ""

# Password of the PDF document; keep empty when the document is not protected.
Password = ""

# Endpoint of the PDF Table Search API call.
query = "https://api.pdf.co/v1/pdf/find/table"

# Request fields sent with the table search call.
reqOptions = dict(password=Password, pages=Pages, url=SourceFileUrl)

# HTTP headers shared by all API calls of this sample.
headers = {"x-api-key": API_KEY}
def getJSONFromCoordinates(fileUrl, pageIndex, rect, outputFileName):
    """Convert one region of a PDF page to JSON and save it to a local file.

    Sends `pageIndex` (as `pages`), `fileUrl` (as `url`) and `rect` to the
    `PDF To JSON` endpoint, then downloads the generated file to
    `outputFileName`. Failures are printed; nothing is raised to the caller.
    """
    # Prepare request to `PDF To JSON` API endpoint
    jsonQueryPath = "https://api.pdf.co/v1/pdf/convert/to/json"

    # Json Request
    jsonReqOptions = {
        'pages': pageIndex,
        'url': fileUrl,
        'rect': rect
    }

    # Send request
    response = requests.post(jsonQueryPath, headers=headers, data=jsonReqOptions)
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return

    result = response.json()
    # Without an output URL there is nothing to download.
    if result.get('error') or 'url' not in result:
        print(result.get('message'))
        return

    # Download JSON file
    res = requests.get(result['url'])
    if not res.ok:
        # Do not write an error page into the output file.
        print(f"Download error: {res.status_code} {res.reason}")
        return

    with open(outputFileName, 'wb') as outfile:
        outfile.write(res.content)
    print(f'Generated JSON file saved as "{outputFileName}" file.')
# Send request
response = requests.post(query, headers=headers, data=reqOptions)
if response.status_code == 200:
    jsonBody = response.json()
    if jsonBody.get('error'):
        # Show service reported error
        print(jsonBody.get('message'))
    else:
        # Loop through all found tables, and get json data.
        # A missing `body` or `tables` entry is treated as "no tables found".
        tables = (jsonBody.get('body') or {}).get('tables') or []
        for i, table in enumerate(tables):
            getJSONFromCoordinates(SourceFileUrl, table['PageIndex'], table['rect'], f"table_{i + 1}.json")
else:
    print(f"Request error: {response.status_code} {response.reason}")
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Net;
namespace ByteScoutWebApiExample
{
    /// <summary>
    /// Sample: detects tables in a PDF (given by direct URL) through the
    /// `pdf/find/table` endpoint and prints the raw JSON response.
    /// </summary>
    class Program
    {
        // The authentication key (API Key).
        // Get your own by registering at https://app.pdf.co
        const String API_KEY = "*****************************************";
        // Direct URL of source PDF file.
        // You can also upload your own file into PDF.co and use it as url. Check "Upload File" samples for code snippets: https://github.com/bytescout/pdf-co-api-samples/tree/master/File%20Upload/
        const string SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";
        // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
        const string Pages = "";
        // PDF document password. Leave empty for unprotected documents.
        const string Password = "";

        static void Main(string[] args)
        {
            // Create standard .NET web client instance.
            // `using` guarantees disposal even if an unexpected exception escapes.
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // URL for PDF Table Search API call.
                // See documentation: https://apidocs.pdf.co
                string url = "https://api.pdf.co/v1/pdf/find/table";

                // Prepare requests params as JSON
                Dictionary<string, object> parameters = new Dictionary<string, object>();
                parameters.Add("password", Password);
                parameters.Add("pages", Pages);
                parameters.Add("url", SourceFileUrl);

                // Convert dictionary of params to JSON
                string jsonPayload = JsonConvert.SerializeObject(parameters);

                try
                {
                    // Execute POST request with JSON payload
                    string response = webClient.UploadString(url, jsonPayload);
                    // Parse JSON response
                    JObject json = JObject.Parse(response);

                    // The service signals failure through the boolean `error` flag;
                    // `status` carries a numeric code (e.g. 200), not the text "error".
                    if (json["error"].ToObject<bool>() == false)
                    {
                        Console.WriteLine(response);
                    }
                    else
                    {
                        // Show service reported error
                        Console.WriteLine(json["message"].ToString());
                    }
                }
                catch (WebException e)
                {
                    Console.WriteLine(e.ToString());
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }
    }
}
package com.company;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import okhttp3.*;
import java.io.*;
import java.net.*;
/**
 * Sample: detects tables in a PDF (given by direct URL) through the
 * `pdf/find/table` endpoint and prints the raw JSON response.
 */
public class Main
{
    // The authentication key (API Key).
    // Get your own by registering at https://app.pdf.co
    final static String API_KEY = "***********************************";
    // Direct URL of source PDF file.
    // You can also upload your own file into PDF.co and use it as url. Check "Upload File" samples for code snippets: https://github.com/bytescout/pdf-co-api-samples/tree/master/File%20Upload/
    final static String SourceFileURL = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";
    // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
    final static String Pages = "";
    // PDF document password. Leave empty for unprotected documents.
    final static String Password = "";

    public static void main(String[] args) throws IOException
    {
        // Create HTTP client instance
        OkHttpClient webClient = new OkHttpClient();

        // URL for PDF Table Search API call.
        // See documentation: https://apidocs.pdf.co
        String query = "https://api.pdf.co/v1/pdf/find/table";

        // Create JSON payload (Gson escapes the values into valid JSON)
        JsonObject payload = new JsonObject();
        payload.addProperty("password", Password);
        payload.addProperty("pages", Pages);
        payload.addProperty("url", SourceFileURL);
        String jsonPayload = payload.toString();

        // Prepare request body
        RequestBody body = RequestBody.create(MediaType.parse("application/json"), jsonPayload);

        // Prepare request
        Request request = new Request.Builder()
                .url(query)
                .addHeader("x-api-key", API_KEY) // (!) Set API Key
                .addHeader("Content-Type", "application/json")
                .post(body)
                .build();

        // Execute request; try-with-resources releases the connection in every case.
        try (Response response = webClient.newCall(request).execute())
        {
            if (response.code() != 200)
            {
                // Display request error
                System.out.println(response.code() + " " + response.message());
                return;
            }

            // The response body is a one-shot stream: read it exactly once and
            // reuse the text for both parsing and printing.
            String responseText = response.body().string();

            // Parse JSON response
            JsonObject json = new JsonParser().parse(responseText).getAsJsonObject();

            if (json.get("error").getAsBoolean())
            {
                // Display service reported error
                System.out.println(json.get("message").getAsString());
                return;
            }

            System.out.println(responseText);
        }
    }
}
On Github#
Footnotes
- 1(1,2)
Supports links from Google Drive, Dropbox, and PDF.co Built-In Files Storage. To upload files via the API check out the File Upload section. Note: If you experience intermittent Access Denied or Too Many Requests errors, please try to add `cache:` to enable built-in URL caching (e.g. `cache:https://example.com/file1.pdf`). For data security, you have the option to encrypt output files and decrypt input files. Learn more about user-controlled data encryption.
- 2(1,2)
Main response codes as follows:
Code
Description
200
Success
400
Bad request. Typically happens because of bad input parameters, or because the input URLs can’t be reached, possibly due to access restrictions like needing a login or password.
401
Unauthorized
402
Not enough credits
445
Timeout error. To process large documents or files please use asynchronous mode (set the `async` parameter to `true`) and then check status using the /job/check endpoint. If a file contains many pages then specify a page range using the `pages` parameter. The number of pages of the document can be obtained using the /pdf/info endpoint.
Note
For more see the complete list of available response codes.