Extract Attachments#
Available Methods#
/pdf/attachments/extract#
Extracts attachments from source PDF file.
Method: POST
Endpoint: /v1/pdf/attachments/extract
Attributes#
Note
Attributes are case-sensitive and should be inside JSON for POST request, for example:
{
"url": "https://example.com/file1.pdf"
}
Attribute |
Description |
Required |
---|---|---|
|
URL to the source file. 1 |
yes |
|
HTTP auth user name if required to access source |
no |
|
HTTP auth password if required to access source |
no |
|
Set to |
no |
|
Set the expiration time for the output link in minutes (default is |
no |
|
Password of PDF file, the input must be in string format. |
no |
|
Set |
no |
|
Use this parameter to set additional configurations for fine-tuning and extra options. Explore the Profiles section for more. |
no |
Query parameters#
No query parameters accepted.
Payload#
{
"url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-attachments/attachments.pdf",
"inline": false,
"async": false
}
Response 2#
{
"urls": [
"https://pdf-temp-files.s3.amazonaws.com/DO1TAIHEZR5P9QLI7ICYM9DI0AAH57HY/sample.png",
"https://pdf-temp-files.s3.amazonaws.com/EOINIMD7X48JSOB1G8ETLVPOFZLM1NJ2/SampleMetafile.emf",
"https://pdf-temp-files.s3.amazonaws.com/3LW4BXNSPAE0WQTG5DPMXX498OCPNU4Q/ab.tif"
],
"pageCount": 3,
"error": false,
"status": 200,
"name": "attachments.json",
"credits": 24,
"duration": 1211,
"remainingCredits": 98003902
}
CURL#
curl --location --request POST 'https://api.pdf.co/v1/pdf/attachments/extract' \
--header 'Content-Type: application/json' \
--header 'x-api-key: ' \
--data-raw '{
"url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-attachments/attachments.pdf",
"inline": false,
"async": false
}'
Code samples#
var https = require("https");
var fs = require("fs");
var path = require("path");
// The authentication key (API Key).
// Get your own by registering at https://app.pdf.co
const API_KEY = "***********************************";
// Source PDF file
// You can also upload your own file into PDF.co and use it as url. Check "Upload File" samples for code snippets: https://github.com/bytescout/pdf-co-api-samples/tree/master/File%20Upload/
const SourceFileUrl = "https://bytescout-com.s3.us-west-2.amazonaws.com/files/demo-files/cloud-api/pdf-attachments/attachments.pdf";
// Prepare request for API endpoint
var queryPath = `/v1/pdf/attachments/extract`;
// JSON payload for api request
var jsonPayload = JSON.stringify({
url: SourceFileUrl
});
var reqOptions = {
host: "api.pdf.co",
method: "POST",
path: queryPath,
headers: {
"x-api-key": API_KEY,
"Content-Type": "application/json",
"Content-Length": Buffer.byteLength(jsonPayload, 'utf8')
}
};
// Send request
var postRequest = https.request(reqOptions, (response) => {
let responseData = '';
response.setEncoding("utf8");
response.on("data", (chunk) => {
responseData += chunk;
});
response.on("end", () => {
// Parse JSON response
var data = JSON.parse(responseData);
if (data.error == false) {
// Download extracted files
data.urls.forEach((url) => {
var localFileName = path.basename(url);
var file = fs.createWriteStream(localFileName);
https.get(url, (response2) => {
response2.pipe(file)
.on("close", () => {
console.log(`Generated file saved as "${localFileName}" file.`);
});
});
}, this);
}
else {
// Service reported error
console.log(data.message);
}
});
}).on("error", (e) => {
// Request error
console.error(e);
});
// Write request data
postRequest.write(jsonPayload);
postRequest.end();
import requests
import json
url = "https://api.pdf.co/v1/pdf/attachments/extract"
payload = json.dumps({
"url": "https://bytescout-com.s3.us-west-2.amazonaws.com/files/demo-files/cloud-api/pdf-attachments/attachments.pdf",
"inline": True,
"async": False
})
headers = {
'Content-Type': 'application/json',
'x-api-key': '__Replace_With_Your_PDFco_API_Key__'
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
using System;
using RestSharp;
namespace HelloWorldApplication {
class HelloWorld {
static void Main(string[] args) {
var client = new RestClient("https://api.pdf.co/v1/pdf/attachments/extract");
client.Timeout = -1;
var request = new RestRequest(Method.POST);
request.AddHeader("Content-Type", "application/json");
request.AddHeader("x-api-key", "__Replace_With_Your_PDFco_API_Key__");
var body = @"{" + "\n" +
@" ""url"": ""https://bytescout-com.s3.us-west-2.amazonaws.com/files/demo-files/cloud-api/pdf-attachments/attachments.pdf""," + "\n" +
@" ""inline"": true," + "\n" +
@" ""async"": false" + "\n" +
@"}";
request.AddParameter("application/json", body, ParameterType.RequestBody);
IRestResponse response = client.Execute(request);
Console.WriteLine(response.Content);
}
}
}
import java.io.*;
import okhttp3.*;
public class main {
public static void main(String []args) throws IOException{
OkHttpClient client = new OkHttpClient().newBuilder()
.build();
MediaType mediaType = MediaType.parse("application/json");
RequestBody body = RequestBody.create(mediaType, "{\n \"url\": \"https://bytescout-com.s3.us-west-2.amazonaws.com/files/demo-files/cloud-api/pdf-attachments/attachments.pdf\",\n \"inline\": true,\n \"async\": false\n}");
Request request = new Request.Builder()
.url("https://api.pdf.co/v1/pdf/attachments/extract")
.method("POST", body)
.addHeader("Content-Type", "application/json")
.addHeader("x-api-key", "__Replace_With_Your_PDFco_API_Key__")
.build();
Response response = client.newCall(request).execute();
System.out.println(response.body().string());
}
}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Cloud API asynchronous "Extract PDF Attachment" job example (allows to avoid timeout errors).</title>
</head>
<body>
<?php
// Cloud API asynchronous "Extract PDF Attachment" job example.
// Allows to avoid timeout errors when processing huge or scanned PDF documents.
// The authentication key (API Key).
// Get your own by registering at https://app.pdf.co
$apiKey = "********************************";
// Direct URL of source PDF file. Check another example if you need to upload a local file to the cloud.
// You can also upload your own file into PDF.co and use it as url. Check "Upload File" samples for code snippets: https://github.com/bytescout/pdf-co-api-samples/tree/master/File%20Upload/
$sourceFileUrl = "https://bytescout-com.s3.us-west-2.amazonaws.com/files/demo-files/cloud-api/pdf-attachments/attachments.pdf";
// Prepare URL for `Extract PDF Attachment` API call
$url = "https://api.pdf.co/v1/pdf/attachments/extract";
// Prepare requests params
$parameters = array();
$parameters["url"] = $sourceFileUrl;
$parameters["async"] = true; // (!) Make asynchronous job
// Create Json payload
$data = json_encode($parameters);
// Create request
$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
// Execute request
$result = curl_exec($curl);
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
$json = json_decode($result, true);
if (!isset($json["error"]) || $json["error"] == false)
{
// URL of generated JSON file available after the job completion; it will contain URLs of result PDF files.
$resultFileUrl = $json["url"];
// Asynchronous job ID
$jobId = $json["jobId"];
// Check the job status in a loop
do
{
$status = CheckJobStatus($jobId, $apiKey); // Possible statuses: "working", "failed", "aborted", "success".
// Display timestamp and status (for demo purposes)
echo "<p>" . date(DATE_RFC2822) . ": " . $status . "</p>";
if ($status == "success")
{
// Display link to the file with conversion results
echo "<div><h2>Results:</h2><a href='" . $resultFileUrl . "' target='_blank'>" . $resultFileUrl . "</a></div>";
break;
}
else if ($status == "working")
{
// Pause for a few seconds
sleep(3);
}
else
{
echo $status . "<br/>";
break;
}
}
while (true);
}
else
{
// Display service reported error
echo "<p>Error: " . $json["message"] . "</p>";
}
}
else
{
// Display request error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
// Cleanup
curl_close($curl);
function CheckJobStatus($jobId, $apiKey)
{
$status = null;
// Create URL
$url = "https://api.pdf.co/v1/job/check";
// Prepare requests params
$parameters = array();
$parameters["jobid"] = $jobId;
// Create Json payload
$data = json_encode($parameters);
// Create request
$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
// Execute request
$result = curl_exec($curl);
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
$json = json_decode($result, true);
if (!isset($json["error"]) || $json["error"] == false)
{
$status = $json["status"];
}
else
{
// Display service reported error
echo "<p>Error: " . $json["message"] . "</p>";
}
}
else
{
// Display request error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
// Cleanup
curl_close($curl);
return $status;
}
?>
</body>
</html>
On Github#
Footnotes
- 1
Supports links from Google Drive, Dropbox, and PDF.co Built-In Files Storage. To upload files via the API check out the File Upload section. Note: If you experience intermittent Access Denied or Too Many Requests errors, please try to add
cache:
to enable built-in URL caching. (e.gcache:https://example.com/file1.pdf
) For data security, you have the option to encrypt output files and decrypt input files. Learn more about user-controlled data encryption.- 2
Main response codes as follows:
Code
Description
200
Success
400
Bad request. Typically happens because of bad input parameters, or because the input URLs can’t be reached, possibly due to access restrictions like needing a login or password.
401
Unauthorized
402
Not enough credits
445
Timeout error. To process large documents or files please use asynchronous mode (set the
async
parameter totrue
) and then check status using the /job/check endpoint. If a file contains many pages then specify a page range using thepages
parameter. The number of pages of the document can be obtained using the /pdf/info endpoint.Note
For more see the complete list of available response codes.