Here's the minimum viable receipt OCR implementation in Python. Get your API key at docuparseapi.com/signup — free, no credit card — and run this:
import os
import requests
def parse_receipt(file_path: str, *, timeout: float = 30) -> dict:
    """Upload one receipt file to the docuparseapi.com extract endpoint.

    Args:
        file_path: Path of the receipt file to upload.
        timeout: Seconds ``requests`` waits on the server before giving up.

    Returns:
        The decoded JSON response body.

    Raises:
        KeyError: If the ``DOCUPARSE_API_KEY`` environment variable is unset.
        RuntimeError: If the response does not report success. The message
            has the form ``[<code>] <message>``, taken from the response's
            ``error`` object.
    """
    with open(file_path, "rb") as f:
        response = requests.post(
            "https://docuparseapi.com/api/v1/extract",
            headers={"Authorization": f"Bearer {os.environ['DOCUPARSE_API_KEY']}"},
            files={"file": f},
            # requests has no default timeout; without one a stalled
            # connection blocks the caller forever.
            timeout=timeout,
        )
    data = response.json()
    if not data.get("success"):
        # Tolerate a failure payload with a missing or partial "error"
        # object instead of masking the API failure with a KeyError.
        error = data.get("error") or {}
        raise RuntimeError(
            f"[{error.get('code', 'UNKNOWN_ERROR')}] "
            f"{error.get('message', 'request failed')}"
        )
    return data
receipt = parse_receipt("receipt.jpg")

# Print the headline fields of the parsed receipt.
print(f"Store: {receipt['merchant']}")
print(f"Date: {receipt['date']}")
print(f"Total: {receipt['currency']} {receipt['total']}")
print(f"Tax: {receipt['tax']}")
print(f"Method: {receipt['payment_method']}")
# `or []` also covers a line_items key that is present but null; the
# default of .get() only applies when the key is absent.
for item in receipt.get("line_items") or []:
    print(f" {item['description']}: {item['amount']}")

# What the response looks like
{
"success": true,
"document_type": "receipt",
"merchant": "Whole Foods Market",
"date": "2026-05-14",
"total": "67.43",
"subtotal": "62.00",
"tax": "5.43",
"tax_rate": "8.76%",
"currency": "USD",
"receipt_id": "R-0042-7721",
"payment_method": "Visa Card",
"line_items": [
{ "description": "Organic Milk", "quantity": 1, "amount": "4.99" },
{ "description": "Free-Range Eggs", "quantity": 1, "amount": "7.49" },
{ "description": "Sourdough Bread", "quantity": 1, "amount": "6.99" }
],
"processing_time_ms": 2980
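}
Every field is already named, typed, and normalized. Access receipt['merchant'] directly — no parsing required. Note that monetary values such as total and tax are returned as strings, so convert them (for example with float()) before doing arithmetic. Missing fields return null rather than being omitted, so you never need to check for key existence — you only need to handle null values.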
Supported formats
The API accepts JPG, PNG, and PDF files up to 10MB. It handles both digital receipts and scanned or photographed paper receipts — OCR is applied automatically when needed.
Processing a folder of receipts
from pathlib import Path
def batch_parse(folder: str, *, patterns: tuple[str, ...] = ("*.jpg",)) -> list[dict]:
    """Send every matching file in ``folder`` to the extract endpoint.

    A ✓ or ✗ line is printed per file. A file whose upload fails (file,
    network, or JSON-decoding error) does not abort the batch: it is
    recorded with ``success`` False and an ``error`` object whose ``code``
    is ``REQUEST_FAILED``.

    Args:
        folder: Directory to scan (not recursive).
        patterns: Glob patterns to match, e.g. ``("*.jpg", "*.png", "*.pdf")``.

    Returns:
        One dict per file: the response JSON plus a ``file`` key holding
        the file name.

    Raises:
        KeyError: If the ``DOCUPARSE_API_KEY`` environment variable is unset.
    """
    api_key = os.environ["DOCUPARSE_API_KEY"]
    root = Path(folder)
    # A set drops files matched by more than one pattern; sorting makes the
    # processing order deterministic.
    paths = sorted({path for pattern in patterns for path in root.glob(pattern)})

    results = []
    for path in paths:
        try:
            with open(path, "rb") as f:
                response = requests.post(
                    "https://docuparseapi.com/api/v1/extract",
                    headers={"Authorization": f"Bearer {api_key}"},
                    files={"file": f},
                    timeout=30,
                )
            data = response.json()
        except (requests.RequestException, OSError, ValueError) as exc:
            # Same shape as an API failure so callers handle both alike.
            data = {
                "success": False,
                "error": {"code": "REQUEST_FAILED", "message": str(exc)},
            }
        results.append({"file": path.name, **data})
        print(f"{'✓' if data.get('success') else '✗'} {path.name}")
    return results
receipts = batch_parse("./receipts/")

# Add up the totals of the successfully parsed receipts; a missing or null
# total counts as 0.
successful_totals = [
    float(parsed.get("total") or 0)
    for parsed in receipts
    if parsed.get("success")
]
total_spend = sum(successful_totals)
print(f"\nTotal spend: ${total_spend:.2f}")

# Also works with *.png and *.pdf — just add more glob patterns.
Async version for FastAPI and asyncio
import httpx
async def parse_receipt_async(file_path: str) -> dict:
    """Upload one receipt file to the extract endpoint without blocking the event loop.

    Args:
        file_path: Path of the receipt file to upload.

    Returns:
        The decoded JSON response body.

    Raises:
        KeyError: If the ``DOCUPARSE_API_KEY`` environment variable is unset.
        RuntimeError: If the response does not report success. The message
            has the form ``[<code>] <message>``.
    """
    import asyncio  # local import: only this coroutine needs it

    def _read_file() -> bytes:
        """Return the raw bytes of the receipt file."""
        with open(file_path, "rb") as f:
            return f.read()

    # Disk reads are blocking; run them in a worker thread so other tasks on
    # the event loop keep running. Reading before the client is opened also
    # avoids holding an HTTP client open during file I/O.
    payload = await asyncio.to_thread(_read_file)

    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.post(
            "https://docuparseapi.com/api/v1/extract",
            headers={"Authorization": f"Bearer {os.environ['DOCUPARSE_API_KEY']}"},
            files={"file": (os.path.basename(file_path), payload)},
        )
    data = response.json()
    if not data.get("success"):
        # Tolerate a failure payload with a missing or partial "error" object.
        error = data.get("error") or {}
        raise RuntimeError(
            f"[{error.get('code', 'UNKNOWN_ERROR')}] "
            f"{error.get('message', 'request failed')}"
        )
    return data


# Install httpx with pip install httpx if you don't have it.
Error handling
def parse_receipt_safe(file_path: str) -> dict | None:
    """Parse a receipt, printing a hint and returning None instead of raising.

    Delegates to ``parse_receipt``. On any ``Exception``, the first known
    error code found in the exception text selects the hint that is
    printed; unrecognised errors are printed as-is.
    """
    try:
        return parse_receipt(file_path)
    except Exception as e:
        failure_text = str(e)
        # Ordered (code, hint) pairs: the first code contained in the
        # exception text wins.
        known_hints = (
            ("LIMIT_EXCEEDED", "Monthly limit reached — upgrade at docuparseapi.com/pricing"),
            ("EXTRACTION_FAILED", f"Could not parse {file_path} — try a cleaner image"),
            ("UNSUPPORTED_FILE_TYPE", "Use JPG, PNG, or PDF only"),
            ("FILE_TOO_LARGE", "File exceeds 10MB — compress before sending"),
        )
        hint = next(
            (text for code, text in known_hints if code in failure_text),
            None,
        )
        if hint is None:
            hint = f"Unexpected error: {e}"
        print(hint)
    return None


# Storing results
import json
import sqlite3
from datetime import datetime
def store_receipt(conn: sqlite3.Connection, receipt: dict) -> int:
    """Insert a parsed receipt into the ``receipts`` table and return its row id.

    The table is created if it does not exist. ``document_id`` is UNIQUE, so
    storing a receipt whose ``document_id`` is already present inserts
    nothing and returns the id of the existing row.

    Args:
        conn: Open SQLite connection; the insert is committed before returning.
        receipt: Parsed receipt dict. An absent or null ``total`` or ``tax``
            is stored as 0.0, and absent or null ``line_items`` as ``[]``.

    Returns:
        The ``id`` of the newly inserted row, or of the already-stored row
        when the insert was skipped as a duplicate.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS receipts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            document_id TEXT UNIQUE,
            merchant TEXT,
            date TEXT,
            currency TEXT,
            total REAL,
            tax REAL,
            payment_method TEXT,
            line_items TEXT,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """)
    # NOTE(review): the sample response in this article shows receipt_id but
    # no document_id field; confirm the API returns document_id. SQLite's
    # UNIQUE constraint allows any number of NULLs, so rows without a
    # document_id are never de-duplicated.
    document_id = receipt.get("document_id")
    cursor = conn.execute("""
        INSERT OR IGNORE INTO receipts
            (document_id, merchant, date, currency, total, tax, payment_method, line_items)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        document_id,
        receipt.get("merchant"),
        receipt.get("date"),
        receipt.get("currency"),
        float(receipt.get("total") or 0),
        float(receipt.get("tax") or 0),
        receipt.get("payment_method"),
        # `or []` keeps a null line_items from being stored as the JSON text "null".
        json.dumps(receipt.get("line_items") or []),
    ))
    conn.commit()
    if cursor.rowcount == 0:
        # The insert was ignored as a duplicate. lastrowid is not the id of
        # the existing row in that case, so look the row up explicitly.
        existing = conn.execute(
            "SELECT id FROM receipts WHERE document_id = ?", (document_id,)
        ).fetchone()
        if existing is not None:
            return existing[0]
    return cursor.lastrowid
conn = sqlite3.connect("receipts.db")
# parse_receipt() raises on failure and only returns a successful, non-empty
# response, so guarding its result with `if receipt:` would be dead code.
# parse_receipt_safe() returns None on failure, which the guard handles.
receipt = parse_receipt_safe("receipt.jpg")
if receipt:
    row_id = store_receipt(conn, receipt)
    print(f"Stored receipt #{row_id}: {receipt['merchant']} — {receipt['total']}")

# Use document_id as your deduplication key — it's unique per extraction, so storing it prevents double-processing the same receipt.