Receipt OCR with Python — Extract Receipt Data in 10 Lines

Here's the minimum viable receipt OCR implementation in Python. Get your API key at docuparseapi.com/signup — free, no credit card — and run this:

python · 25 lines

import os
import requests

def parse_receipt(file_path: str) -> dict:
    """Upload one receipt file to the extraction endpoint and return the result.

    Args:
        file_path: Path of the receipt file to upload.

    Returns:
        The decoded JSON response body of a successful extraction.

    Raises:
        KeyError: If the DOCUPARSE_API_KEY environment variable is not set.
        OSError: If the file cannot be opened.
        requests.RequestException: On a network failure or timeout.
        RuntimeError: If the response body is not JSON or the API reports a
            failure. The message has the form "[<code>] <message>".
    """
    api_key = os.environ["DOCUPARSE_API_KEY"]
    with open(file_path, "rb") as f:
        response = requests.post(
            "https://docuparseapi.com/api/v1/extract",
            headers={"Authorization": f"Bearer {api_key}"},
            files={"file": f},
            # requests has no default timeout; without one a stalled
            # connection would block forever.
            timeout=30,
        )

    try:
        data = response.json()
    except ValueError as exc:
        # Non-JSON body, e.g. an HTML error page from a proxy or gateway.
        raise RuntimeError(
            f"[HTTP {response.status_code}] response body is not valid JSON"
        ) from exc

    if not data.get("success"):
        # Fall back to the HTTP status when the error payload is incomplete
        # instead of masking the failure with a KeyError.
        error = data.get("error") or {}
        code = error.get("code", f"HTTP {response.status_code}")
        message = error.get("message", "request failed")
        raise RuntimeError(f"[{code}] {message}")
    return data

# Parse a single receipt and print its headline fields.
receipt = parse_receipt("receipt.jpg")

print(f"Store:    {receipt['merchant']}")
print(f"Date:     {receipt['date']}")
print(f"Total:    {receipt['currency']} {receipt['total']}")
print(f"Tax:      {receipt['tax']}")
print(f"Method:   {receipt['payment_method']}")

# A missing field is returned as null, so the key can be present with a None
# value; `or []` covers that case, which a .get() default alone would not.
for item in receipt.get("line_items") or []:
    print(f"  {item['description']}: {item['amount']}")

API Response

What the response looks like

json response⚡ extracted in ~3s

merchant: "Whole Foods Market" — Store name

total: "67.43" — Final amount

tax: "5.43" — Tax charged

date: "2026-05-14" — ISO date

currency: "USD" — ISO 4217

payment_method: "Visa Card" — Card type

line_items: [...] — Item array

✓ Every field named and normalized. Access receipt['merchant'] directly — no parsing required.

json · 19 lines

{
  "success": true,
  "document_type": "receipt",
  "merchant": "Whole Foods Market",
  "date": "2026-05-14",
  "total": "67.43",
  "subtotal": "62.00",
  "tax": "5.43",
  "tax_rate": "8.76%",
  "currency": "USD",
  "receipt_id": "R-0042-7721",
  "payment_method": "Visa Card",
  "line_items": [
    { "description": "Organic Milk", "quantity": 1, "amount": "4.99" },
    { "description": "Free-Range Eggs", "quantity": 1, "amount": "7.49" },
    { "description": "Sourdough Bread", "quantity": 1, "amount": "6.99" }
  ],
  "processing_time_ms": 2980
}

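Every field is already named and normalized. Access receipt['merchant'] directly — no parsing required. Missing fields return null rather than being omitted, so you never need to check for key existence — but you do need to handle None values (for example, use receipt.get("line_items") or [] before iterating). Monetary amounts are returned as strings, so convert them with float() or Decimal before doing arithmetic.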

File Types

Supported formats

The API accepts JPG, PNG, and PDF files up to 10MB. It handles both digital receipts and scanned or photographed paper receipts — OCR is applied automatically when needed.

Batch Processing

Processing a folder of receipts

python · 23 lines

from pathlib import Path

def batch_parse(folder: str, patterns: tuple[str, ...] = ("*.jpg",)) -> list[dict]:
    """Send every matching file in a folder to the extraction endpoint.

    A failure on one file (unreadable file, network error, timeout, non-JSON
    response) is recorded as an unsuccessful entry and the batch continues.

    Args:
        folder: Directory to scan (not recursive).
        patterns: Glob patterns selecting the files to upload. Defaults to
            JPG only; pass e.g. ("*.jpg", "*.png", "*.pdf") for more.

    Returns:
        One dict per file, in sorted path order: the response fields plus a
        "file" key holding the file name. Entries for failed requests carry
        "success": False and an "error" dict with "code" and "message".

    Raises:
        KeyError: If the DOCUPARSE_API_KEY environment variable is not set.
    """
    api_key = os.environ["DOCUPARSE_API_KEY"]
    results = []

    # The set drops files matched by more than one pattern; sorting makes
    # the processing order reproducible.
    paths = sorted(
        {path for pattern in patterns for path in Path(folder).glob(pattern)}
    )
    for path in paths:
        try:
            with open(path, "rb") as f:
                response = requests.post(
                    "https://docuparseapi.com/api/v1/extract",
                    headers={"Authorization": f"Bearer {api_key}"},
                    files={"file": f},
                    timeout=30,
                )
            data = response.json()
        except (requests.RequestException, OSError, ValueError) as exc:
            # Keep going: one bad file must not discard the results that
            # were already collected.
            data = {
                "success": False,
                "error": {"code": "CLIENT_ERROR", "message": str(exc)},
            }
        results.append({"file": path.name, **data})
        print(f"{'✓' if data.get('success') else '✗'} {path.name}")

    return results

# Parse the whole folder, then add up the totals of the successful results.
receipts = batch_parse("./receipts/")
# A null or missing total counts as 0.
successful = [r for r in receipts if r.get("success")]
total_spend = sum(float(r.get("total") or 0) for r in successful)
print(f"\nTotal spend: ${total_spend:.2f}")

Also works with *.png and *.pdf — just add more glob patterns.

Ready to run this on your receipts?

20 documents/month — free forever · No credit card · API key in 60 seconds

Get Your Free API Key →

Async / FastAPI

Async version for FastAPI and asyncio

python · 14 lines

import httpx

async def parse_receipt_async(file_path: str) -> dict:
    """Upload one receipt file without blocking on the network request.

    Args:
        file_path: Path of the receipt file to upload.

    Returns:
        The decoded JSON response body of a successful extraction.

    Raises:
        KeyError: If the DOCUPARSE_API_KEY environment variable is not set.
        OSError: If the file cannot be read.
        httpx.HTTPError: On a network failure or timeout.
        RuntimeError: If the response body is not JSON or the API reports a
            failure. The message has the form "[<code>] <message>".
    """
    api_key = os.environ["DOCUPARSE_API_KEY"]
    # Read the payload up front so the file handle is already closed while
    # the request is awaited.
    with open(file_path, "rb") as f:
        payload = f.read()

    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.post(
            "https://docuparseapi.com/api/v1/extract",
            headers={"Authorization": f"Bearer {api_key}"},
            files={"file": (os.path.basename(file_path), payload)},
        )

    try:
        data = response.json()
    except ValueError as exc:
        # Non-JSON body, e.g. an HTML error page from a proxy or gateway.
        raise RuntimeError(
            f"[HTTP {response.status_code}] response body is not valid JSON"
        ) from exc

    if not data.get("success"):
        # Fall back to the HTTP status when the error payload is incomplete.
        error = data.get("error") or {}
        code = error.get("code", f"HTTP {response.status_code}")
        message = error.get("message", "request failed")
        raise RuntimeError(f"[{code}] {message}")
    return data

Install httpx with pip install httpx if you don't have it.

Error Handling

Error handling

python · 16 lines

def parse_receipt_safe(file_path: str) -> dict | None:
    """Parse a receipt, printing a hint instead of raising on failure.

    Args:
        file_path: Path of the receipt file, passed through to parse_receipt.

    Returns:
        The parsed receipt dict, or None if parse_receipt raised.
    """
    try:
        return parse_receipt(file_path)
    except Exception as err:
        text = str(err)
        # Checked in order; the first error code found in the message wins.
        hints = (
            ("LIMIT_EXCEEDED", "Monthly limit reached — upgrade at docuparseapi.com/pricing"),
            ("EXTRACTION_FAILED", f"Could not parse {file_path} — try a cleaner image"),
            ("UNSUPPORTED_FILE_TYPE", "Use JPG, PNG, or PDF only"),
            ("FILE_TOO_LARGE", "File exceeds 10MB — compress before sending"),
        )
        hint = next((advice for code, advice in hints if code in text), None)
        if hint is None:
            hint = f"Unexpected error: {err}"
        print(hint)
        return None

Database Storage

Storing results

python · 41 lines

import json
import sqlite3
from datetime import datetime

def store_receipt(conn: sqlite3.Connection, receipt: dict) -> int:
    """Insert a parsed receipt into the receipts table, skipping duplicates.

    The table is created on first use. Rows are de-duplicated on
    document_id through the UNIQUE constraint and INSERT OR IGNORE.

    Args:
        conn: Open SQLite connection; the change is committed before return.
        receipt: Parsed receipt dict. A missing or null total/tax is stored
            as 0.0, and line_items is stored as a JSON string.

    Returns:
        The id of the row holding this receipt: the new row, or the
        existing row when the document_id was already stored.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS receipts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            document_id TEXT UNIQUE,
            merchant TEXT,
            date TEXT,
            currency TEXT,
            total REAL,
            tax REAL,
            payment_method TEXT,
            line_items TEXT,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """)
    document_id = receipt.get("document_id")
    cursor = conn.execute("""
        INSERT OR IGNORE INTO receipts
        (document_id, merchant, date, currency, total, tax, payment_method, line_items)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        document_id,
        receipt.get("merchant"),
        receipt.get("date"),
        receipt.get("currency"),
        float(receipt.get("total") or 0),
        float(receipt.get("tax") or 0),
        receipt.get("payment_method"),
        json.dumps(receipt.get("line_items", [])),
    ))
    conn.commit()

    if cursor.rowcount == 0 and document_id is not None:
        # The insert was ignored as a duplicate. lastrowid would then report
        # the connection's last successful insert, which is an unrelated
        # row, so look up the row that already holds this document_id.
        existing = conn.execute(
            "SELECT id FROM receipts WHERE document_id = ?", (document_id,)
        ).fetchone()
        if existing is not None:
            return existing[0]
    return cursor.lastrowid

conn = sqlite3.connect("receipts.db")
try:
    # parse_receipt_safe returns None on failure, which is the case the guard
    # below handles; parse_receipt would raise and never return a falsy value.
    receipt = parse_receipt_safe("receipt.jpg")
    if receipt:
        row_id = store_receipt(conn, receipt)
        print(f"Stored receipt #{row_id}: {receipt['merchant']} — {receipt['total']}")
finally:
    # Release the database file even if parsing or storing fails.
    conn.close()

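Use document_id as your deduplication key — it's unique per extraction, so storing it prevents the same extraction result from being inserted twice. Because the ID identifies an extraction rather than the receipt itself, uploading the same receipt again will presumably produce a new document_id; to catch that case, also compare receipt fields such as merchant, date, and total.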