Blog · Receipt OCR with Python — Extract Receipt Data in 10 Lines

Receipt OCR with Python — Extract Receipt Data in 10 Lines

2026-05-30 · 4 min read

10 lines of code
~3s avg. extract time
Free — 20 docs/month
Any format — JPG, PNG, PDF

Here's the minimum viable receipt OCR implementation in Python. Get your API key at docuparseapi.com/signup — free, no credit card — and run this:

python
import os
import requests

def parse_receipt(file_path: str) -> dict:
    """Upload one receipt file to the extract endpoint and return the parsed result.

    Args:
        file_path: Path of the receipt image or PDF to upload.

    Returns:
        The decoded JSON response body.

    Raises:
        KeyError: If the DOCUPARSE_API_KEY environment variable is not set.
        OSError: If the file cannot be opened.
        requests.RequestException: If the request fails or times out.
        RuntimeError: If the response is not valid JSON, or the API reports
            a failure. API failures are formatted as "[CODE] message".
    """
    with open(file_path, "rb") as f:
        response = requests.post(
            "https://docuparseapi.com/api/v1/extract",
            headers={"Authorization": f"Bearer {os.environ['DOCUPARSE_API_KEY']}"},
            files={"file": f},
            # requests has no default timeout; without one a stalled
            # connection would block forever.
            timeout=30,
        )
    try:
        data = response.json()
    except ValueError as exc:
        # e.g. an HTML error page from a proxy instead of the API's JSON.
        raise RuntimeError(
            f"[HTTP {response.status_code}] response was not valid JSON"
        ) from exc
    if not data.get("success"):
        # Tolerate a failure payload that lacks the usual error object.
        error = data.get("error") or {}
        raise RuntimeError(
            f"[{error.get('code', 'UNKNOWN')}] "
            f"{error.get('message', 'no error details returned')}"
        )
    return data

# Parse a single receipt and print its headline fields.
receipt = parse_receipt("receipt.jpg")

print(f"Store:    {receipt['merchant']}")
print(f"Date:     {receipt['date']}")
print(f"Total:    {receipt['currency']} {receipt['total']}")
print(f"Tax:      {receipt['tax']}")
print(f"Method:   {receipt['payment_method']}")

# A missing field is returned as null, so the key can exist with a None
# value; `or []` covers that case, which a plain .get() default would not.
for item in receipt.get("line_items") or []:
    print(f"  {item['description']}: {item['amount']}")
API Response

What the response looks like

JSON response ⚡ extracted in ~3s
merchant: "Whole Foods Market" — Store name
total: "67.43" — Final amount
tax: "5.43" — Tax charged
date: "2026-05-14" — ISO date
currency: "USD" — ISO 4217
payment_method: "Visa Card" — Card type
line_items: [...] — Item array
✓ Every field named and normalized. Access receipt['merchant'] directly — no parsing required.
json
{
  "success": true,
  "document_type": "receipt",
  "merchant": "Whole Foods Market",
  "date": "2026-05-14",
  "total": "67.43",
  "subtotal": "62.00",
  "tax": "5.43",
  "tax_rate": "8.76%",
  "currency": "USD",
  "receipt_id": "R-0042-7721",
  "payment_method": "Visa Card",
  "line_items": [
    { "description": "Organic Milk", "quantity": 1, "amount": "4.99" },
    { "description": "Free-Range Eggs", "quantity": 1, "amount": "7.49" },
    { "description": "Sourdough Bread", "quantity": 1, "amount": "6.99" }
  ],
  "processing_time_ms": 2980
}

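Every field is already named, typed, and normalized. Access receipt['merchant'] directly — no parsing required. Missing fields are returned as null (None in Python) rather than being omitted, so you never need to check for key existence — only for None before using a value. Note that amounts such as total and tax are returned as strings, so convert them (for example with float() or Decimal) before doing arithmetic.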

File Types

Supported formats

The API accepts JPG, PNG, and PDF files up to 10MB. It handles both digital receipts and scanned or photographed paper receipts — OCR is applied automatically when needed.

Batch Processing

Processing a folder of receipts

python
from pathlib import Path

def batch_parse(folder: str, patterns: tuple[str, ...] = ("*.jpg",)) -> list[dict]:
    """Send every matching file in a folder to the extract endpoint.

    A file whose request fails or whose response is not valid JSON is
    recorded as a failed entry instead of aborting the rest of the batch.

    Args:
        folder: Directory to scan (not recursive).
        patterns: Glob patterns to match, e.g. ("*.jpg", "*.png", "*.pdf").
            Defaults to JPG files only.

    Returns:
        One dict per file: the response body plus a "file" key holding the
        file name. Failed requests have "success" set to False and an
        "error" dict with "code" and "message".

    Raises:
        KeyError: If the DOCUPARSE_API_KEY environment variable is not set.
    """
    api_key = os.environ["DOCUPARSE_API_KEY"]
    headers = {"Authorization": f"Bearer {api_key}"}
    results = []

    for pattern in patterns:
        for path in Path(folder).glob(pattern):
            try:
                with open(path, "rb") as f:
                    response = requests.post(
                        "https://docuparseapi.com/api/v1/extract",
                        headers=headers,
                        files={"file": f},
                        timeout=30,
                    )
                data = response.json()
            except (requests.RequestException, ValueError) as exc:
                # REQUEST_FAILED is a client-side marker, not an API code;
                # it keeps the entry in the same shape as an API failure.
                data = {
                    "success": False,
                    "error": {"code": "REQUEST_FAILED", "message": str(exc)},
                }
            results.append({"file": path.name, **data})
            # .get() so a body without "success" counts as a failure.
            print(f"{'✓' if data.get('success') else '✗'} {path.name}")

    return results

# Parse the whole folder, then add up the totals of the successful receipts.
receipts = batch_parse("./receipts/")
total_spend = 0
for parsed in receipts:
    if not parsed.get("success"):
        continue
    # A null or absent total contributes 0.
    total_spend += float(parsed.get("total") or 0)
print(f"\nTotal spend: ${total_spend:.2f}")

Also works with *.png and *.pdf — just add more glob patterns.

Ready to run this on your receipts?
20 documents free every month · No credit card · API key in 60 seconds
Get Your Free API Key →
Async / FastAPI

Async version for FastAPI and asyncio

python
import httpx

async def parse_receipt_async(file_path: str) -> dict:
    """Upload one receipt file from a coroutine and return the parsed result.

    Args:
        file_path: Path of the receipt image or PDF to upload.

    Returns:
        The decoded JSON response body.

    Raises:
        OSError: If the file cannot be read.
        KeyError: If the DOCUPARSE_API_KEY environment variable is not set.
        RuntimeError: If the response is not valid JSON, or the API reports
            a failure. API failures are formatted as "[CODE] message".
    """
    import asyncio  # local import keeps this snippet self-contained

    def _read_file() -> bytes:
        with open(file_path, "rb") as f:
            return f.read()

    # Read on a worker thread so the blocking disk read does not stall the
    # event loop while other requests are being served.
    payload = await asyncio.to_thread(_read_file)

    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.post(
            "https://docuparseapi.com/api/v1/extract",
            headers={"Authorization": f"Bearer {os.environ['DOCUPARSE_API_KEY']}"},
            files={"file": (os.path.basename(file_path), payload)},
        )
    try:
        data = response.json()
    except ValueError as exc:
        # e.g. an HTML error page from a proxy instead of the API's JSON.
        raise RuntimeError(
            f"[HTTP {response.status_code}] response was not valid JSON"
        ) from exc
    if not data.get("success"):
        # Tolerate a failure payload that lacks the usual error object.
        error = data.get("error") or {}
        raise RuntimeError(
            f"[{error.get('code', 'UNKNOWN')}] "
            f"{error.get('message', 'no error details returned')}"
        )
    return data

Install httpx with pip install httpx if you don't have it.

Error Handling

Error handling

python
def parse_receipt_safe(file_path: str) -> dict | None:
    try:
        return parse_receipt(file_path)
    except Exception as e:
        msg = str(e)
        if "LIMIT_EXCEEDED" in msg:
            print("Monthly limit reached — upgrade at docuparseapi.com/pricing")
        elif "EXTRACTION_FAILED" in msg:
            print(f"Could not parse {file_path} — try a cleaner image")
        elif "UNSUPPORTED_FILE_TYPE" in msg:
            print("Use JPG, PNG, or PDF only")
        elif "FILE_TOO_LARGE" in msg:
            print("File exceeds 10MB — compress before sending")
        else:
            print(f"Unexpected error: {e}")
        return None
Database Storage

Storing results

python
import json
import sqlite3
from datetime import datetime

def store_receipt(conn: sqlite3.Connection, receipt: dict) -> int:
    """Insert a parsed receipt into the receipts table and return its row id.

    The table is created on first use. Receipts are de-duplicated on their
    document identifier: storing the same receipt again inserts nothing and
    returns the id of the row already present.

    Args:
        conn: Open SQLite connection; the insert is committed before returning.
        receipt: Parsed receipt dict as returned by the extract endpoint.

    Returns:
        The id of the newly inserted row, or of the existing row when the
        receipt was already stored.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS receipts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            document_id TEXT UNIQUE,
            merchant TEXT,
            date TEXT,
            currency TEXT,
            total REAL,
            tax REAL,
            payment_method TEXT,
            line_items TEXT,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """)
    # The sample receipt response carries its identifier under "receipt_id".
    # Falling back to it populates the UNIQUE column; otherwise it stays
    # NULL and duplicates are never detected.
    document_id = receipt.get("document_id") or receipt.get("receipt_id")
    cursor = conn.execute("""
        INSERT OR IGNORE INTO receipts
        (document_id, merchant, date, currency, total, tax, payment_method, line_items)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        document_id,
        receipt.get("merchant"),
        receipt.get("date"),
        receipt.get("currency"),
        float(receipt.get("total") or 0),
        float(receipt.get("tax") or 0),
        receipt.get("payment_method"),
        # A null line_items is stored as an empty JSON array, not "null".
        json.dumps(receipt.get("line_items") or []),
    ))
    conn.commit()
    if cursor.rowcount == 0 and document_id is not None:
        # The insert was ignored as a duplicate, so lastrowid does not refer
        # to this receipt; look up the row that is already stored.
        existing = conn.execute(
            "SELECT id FROM receipts WHERE document_id = ?", (document_id,)
        ).fetchone()
        if existing is not None:
            return existing[0]
    return cursor.lastrowid

conn = sqlite3.connect("receipts.db")
# parse_receipt raises on failure and never returns a falsy value, so the
# guard below only makes sense with the non-raising wrapper.
receipt = parse_receipt_safe("receipt.jpg")
if receipt:
    row_id = store_receipt(conn, receipt)
    print(f"Stored receipt #{row_id}: {receipt['merchant']} — {receipt['total']}")

Ready to start parsing documents?

More from the blog