# Prerequisite (run in a shell, not in Python): pip install requests xero-python
import os
import requests
# Credentials come from the environment; indexing os.environ directly means a
# missing variable raises KeyError as soon as this module is loaded.
DOCUPARSE_KEY = os.environ["DOCUPARSE_API_KEY"]
XERO_TOKEN = os.environ["XERO_ACCESS_TOKEN"]
XERO_TENANT_ID = os.environ["XERO_TENANT_ID"]

# Root URL that every Xero request in this file is built from.
XERO_BASE = "https://api.xero.com/api.xro/2.0"

# Headers attached to every Xero request in this file: bearer token, target
# tenant, and JSON in both directions.
XERO_HEADERS = {
    "Authorization": "Bearer " + XERO_TOKEN,
    "Xero-tenant-id": XERO_TENANT_ID,
    "Content-Type": "application/json",
    "Accept": "application/json",
}
# ── Step 1: Extract ────────────────────────────────────────────
def extract_invoice(file_path: str) -> dict:
    """Upload a file to DocuParseAPI and return the decoded JSON response.

    Args:
        file_path: Path of the document to upload; it is opened in binary
            mode and sent as the multipart field ``file``.

    Returns:
        The decoded JSON body, returned only when its ``success`` field is
        truthy.

    Raises:
        OSError: If the file cannot be opened.
        requests.RequestException: On connection failure or timeout, or on an
            HTTP error status whose body is not JSON.
        RuntimeError: If the body is not JSON, or reports failure. The message
            carries the API error code when one is present, otherwise the
            HTTP status code.
    """
    with open(file_path, "rb") as f:
        response = requests.post(
            "https://docuparseapi.com/api/v1/extract",
            headers={"Authorization": f"Bearer {DOCUPARSE_KEY}"},
            files={"file": (os.path.basename(file_path), f)},
            timeout=30,
        )

    try:
        data = response.json()
    except ValueError as exc:
        # A non-JSON body (e.g. an HTML error page) carries no API error
        # code, so surface the HTTP status instead of a bare decode error.
        response.raise_for_status()
        raise RuntimeError(
            f"Extraction failed [HTTP {response.status_code}]"
        ) from exc

    if isinstance(data, dict) and data.get("success"):
        return data

    # Read the error code defensively: a failure body without the expected
    # {"error": {"code": ...}} shape must not raise KeyError/TypeError here
    # and hide the real failure.
    failure = data.get("error") if isinstance(data, dict) else None
    code = failure.get("code") if isinstance(failure, dict) else None
    if code is None:
        code = f"HTTP {response.status_code}"
    raise RuntimeError(f"Extraction failed [{code}]")
# ── Step 2: Vendor matching ────────────────────────────────────
def find_contact(merchant_name: str) -> str | None:
    """Look up a Xero Contact by name.

    Sends ``merchant_name`` as the ``searchTerm`` of a Contacts query. A
    case-insensitive exact name match wins; otherwise the first contact in
    the response is used as a partial match.

    Args:
        merchant_name: Vendor name to search for.

    Returns:
        The matching ContactID, or None when the search returns no contacts.

    Raises:
        requests.HTTPError: If Xero answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(
        f"{XERO_BASE}/Contacts",
        headers=XERO_HEADERS,
        params={"searchTerm": merchant_name},
        timeout=30,  # same limit extract_invoice uses; never wait forever
    )
    # An error body has no "Contacts" key; without this check it would be
    # indistinguishable from "no such contact" and the caller would go on to
    # create a new contact.
    response.raise_for_status()
    contacts = response.json().get("Contacts") or []

    # Try exact match first
    wanted = merchant_name.casefold()
    exact = next(
        (c for c in contacts if (c.get("Name") or "").casefold() == wanted),
        None,
    )
    if exact is not None:
        return exact["ContactID"]

    # Fall back to partial match.
    # NOTE(review): the first search hit may be a different vendor with a
    # similar name — confirm this fallback is acceptable for your ledger.
    return contacts[0]["ContactID"] if contacts else None
def create_contact(merchant_name: str) -> str:
    """Create a new Xero Contact and return its ContactID.

    Args:
        merchant_name: Value stored as the contact's ``Name``.

    Returns:
        The ContactID of the first contact in Xero's response.

    Raises:
        requests.HTTPError: If Xero answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.post(
        f"{XERO_BASE}/Contacts",
        headers=XERO_HEADERS,
        json={"Contacts": [{"Name": merchant_name}]},
        # Same limit extract_invoice uses; without one a stalled connection
        # would block the whole batch indefinitely.
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["Contacts"][0]["ContactID"]
def get_or_create_contact(merchant_name: str) -> str:
    """Return the ContactID for a vendor, creating the contact if lookup finds none."""
    existing_id = find_contact(merchant_name)
    if not existing_id:
        # Nothing matched: announce it, then register the vendor.
        print(f" Creating new contact: {merchant_name}")
        return create_contact(merchant_name)
    return existing_id
# ── Step 3: Duplicate check ─────────────────────────────────────
def is_duplicate(invoice_number: str, contact_id: str) -> bool:
    """Check if a bill with this invoice number already exists for this contact.

    Args:
        invoice_number: Supplier invoice number. A falsy value (None or "")
            skips the lookup, since there is nothing to match on.
        contact_id: Xero ContactID the bill would be filed under.

    Returns:
        True when Xero returns at least one ACCPAY invoice for this number
        and contact; False otherwise, including when no number was given.

    Raises:
        requests.HTTPError: If Xero answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    if not invoice_number:
        return False
    response = requests.get(
        f"{XERO_BASE}/Invoices",
        headers=XERO_HEADERS,
        params={
            "InvoiceNumbers": invoice_number,
            "ContactIDs": contact_id,
            # The invoice type is filtered through the `where` clause; a bare
            # "Type" query parameter is not a documented filter.
            "where": 'Type=="ACCPAY"',
        },
        timeout=30,  # same limit extract_invoice uses
    )
    # A failed lookup must not read as "no duplicate": an error body has no
    # "Invoices" key and would otherwise let a duplicate bill be created.
    response.raise_for_status()
    # NOTE(review): no Statuses filter is applied, so every status the
    # endpoint returns by default counts as an existing bill — confirm.
    return bool(response.json().get("Invoices"))
# ── Step 4: Create Bill ─────────────────────────────────────────
# Account code applied to every line item; override it with the
# XERO_EXPENSE_ACCOUNT_CODE environment variable.
XERO_ACCOUNT_CODE = os.environ.get("XERO_EXPENSE_ACCOUNT_CODE", "429")


def _bill_line(description, quantity, unit_amount) -> dict:
    """Build one Xero line item on the configured account with tax type INPUT."""
    return {
        "Description": description,
        # A missing or null quantity is treated as a single unit instead of
        # being sent to Xero as JSON null.
        "Quantity": 1 if quantity is None else quantity,
        "UnitAmount": float(unit_amount or 0),
        "AccountCode": XERO_ACCOUNT_CODE,
        "TaxType": "INPUT",
    }


def create_xero_bill(invoice_data: dict, contact_id: str) -> dict:
    """Create a supplier Bill (ACCPAY) in Xero.

    Args:
        invoice_data: Extracted invoice fields. Keys read here: ``line_items``
            (each with ``description``, ``quantity``, ``unit_price`` or
            ``amount``), ``invoice_id``, ``merchant``, ``subtotal``,
            ``total``, ``date``, ``due_date`` and ``currency``.
        contact_id: Xero ContactID the bill is filed under.

    Returns:
        The first invoice object from Xero's response.

    Raises:
        requests.HTTPError: If Xero answers with an error status.
        requests.RequestException: On connection failure or timeout.
        RuntimeError: If the returned invoice has ``HasValidationErrors`` set.
    """
    extracted_lines = invoice_data.get("line_items")
    if extracted_lines:
        # NOTE(review): if `amount` is a line total rather than a unit price,
        # falling back to it with a quantity above 1 would overstate the
        # line — confirm against the extraction schema.
        line_items = [
            _bill_line(
                item.get("description", ""),
                item.get("quantity"),
                item.get("unit_price") or item.get("amount"),
            )
            for item in extracted_lines
        ]
    else:
        # Single summary line if no line items extracted
        summary = (
            f"Invoice {invoice_data.get('invoice_id', '')} "
            f"from {invoice_data.get('merchant', '')}"
        ).strip()
        line_items = [
            _bill_line(
                summary,
                1,
                invoice_data.get("subtotal") or invoice_data.get("total"),
            )
        ]

    bill = {
        "Type": "ACCPAY",
        "Contact": {"ContactID": contact_id},
        "Date": invoice_data.get("date"),
        "DueDate": invoice_data.get("due_date"),
        "InvoiceNumber": invoice_data.get("invoice_id"),
        "CurrencyCode": invoice_data.get("currency", "USD"),
        # "SUBMITTED" files the bill as awaiting approval; "AUTHORISED"
        # creates it already approved.
        "Status": "DRAFT",
        "LineAmountTypes": "Exclusive",  # Tax exclusive — adjust if needed
        "LineItems": line_items,
    }
    # Omit fields the extraction left empty rather than posting JSON nulls.
    bill = {key: value for key, value in bill.items() if value is not None}

    response = requests.post(
        f"{XERO_BASE}/Invoices",
        headers=XERO_HEADERS,
        json={"Invoices": [bill]},
        timeout=30,  # same limit extract_invoice uses
    )
    response.raise_for_status()
    result = response.json()["Invoices"][0]

    # Check for Xero validation errors
    if result.get("HasValidationErrors"):
        errors = result.get("ValidationErrors", [])
        raise RuntimeError(f"Xero validation error: {errors}")
    return result
# ── Main workflow ───────────────────────────────────────────────
def process_invoice(file_path: str) -> dict:
    """
    Run one file through the whole pipeline: extract, match the vendor,
    check for an existing bill, then create the Xero Bill.

    Returns a summary dict. Its "status" is "duplicate" when a bill with the
    same invoice number already exists for the contact, otherwise "created"
    together with the new bill's ID, number and status.
    """
    print(f"\nProcessing: {os.path.basename(file_path)}")

    # Step 1 — extraction, plus the fields used for logging and the summary.
    extracted = extract_invoice(file_path)
    invoice_number = extracted.get("invoice_id")
    merchant = extracted.get("merchant") or "Unknown Vendor"
    total = extracted.get("total", "?")
    currency = extracted.get("currency", "")
    print(f" Extracted: {merchant} — {currency} {total}")

    # Step 2 — resolve the vendor to a Xero contact.
    contact_id = get_or_create_contact(merchant)

    # Step 3 — stop early if this invoice number is already on file.
    if is_duplicate(invoice_number, contact_id):
        print(f" DUPLICATE: invoice {invoice_number} already exists")
        return dict(
            status="duplicate",
            invoice_id=invoice_number,
            merchant=merchant,
        )

    # Step 4 — create the bill and report what Xero returned.
    bill = create_xero_bill(extracted, contact_id)
    bill_id = bill["InvoiceID"]
    bill_status = bill["Status"]
    print(f" Created Bill: {bill_id} — Status: {bill_status}")
    return dict(
        status="created",
        xero_bill_id=bill_id,
        xero_invoice_number=bill.get("InvoiceNumber"),
        merchant=merchant,
        total=total,
        currency=currency,
        xero_status=bill_status,
    )
# Needed for the folder walk further down.
from pathlib import Path

# Single-file run: push one invoice through the pipeline and show the summary.
result = process_invoice("supplier_invoice.pdf")
print(result)

# Batch run: every PDF directly inside ./invoices/. A failure on one file is
# reported and the loop moves on to the next.
for pdf in Path("./invoices/").glob("*.pdf"):
    try:
        result = process_invoice(str(pdf))
    except Exception as exc:
        print(f" ERROR: {pdf.name}: {exc}")