← Blog  ·  April 28, 2026  ·  8 min read

Wrapping the PrivacyFilter API in a FastAPI endpoint

If you want to integrate PII redaction into your own backend — adding an internal API that your team or services can call without exposing a license key in client code — a thin FastAPI wrapper is the right pattern.

This tutorial builds a production-ready /redact endpoint with async httpx calls, per-IP rate limiting, Bearer-token auth, and a structured JSON response that includes character offsets.

Project structure

redact-service/
├── main.py
├── .env
└── requirements.txt

Requirements

# requirements.txt
fastapi
uvicorn[standard]
httpx
python-dotenv

The service

"""redact-service/main.py — thin FastAPI wrapper around PrivacyFilter API"""
import os
import time
import threading
from contextlib import asynccontextmanager

import httpx
from dotenv import load_dotenv
from fastapi import Depends, FastAPI, HTTPException, Request, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import BaseModel

load_dotenv()

PRIVACYFILTER_KEY = os.getenv("PRIVACYFILTER_LICENSE_KEY", "")
INTERNAL_API_TOKEN = os.getenv("INTERNAL_API_TOKEN", "change-me")
MAX_CHARS = int(os.getenv("MAX_CHARS", "10000"))
RATE_LIMIT_PER_MIN = int(os.getenv("RATE_LIMIT_PER_MIN", "60"))

# Per-IP fixed-window rate limiter: ip -> (count, window_start).
# Note: in-memory and per-process — with multiple uvicorn workers, each
# process keeps its own counters.
_rl: dict[str, tuple[int, float]] = {}
_rl_lock = threading.Lock()

def check_rate_limit(ip: str):
    now = time.time()
    with _rl_lock:
        count, start = _rl.get(ip, (0, now))
        if now - start > 60:
            count, start = 0, now
        count += 1
        _rl[ip] = (count, start)
        if count > RATE_LIMIT_PER_MIN:
            raise HTTPException(429, detail="Rate limit exceeded")

# HTTP client — shared across requests
_http: httpx.AsyncClient | None = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    global _http
    _http = httpx.AsyncClient(timeout=30)
    yield
    await _http.aclose()

app = FastAPI(title="Redact Service", lifespan=lifespan)
security = HTTPBearer()

def verify_token(creds: HTTPAuthorizationCredentials = Security(security)):
    if creds.credentials != INTERNAL_API_TOKEN:
        raise HTTPException(403, detail="Invalid token")

class RedactRequest(BaseModel):
    text: str
    mode: str = "replace"  # replace | mask | tag

class Entity(BaseModel):
    type: str
    original: str
    replacement: str
    start: int
    end: int

class RedactResponse(BaseModel):
    redacted_text: str
    entities: list[Entity]
    char_count: int

@app.post("/redact", response_model=RedactResponse, dependencies=[Depends(verify_token)])
async def redact(req: RedactRequest, request: Request):
    # X-Forwarded-For may hold a comma-separated proxy chain; the client is first.
    forwarded = request.headers.get("X-Forwarded-For")
    client_ip = forwarded.split(",")[0].strip() if forwarded else request.client.host
    check_rate_limit(client_ip)

    if len(req.text) > MAX_CHARS:
        raise HTTPException(422, detail=f"Text exceeds {MAX_CHARS} characters")

    r = await _http.post(
        "https://privacyfilter.run/api/redact",
        json={
            "text": req.text,
            "license_key": PRIVACYFILTER_KEY,
            "mode": req.mode,
        },
    )
    if r.status_code == 429:
        raise HTTPException(429, detail="Upstream rate limit hit")
    r.raise_for_status()

    data = r.json()
    return RedactResponse(
        redacted_text=data["redacted_text"],
        entities=data["entities"],
        char_count=len(req.text),
    )

@app.get("/health")
async def health():
    return {"status": "ok"}

Environment file

# .env
PRIVACYFILTER_LICENSE_KEY=your-uuid-here
INTERNAL_API_TOKEN=a-long-random-secret-token
MAX_CHARS=10000
RATE_LIMIT_PER_MIN=60

Running the service

uvicorn main:app --host 0.0.0.0 --port 8080 --workers 2

Calling the service from another microservice

import httpx

REDACT_URL = "http://redact-service:8080"
REDACT_TOKEN = "a-long-random-secret-token"

def redact(text: str) -> tuple[str, list[dict]]:
    resp = httpx.post(
        f"{REDACT_URL}/redact",
        headers={"Authorization": f"Bearer {REDACT_TOKEN}"},
        json={"text": text},
        timeout=20,
    )
    resp.raise_for_status()  # chaining .raise_for_status().json() requires httpx >= 0.25
    data = resp.json()
    return data["redacted_text"], data["entities"]

clean, ents = redact("Hello Alice, your order is ready at 42 Main St.")
print(clean)  # → "Hello [PERSON_1], your order is ready at [ADDRESS_2]."
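
The start/end offsets in entities index into the original text, so callers can verify each match or rebuild the redacted string locally. A small sketch using the response shape defined by the Entity model above (the entity values here are made up for illustration):

```python
original = "Hello Alice, your order is ready at 42 Main St."
# Shape mirrors the Entity model; values are illustrative, not real output.
entities = [
    {"type": "PERSON", "original": "Alice", "replacement": "[PERSON_1]",
     "start": 6, "end": 11},
    {"type": "ADDRESS", "original": "42 Main St", "replacement": "[ADDRESS_2]",
     "start": 36, "end": 46},
]

# Each offset pair should slice out exactly the matched substring.
for e in entities:
    assert original[e["start"]:e["end"]] == e["original"]

# Rebuild the redacted text from the offsets (right to left, so earlier
# offsets stay valid as the string length changes).
redacted = original
for e in sorted(entities, key=lambda e: e["start"], reverse=True):
    redacted = redacted[:e["start"]] + e["replacement"] + redacted[e["end"]:]
print(redacted)  # → Hello [PERSON_1], your order is ready at [ADDRESS_2].
```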

Docker deployment

# Dockerfile
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY main.py .
EXPOSE 8080
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080", "--workers", "2"]

# docker-compose.yml
services:
  redact:
    build: .
    env_file: .env
    ports:
      - "8080:8080"
    restart: unless-stopped

Adding batch support

To expose a batch endpoint, add a second route that forwards to /api/redact/batch on the upstream:

class BatchRequest(BaseModel):
    documents: list[dict]  # [{"id": "...", "text": "..."}]
    mode: str = "replace"

@app.post("/redact/batch", dependencies=[Depends(verify_token)])
async def redact_batch(req: BatchRequest, request: Request):
    forwarded = request.headers.get("X-Forwarded-For")
    client_ip = forwarded.split(",")[0].strip() if forwarded else request.client.host
    check_rate_limit(client_ip)
    r = await _http.post(
        "https://privacyfilter.run/api/redact/batch",
        json={"documents": req.documents, "license_key": PRIVACYFILTER_KEY, "mode": req.mode},
    )
    r.raise_for_status()
    return r.json()
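
If callers submit large document sets, you may want to split them into smaller batches before forwarding. A stdlib-only sketch of a greedy chunker; the count and character caps are assumptions for illustration, not documented upstream limits:

```python
def chunk_documents(documents: list[dict], max_docs: int = 50,
                    max_chars: int = 10_000) -> list[list[dict]]:
    """Greedily split documents into batches that stay under both a
    document-count cap and a total-character cap. Caps are illustrative;
    an oversized single document still gets its own batch."""
    batches: list[list[dict]] = []
    current: list[dict] = []
    chars = 0
    for doc in documents:
        size = len(doc.get("text", ""))
        # Flush the current batch if adding this doc would exceed a cap.
        if current and (len(current) >= max_docs or chars + size > max_chars):
            batches.append(current)
            current, chars = [], 0
        current.append(doc)
        chars += size
    if current:
        batches.append(current)
    return batches

docs = [{"id": str(i), "text": "x" * 4000} for i in range(5)]
batches = chunk_documents(docs)
print([len(b) for b in batches])  # → [2, 2, 1]
```

Each resulting batch can then be sent as its own POST to /redact/batch, keeping individual requests well under the wrapper's MAX_CHARS guard.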

Get a license key at PrivacyFilter.run — $9 one-time for 50 redactions or $19/month unlimited.

See pricing →
