Database Testing Patterns¶
This guide covers patterns for testing database-related code with pytest-test-categories, from small tests using fakes to medium tests with real databases.
The Testing Pyramid for Databases¶
Database testing follows the same pyramid as all testing:
Test Size |
Database Approach |
Use Case |
|---|---|---|
Small (80%) |
In-memory fakes, mocks |
Business logic, repository contracts |
Medium (15%) |
SQLite in-memory, containers |
SQL queries, migrations, ORM behavior |
Large (5%) |
Staging database |
Full integration, performance |
Small Tests: Repository Fakes¶
The most effective way to test database-related code is to abstract the database behind a repository interface and use fakes in tests.
Define the Repository Interface¶
# src/repositories.py
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
@dataclass
class User:
id: int | None
name: str
email: str
created_at: datetime | None = None
class UserRepository(ABC):
"""Abstract repository for user persistence."""
@abstractmethod
def get_by_id(self, user_id: int) -> User | None:
"""Find user by ID."""
@abstractmethod
def get_by_email(self, email: str) -> User | None:
"""Find user by email."""
@abstractmethod
def save(self, user: User) -> User:
"""Save user and return with assigned ID."""
@abstractmethod
def delete(self, user_id: int) -> bool:
"""Delete user. Returns True if deleted."""
@abstractmethod
def list_all(self, limit: int = 100, offset: int = 0) -> list[User]:
"""List users with pagination."""
Create an In-Memory Fake¶
# tests/fakes/fake_user_repository.py
from datetime import datetime
from repositories import User, UserRepository
class FakeUserRepository(UserRepository):
"""In-memory fake for testing without a database."""
def __init__(self):
self._users: dict[int, User] = {}
self._next_id = 1
def get_by_id(self, user_id: int) -> User | None:
return self._users.get(user_id)
def get_by_email(self, email: str) -> User | None:
for user in self._users.values():
if user.email == email:
return user
return None
def save(self, user: User) -> User:
if user.id is None:
user = User(
id=self._next_id,
name=user.name,
email=user.email,
created_at=datetime.now(),
)
self._next_id += 1
self._users[user.id] = user
return user
def delete(self, user_id: int) -> bool:
if user_id in self._users:
del self._users[user_id]
return True
return False
def list_all(self, limit: int = 100, offset: int = 0) -> list[User]:
users = list(self._users.values())
return users[offset : offset + limit]
# Test helpers (not part of interface)
def add_users(self, *users: User) -> None:
"""Bulk add users for test setup."""
for user in users:
self.save(user)
def clear(self) -> None:
"""Reset repository state."""
self._users.clear()
self._next_id = 1
Test with the Fake¶
import pytest
from datetime import datetime
from repositories import User
from fakes.fake_user_repository import FakeUserRepository
@pytest.fixture
def user_repository():
"""Provide a fresh fake repository for each test."""
return FakeUserRepository()
@pytest.mark.small
class DescribeUserRepository:
"""Tests for user repository behavior."""
def test_saves_new_user_with_generated_id(self, user_repository):
user = User(id=None, name="Alice", email="alice@example.com")
saved = user_repository.save(user)
assert saved.id is not None
assert saved.id > 0
assert saved.name == "Alice"
def test_get_by_id_returns_saved_user(self, user_repository):
user = User(id=None, name="Bob", email="bob@example.com")
saved = user_repository.save(user)
retrieved = user_repository.get_by_id(saved.id)
assert retrieved is not None
assert retrieved.name == "Bob"
def test_get_by_id_returns_none_for_unknown(self, user_repository):
result = user_repository.get_by_id(999)
assert result is None
def test_get_by_email_finds_user(self, user_repository):
user = User(id=None, name="Charlie", email="charlie@example.com")
user_repository.save(user)
found = user_repository.get_by_email("charlie@example.com")
assert found is not None
assert found.name == "Charlie"
def test_delete_removes_user(self, user_repository):
user = User(id=None, name="Dave", email="dave@example.com")
saved = user_repository.save(user)
deleted = user_repository.delete(saved.id)
assert deleted is True
assert user_repository.get_by_id(saved.id) is None
def test_delete_returns_false_for_unknown(self, user_repository):
result = user_repository.delete(999)
assert result is False
def test_list_all_returns_all_users(self, user_repository):
user_repository.add_users(
User(id=None, name="User1", email="user1@example.com"),
User(id=None, name="User2", email="user2@example.com"),
User(id=None, name="User3", email="user3@example.com"),
)
users = user_repository.list_all()
assert len(users) == 3
def test_list_all_respects_pagination(self, user_repository):
for i in range(10):
user_repository.save(User(id=None, name=f"User{i}", email=f"user{i}@example.com"))
page1 = user_repository.list_all(limit=3, offset=0)
page2 = user_repository.list_all(limit=3, offset=3)
assert len(page1) == 3
assert len(page2) == 3
assert page1[0].id != page2[0].id
Test Services Using the Repository¶
# src/user_service.py
from repositories import User, UserRepository
class UserService:
"""Business logic for user operations."""
def __init__(self, repository: UserRepository):
self._repository = repository
def register_user(self, name: str, email: str) -> User:
"""Register a new user, checking for duplicates."""
existing = self._repository.get_by_email(email)
if existing:
raise ValueError(f"Email already registered: {email}")
user = User(id=None, name=name, email=email)
return self._repository.save(user)
def get_user_profile(self, user_id: int) -> dict:
"""Get user profile or raise if not found."""
user = self._repository.get_by_id(user_id)
if not user:
raise ValueError(f"User not found: {user_id}")
return {
"id": user.id,
"name": user.name,
"email": user.email,
"member_since": user.created_at.isoformat() if user.created_at else None,
}
# tests/test_user_service.py
import pytest
from user_service import UserService
from fakes.fake_user_repository import FakeUserRepository
@pytest.fixture
def user_service():
"""Provide user service with fake repository."""
repository = FakeUserRepository()
return UserService(repository)
@pytest.mark.small
class DescribeUserService:
"""Tests for user service business logic."""
def test_registers_new_user(self, user_service):
user = user_service.register_user("Alice", "alice@example.com")
assert user.id is not None
assert user.name == "Alice"
def test_rejects_duplicate_email(self, user_service):
user_service.register_user("Alice", "alice@example.com")
with pytest.raises(ValueError, match="Email already registered"):
user_service.register_user("Bob", "alice@example.com")
def test_gets_user_profile(self, user_service):
user = user_service.register_user("Charlie", "charlie@example.com")
profile = user_service.get_user_profile(user.id)
assert profile["name"] == "Charlie"
assert profile["email"] == "charlie@example.com"
def test_raises_for_unknown_user(self, user_service):
with pytest.raises(ValueError, match="User not found"):
user_service.get_user_profile(999)
Small Tests: SQLite In-Memory¶
For testing actual SQL queries without a real database server, use SQLite in-memory mode.
import pytest
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
@pytest.fixture
def sqlite_engine():
"""Create in-memory SQLite engine."""
engine = create_engine("sqlite:///:memory:")
yield engine
engine.dispose()
@pytest.fixture
def sqlite_session(sqlite_engine):
"""Create session with initialized schema."""
# Initialize schema
with sqlite_engine.connect() as conn:
conn.execute(text("""
CREATE TABLE users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
email TEXT UNIQUE NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""))
conn.commit()
Session = sessionmaker(bind=sqlite_engine)
session = Session()
yield session
session.close()
@pytest.mark.small
def test_inserts_user(sqlite_session):
"""Test SQL insert with in-memory SQLite."""
sqlite_session.execute(
text("INSERT INTO users (name, email) VALUES (:name, :email)"),
{"name": "Alice", "email": "alice@example.com"},
)
sqlite_session.commit()
result = sqlite_session.execute(text("SELECT name FROM users WHERE email = :email"), {"email": "alice@example.com"})
name = result.scalar()
assert name == "Alice"
@pytest.mark.small
def test_enforces_unique_email(sqlite_session):
"""Test unique constraint with in-memory SQLite."""
from sqlalchemy.exc import IntegrityError
sqlite_session.execute(
text("INSERT INTO users (name, email) VALUES (:name, :email)"),
{"name": "Alice", "email": "alice@example.com"},
)
sqlite_session.commit()
with pytest.raises(IntegrityError):
sqlite_session.execute(
text("INSERT INTO users (name, email) VALUES (:name, :email)"),
{"name": "Bob", "email": "alice@example.com"},
)
sqlite_session.commit()
SQLite Limitations¶
SQLite differs from PostgreSQL/MySQL in several ways:
No
SERIALtype (useINTEGER PRIMARY KEY AUTOINCREMENT)Limited constraint enforcement
No stored procedures
Different date/time handling
For PostgreSQL-specific features, use medium tests with containers.
Medium Tests: pytest-postgresql¶
pytest-postgresql provides fixtures for PostgreSQL testing.
Installation¶
pip install pytest-postgresql psycopg2-binary
# or
uv add --dev pytest-postgresql psycopg2-binary
Basic Usage¶
import pytest
@pytest.mark.medium
def test_with_postgresql(postgresql):
"""Test with real PostgreSQL using pytest-postgresql."""
cursor = postgresql.cursor()
cursor.execute("""
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
price DECIMAL(10, 2)
)
""")
cursor.execute(
"INSERT INTO products (name, price) VALUES (%s, %s) RETURNING id",
("Widget", 19.99),
)
product_id = cursor.fetchone()[0]
postgresql.commit()
cursor.execute("SELECT name, price FROM products WHERE id = %s", (product_id,))
name, price = cursor.fetchone()
assert name == "Widget"
assert float(price) == 19.99
With SQLAlchemy ORM¶
import pytest
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker
Base = declarative_base()
class UserModel(Base):
__tablename__ = "users"
id = Column(Integer, primary_key=True)
name = Column(String(255), nullable=False)
email = Column(String(255), unique=True, nullable=False)
@pytest.fixture
def sqlalchemy_session(postgresql):
"""Create SQLAlchemy session connected to test PostgreSQL."""
connection_string = (
f"postgresql://{postgresql.info.user}:"
f"@{postgresql.info.host}:{postgresql.info.port}"
f"/{postgresql.info.dbname}"
)
engine = create_engine(connection_string)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
yield session
session.close()
engine.dispose()
@pytest.mark.medium
def test_orm_operations(sqlalchemy_session):
"""Test SQLAlchemy ORM with real PostgreSQL."""
user = UserModel(name="Alice", email="alice@example.com")
sqlalchemy_session.add(user)
sqlalchemy_session.commit()
retrieved = sqlalchemy_session.query(UserModel).filter_by(email="alice@example.com").first()
assert retrieved is not None
assert retrieved.name == "Alice"
assert retrieved.id is not None
Medium Tests: Testcontainers¶
For more control over database containers, use testcontainers (see Container Testing).
import pytest
from testcontainers.postgres import PostgresContainer
from sqlalchemy import create_engine, text
@pytest.fixture(scope="module")
def postgres_container():
"""Start PostgreSQL container for the test module."""
with PostgresContainer("postgres:16-alpine") as postgres:
yield postgres
@pytest.fixture
def db_engine(postgres_container):
"""Create engine connected to container."""
engine = create_engine(postgres_container.get_connection_url())
yield engine
engine.dispose()
@pytest.mark.medium
def test_postgres_specific_features(db_engine):
"""Test PostgreSQL-specific features."""
with db_engine.connect() as conn:
# Test JSONB (PostgreSQL-specific)
conn.execute(text("""
CREATE TABLE IF NOT EXISTS events (
id SERIAL PRIMARY KEY,
data JSONB NOT NULL
)
"""))
conn.execute(
text("INSERT INTO events (data) VALUES (:data)"),
{"data": '{"type": "click", "button": "submit"}'},
)
conn.commit()
# Query JSONB
result = conn.execute(text("SELECT data->>'type' FROM events"))
event_type = result.scalar()
assert event_type == "click"
Fixture Patterns¶
Transaction Rollback Pattern¶
Use transactions to isolate tests without recreating tables:
@pytest.fixture(scope="module")
def db_engine(postgres_container):
"""Engine shared across module."""
engine = create_engine(postgres_container.get_connection_url())
# Create schema once
with engine.connect() as conn:
conn.execute(text("""
CREATE TABLE IF NOT EXISTS users (
id SERIAL PRIMARY KEY,
name VARCHAR(255),
email VARCHAR(255) UNIQUE
)
"""))
conn.commit()
yield engine
engine.dispose()
@pytest.fixture
def db_session(db_engine):
"""Session with automatic rollback."""
connection = db_engine.connect()
transaction = connection.begin()
Session = sessionmaker(bind=connection)
session = Session()
yield session
session.close()
transaction.rollback() # Undo all changes
connection.close()
@pytest.mark.medium
def test_creates_user(db_session):
"""Changes are rolled back after test."""
db_session.execute(
text("INSERT INTO users (name, email) VALUES (:name, :email)"),
{"name": "Test", "email": "test@example.com"},
)
# This data won't persist to other tests
@pytest.mark.medium
def test_table_is_empty(db_session):
"""Previous test's data was rolled back."""
result = db_session.execute(text("SELECT COUNT(*) FROM users"))
count = result.scalar()
assert count == 0
Factory Fixtures¶
@pytest.fixture
def user_factory(db_session):
"""Factory for creating test users."""
created_ids = []
def create(name="Test User", email=None):
if email is None:
email = f"{name.lower().replace(' ', '.')}@example.com"
result = db_session.execute(
text("INSERT INTO users (name, email) VALUES (:name, :email) RETURNING id"),
{"name": name, "email": email},
)
user_id = result.scalar()
db_session.commit()
created_ids.append(user_id)
return user_id
yield create
# Cleanup (if not using transaction rollback)
for user_id in created_ids:
db_session.execute(text("DELETE FROM users WHERE id = :id"), {"id": user_id})
db_session.commit()
@pytest.mark.medium
def test_with_multiple_users(user_factory, db_session):
"""Create users via factory."""
alice_id = user_factory("Alice")
bob_id = user_factory("Bob")
result = db_session.execute(text("SELECT COUNT(*) FROM users"))
count = result.scalar()
assert count == 2
Alembic Migration Testing¶
Test database migrations with Alembic:
import pytest
from alembic import command
from alembic.config import Config
from sqlalchemy import create_engine, inspect
@pytest.fixture(scope="module")
def alembic_config(postgres_container):
"""Configure Alembic for test database."""
config = Config("alembic.ini")
config.set_main_option("sqlalchemy.url", postgres_container.get_connection_url())
return config
@pytest.mark.medium
def test_migrations_apply_cleanly(alembic_config, postgres_container):
"""Test that all migrations apply without errors."""
# Apply all migrations
command.upgrade(alembic_config, "head")
# Verify expected tables exist
engine = create_engine(postgres_container.get_connection_url())
inspector = inspect(engine)
tables = inspector.get_table_names()
assert "users" in tables
assert "orders" in tables
assert "alembic_version" in tables
@pytest.mark.medium
def test_migrations_are_reversible(alembic_config, postgres_container):
"""Test that migrations can be rolled back."""
# Apply all
command.upgrade(alembic_config, "head")
# Roll back all
command.downgrade(alembic_config, "base")
# Verify tables are gone
engine = create_engine(postgres_container.get_connection_url())
inspector = inspect(engine)
tables = inspector.get_table_names()
assert "users" not in tables
assert "orders" not in tables
When to Use Each Approach¶
Approach |
Test Size |
Use When |
|---|---|---|
Fake repositories |
Small |
Testing business logic, service layers |
SQLite in-memory |
Small |
Testing basic SQL, ORM mappings |
pytest-postgresql |
Medium |
Testing PostgreSQL-specific features |
Testcontainers |
Medium |
Testing with specific database versions |
Staging database |
Large |
Full integration testing |
Decision Guide¶
Can you test without SQL? Use fake repositories (small)
Testing basic SQL/ORM? Use SQLite in-memory (small)
Need PostgreSQL features? Use containers (medium)
Testing production parity? Use staging database (large)
Best Practices¶
1. Prefer Fakes Over Mocks¶
Fakes implement the real interface; mocks just record calls. Fakes catch more bugs:
# Good: Fake implements real behavior
class FakeUserRepository(UserRepository):
def get_by_email(self, email: str) -> User | None:
for user in self._users.values():
if user.email == email:
return user
return None
# Less good: Mock just returns configured value
mock_repo = mocker.Mock()
mock_repo.get_by_email.return_value = User(...)
2. Test Repository Contracts¶
Ensure fakes and real implementations behave identically:
class RepositoryContractTests:
"""Contract tests that both fake and real implementations must pass."""
@pytest.fixture
def repository(self):
raise NotImplementedError
def test_save_assigns_id(self, repository):
user = User(id=None, name="Test", email="test@example.com")
saved = repository.save(user)
assert saved.id is not None
def test_get_returns_saved_user(self, repository):
user = User(id=None, name="Test", email="test@example.com")
saved = repository.save(user)
retrieved = repository.get_by_id(saved.id)
assert retrieved.name == saved.name
class TestFakeRepository(RepositoryContractTests):
@pytest.fixture
def repository(self):
return FakeUserRepository()
@pytest.mark.medium
class TestRealRepository(RepositoryContractTests):
@pytest.fixture
def repository(self, postgres_container):
return PostgresUserRepository(postgres_container.get_connection_url())
3. Isolate Test Data¶
Each test should create its own data, not rely on pre-existing data:
# Good: Test creates its own data
def test_finds_user_by_email(user_repository):
user = user_repository.save(User(id=None, name="Alice", email="alice@example.com"))
found = user_repository.get_by_email("alice@example.com")
assert found.id == user.id
# Bad: Test relies on data from other tests
def test_finds_existing_user(user_repository):
# Assumes "alice@example.com" exists from another test
found = user_repository.get_by_email("alice@example.com")
assert found is not None