Testing

Testing Guide¶

This guide covers testing practices for LDA development, including unit tests, integration tests, and testing guidelines for contributors.

Test Structure¶

LDA follows a standard test structure:

tests/
├── unit/              # Unit tests
│   ├── test_config.py
│   ├── test_core.py
│   ├── test_tracking.py
│   └── test_utils.py
├── integration/       # Integration tests
│   ├── test_cli.py
│   ├── test_workflows.py
│   └── test_database.py
├── fixtures/         # Test data
│   ├── configs/
│   ├── data/
│   └── projects/
├── conftest.py       # Test configuration
└── utils.py          # Test utilities

Running Tests¶

Quick Start¶

# Install test dependencies
pip install -e ".[test]"

# Run all tests
pytest

# Run with coverage
pytest --cov=lda --cov-report=html

# Run specific test file
pytest tests/unit/test_config.py

# Run specific test
pytest tests/unit/test_config.py::test_load_config

Test Categories¶

# Unit tests only
pytest tests/unit/

# Integration tests only
pytest tests/integration/

# Fast tests (no network/db)
pytest -m "not slow"

# Specific markers
pytest -m "tracking"
pytest -m "cli"

Writing Tests¶

Unit Tests¶

Example unit test:

# tests/unit/test_config.py
import pytest
from lda.config import LDAConfig
from pathlib import Path

class TestLDAConfig:
    """Test configuration functionality."""

    @pytest.fixture
    def config(self, tmp_path):
        """Provide an ``LDAConfig`` loaded from a minimal YAML file.

        Defined on the class so ``test_get_path`` is self-contained and does
        not rely on a ``config`` fixture being declared somewhere else.
        """
        config_file = tmp_path / "lda_config.yaml"
        config_file.write_text("project:\n  name: Test\n")
        return LDAConfig(str(config_file))

    def test_load_config(self, tmp_path):
        """Test loading configuration from file."""
        # Create test config. The YAML is written flush-left so the file
        # content does not depend on how this source file is indented.
        config_file = tmp_path / "test_config.yaml"
        config_file.write_text(
            "project:\n"
            "  name: Test Project\n"
            "  code: TEST001\n"
        )

        # Load config
        config = LDAConfig(str(config_file))

        # Assertions
        assert config.get("project.name") == "Test Project"
        assert config.get("project.code") == "TEST001"

    def test_missing_config(self):
        """Test handling missing config file."""
        with pytest.raises(FileNotFoundError):
            LDAConfig("nonexistent.yaml")

    @pytest.mark.parametrize("path,expected", [
        ("project.name", "Test"),
        ("project.missing", None),
        ("deep.nested.value", None),
    ])
    def test_get_path(self, config, path, expected):
        """Test getting values by dotted path, including missing keys."""
        assert config.get(path) == expected

Integration Tests¶

# tests/integration/test_cli.py
from pathlib import Path

import pytest
from click.testing import CliRunner

from lda.cli.main import cli

class TestCLIIntegration:
    """Test CLI commands integration."""

    @pytest.fixture
    def runner(self):
        """Create CLI test runner."""
        return CliRunner()

    def test_init_command(self, runner):
        """Test project initialization."""
        # isolated_filesystem() already switches into a fresh temporary
        # directory, so no separate tmp_path fixture is needed here.
        with runner.isolated_filesystem():
            result = runner.invoke(cli, [
                'init',
                '--name', 'Test Project',
                '--analyst', 'test.user'
            ])

            assert result.exit_code == 0
            assert Path("lda_config.yaml").exists()
            assert "Project initialized" in result.output

    def test_status_command(self, runner, sample_project, monkeypatch):
        """Test status command with sample project."""
        # CliRunner.invoke() has no ``cwd`` option: unknown keyword arguments
        # are forwarded to the command itself and make the invocation fail.
        # Change directory through monkeypatch instead; pytest restores the
        # previous working directory after the test.
        # NOTE(review): ``sample_project`` is assumed to be a fixture that
        # yields a project directory with three sections - confirm.
        monkeypatch.chdir(sample_project)

        result = runner.invoke(cli, ['status'])

        assert result.exit_code == 0
        assert "Project Status" in result.output
        assert "Sections: 3" in result.output
        assert "Sections: 3" in result.output

Fixtures¶

Common test fixtures:

# tests/conftest.py
import pytest
from pathlib import Path
import tempfile
import shutil

@pytest.fixture
def tmp_project(tmp_path):
    """Build a throwaway project tree and hand its root directory to the test.

    The tree consists of an ``lda_config.yaml`` file and a single ``sec01``
    section directory holding an empty-object ``manifest.json``.
    """
    root = tmp_path / "test_project"
    root.mkdir()

    # Project configuration declaring one section
    config_text = """
    project:
      name: Test Project
      code: TEST001
    sections:
      - id: sec01
        name: Data
    """
    (root / "lda_config.yaml").write_text(config_text)

    # Section directory with an empty manifest
    section_dir = root / "sec01"
    section_dir.mkdir()
    (section_dir / "manifest.json").write_text("{}")

    yield root

    # No explicit teardown: cleanup is automatic with tmp_path

@pytest.fixture
def sample_data():
    """Map short data-kind names to sample file paths under ``fixtures/data``.

    Paths are resolved relative to the directory containing this file.
    """
    base = Path(__file__).parent / "fixtures" / "data"
    filenames = {
        "csv": "sample.csv",
        "json": "sample.json",
        "large": "large_file.dat",
    }
    return {kind: base / filename for kind, filename in filenames.items()}

@pytest.fixture
def mock_config():
    """Return a ``Mock`` whose ``get(key, default=None)`` serves canned values.

    Keys not present in the canned table yield the supplied default.
    """
    from unittest.mock import Mock

    canned_values = {
        "project.name": "Mock Project",
        "project.code": "MOCK001",
        "tracking.hash_algorithm": "sha256",
    }

    def lookup(key, default=None):
        # Mirror dict.get semantics for the canned settings
        return canned_values.get(key, default)

    stub = Mock()
    stub.get.side_effect = lookup
    return stub

Testing Best Practices¶

1. Test Isolation¶

Each test should be independent:

class TestFileTracking:
    """Example of tests that do not share state with each other."""

    @pytest.fixture(autouse=True)
    def cleanup(self):
        """Ensure clean state between tests."""
        yield
        # Runs after every test in this class: reset any global state
        LDATracker._instances.clear()

    def test_track_file(self, tmp_path):
        """Test file tracking in isolation."""
        # A 12-character file living in pytest's per-test temp directory,
        # which pytest cleans up on its own
        sample = tmp_path / "test.txt"
        sample.write_text("test content")

        info = LDATracker().track_file(sample)

        assert info["hash"] is not None
        assert info["size"] == 12

2. Mocking External Dependencies¶

from unittest.mock import patch, Mock

class TestS3Integration:
    """Example of replacing an external client with a mock."""

    @patch('boto3.client')
    def test_s3_upload(self, mock_boto3):
        """Test S3 upload without real AWS calls."""
        # Whatever boto3.client(...) is asked for, hand back this stand-in
        fake_client = Mock()
        mock_boto3.return_value = fake_client

        settings = {
            "bucket": "test-bucket",
            "region": "us-east-1",
        }
        integration = S3Integration(config=settings)
        integration.upload_file("test.txt")

        # Exactly one upload, with (filename, bucket, key) as arguments
        expected_args = ("test.txt", "test-bucket", "test.txt")
        fake_client.upload_file.assert_called_once_with(*expected_args)

3. Testing Error Conditions¶

class TestErrorHandling:
    """Examples of asserting that failures raise the expected exceptions."""

    def test_invalid_config(self):
        """Test handling of invalid configuration."""
        expected_message = "Project name is required"

        with pytest.raises(ConfigurationError) as excinfo:
            cfg = LDAConfig()
            cfg.set("project.name", None)  # Invalid
            cfg.validate()

        assert expected_message in str(excinfo.value)

    def test_file_not_found(self):
        """Test handling missing files."""
        file_tracker = LDATracker()

        with pytest.raises(FileNotFoundError):
            file_tracker.track_file("nonexistent.txt")

    @pytest.mark.timeout(5)
    def test_timeout_handling(self):
        """Test operation timeouts."""
        # The operation is given a 1 second budget; the marker caps the
        # whole test at 5
        with pytest.raises(TimeoutError):
            slow_operation(timeout=1)

4. Parametrized Tests¶

@pytest.mark.parametrize("algorithm,expected_length", [
    ("md5", 32),
    ("sha1", 40),
    ("sha256", 64),
    ("sha512", 128),
])
def test_hash_algorithms(tmp_path, algorithm, expected_length):
    """Check digest length and lowercase-hex alphabet for each algorithm."""
    target = tmp_path / "test.txt"
    target.write_text("test content")

    digest = FileHasher(algorithm=algorithm).calculate(target)

    assert len(digest) == expected_length

    hex_alphabet = "0123456789abcdef"
    assert all(ch in hex_alphabet for ch in digest)

5. Performance Tests¶

import time
import pytest

class TestPerformance:
    """Examples of wall-clock and pytest-benchmark based performance tests."""

    @pytest.mark.performance
    def test_large_file_tracking(self, large_file):
        """Test tracking performance with large files."""
        # NOTE(review): ``large_file`` is assumed to be a fixture providing a
        # path to a large file - confirm it exists in conftest.py.
        tracker = LDATracker()

        # perf_counter() is monotonic and high resolution; time.time() can
        # jump if the system clock is adjusted while the test runs.
        start_time = time.perf_counter()
        tracker.track_file(large_file)
        duration = time.perf_counter() - start_time

        # Should complete within reasonable time
        assert duration < 5.0  # seconds

    @pytest.mark.benchmark
    def test_manifest_load_performance(self, benchmark):
        """Benchmark manifest loading."""
        manifest = LDAManifest("large_manifest.json")

        # benchmark() returns whatever manifest.load() returns, not timing
        # data, so the return value is not used for the assertion.
        benchmark(manifest.load)

        # Timing statistics are recorded on the fixture itself.
        assert benchmark.stats.stats.median < 0.1  # seconds

Test Coverage¶

Coverage Requirements¶

Minimum coverage: 80%
Core modules: 90%+
New features: 100%

Running Coverage¶

# Generate coverage report
pytest --cov=lda --cov-report=html --cov-report=term

# Coverage for specific module
pytest --cov=lda.core tests/unit/test_core.py

# Exclude files from coverage
# .coveragerc
[run]
omit = 
    */tests/*
    */migrations/*
    */__init__.py

Coverage Reports¶

# View HTML report
open htmlcov/index.html

# Terminal report
pytest --cov=lda --cov-report=term-missing

# XML for CI
pytest --cov=lda --cov-report=xml

Continuous Integration¶

GitHub Actions¶

# .github/workflows/tests.yml
name: Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions must be quoted: an unquoted 3.10 is a YAML float and
        # is read as 3.1.
        python-version: ["3.8", "3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          pip install -e ".[test]"

      - name: Run tests
        run: |
          pytest --cov=lda --cov-report=xml

      - name: Upload coverage
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml

Test Utilities¶

Helper Functions¶

# tests/utils.py
import json
from pathlib import Path

def create_test_config(config_dict, path):
    """Dump *config_dict* as YAML to *path* and return the path.

    Missing parent directories are created first. The return value is
    always a ``Path``, even when *path* was given as a string.
    """
    import yaml

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    with target.open('w') as stream:
        yaml.dump(config_dict, stream)

    return target

def create_test_manifest(files, manifest_path):
    """Write a version "1.0" manifest describing *files* as indented JSON.

    The ``created`` timestamp is a fixed value so the output is
    reproducible. Returns *manifest_path* unchanged.
    """
    payload = {
        "version": "1.0",
        "files": files,
        "created": "2024-01-01T00:00:00Z",
    }
    serialized = json.dumps(payload, indent=2)

    with open(manifest_path, 'w') as handle:
        handle.write(serialized)

    return manifest_path

def assert_file_tracked(manifest, file_path):
    """Assert *file_path* has a complete entry in ``manifest.files``.

    A complete entry contains the keys ``hash``, ``size`` and ``modified``.
    Raises ``AssertionError`` when the entry or any of those keys is absent.
    """
    assert file_path in manifest.files
    entry = manifest.files[file_path]
    for required_key in ("hash", "size", "modified"):
        assert required_key in entry

Testing Checklist¶

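Before submitting a PR:

- All tests pass locally (`pytest`)
- New code is covered by unit tests, plus integration tests where CLI commands or workflows change
- Coverage meets the requirements above (80% minimum, 90%+ for core modules, 100% for new features)
- Tests are isolated and external dependencies are mocked
- Error conditions are tested, not only the success path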

Debugging Tests¶

PyTest Options¶

# Verbose output
pytest -vv

# Show print statements
pytest -s

# Drop to debugger on failure
pytest --pdb

# Run specific test
pytest -k test_specific_function

# Show local variables
pytest -l

# Rerun failed tests
pytest --lf

Debug Fixtures¶

@pytest.fixture
def debug_tracker():
    """Tracker with debug logging enabled.

    Raises the root logger to DEBUG for the duration of the test and puts
    back whatever level was configured before, rather than assuming it was
    WARNING.
    """
    import logging

    root_logger = logging.getLogger()
    previous_level = root_logger.level

    # basicConfig() does nothing once the root logger already has handlers,
    # so it is kept only to install a default handler when none exists; the
    # level itself is set explicitly.
    logging.basicConfig(level=logging.DEBUG)
    root_logger.setLevel(logging.DEBUG)
    try:
        tracker = LDATracker(debug=True)
        yield tracker
    finally:
        # Restore the level even if creating the tracker failed
        root_logger.setLevel(previous_level)