Let’s build a TODO app environment from scratch. This hands-on tutorial will teach you the core concepts of MCP environment development.

What We’re Building

A simple TODO list that agents can interact with:
  • Add, complete, and delete tasks
  • List all tasks with their status
  • Evaluate based on task completion
This tutorial assumes basic Python knowledge. The full code is available at environments/todo-example.

Step 1: Project Structure

Create a new directory for your environment:
mkdir todo-env
cd todo-env
Create the following structure:
todo-env/
├── Dockerfile
├── pyproject.toml
├── README.md
└── src/
    └── hud_controller/
        ├── __init__.py
        ├── server.py
        └── todo_app.py

Step 2: The TODO Application

First, let’s build the core TODO logic:
src/hud_controller/todo_app.py
from dataclasses import dataclass
from typing import List, Optional
from datetime import datetime

@dataclass
class TodoItem:
    """A single entry in the TODO list.

    Attributes:
        id: Identifier of the item.
        title: Text describing the task.
        completed: Whether the task has been marked as done.
        created_at: Creation timestamp. When omitted (or passed as ``None``)
            it is set to ``datetime.now()`` right after construction.
    """

    id: int
    title: str
    completed: bool = False
    # None is only a construction-time placeholder; __post_init__ replaces it.
    created_at: Optional[datetime] = None

    def __post_init__(self):
        """Fill in ``created_at`` with the current time when it was not given."""
        if self.created_at is None:
            self.created_at = datetime.now()

class TodoApp:
    """In-memory TODO list with sequentially assigned item ids."""

    def __init__(self):
        self.todos: List[TodoItem] = []
        self.next_id = 1

    def add_todo(self, title: str) -> TodoItem:
        """Create an item with the next free id, store it and return it."""
        item = TodoItem(id=self.next_id, title=title)
        self.next_id += 1
        self.todos.append(item)
        return item

    def complete_todo(self, todo_id: int) -> bool:
        """Flag the item with ``todo_id`` as completed.

        Returns True when the item exists, False otherwise.
        """
        match = next((item for item in self.todos if item.id == todo_id), None)
        if match is None:
            return False
        match.completed = True
        return True

    def delete_todo(self, todo_id: int) -> bool:
        """Remove the item with ``todo_id``.

        Returns True when an item was removed, False when the id is unknown.
        """
        for position, item in enumerate(self.todos):
            if item.id != todo_id:
                continue
            del self.todos[position]
            return True
        return False

    @staticmethod
    def _serialize(item) -> dict:
        """Convert one item to a plain dict with an ISO-formatted timestamp."""
        return {
            "id": item.id,
            "title": item.title,
            "completed": item.completed,
            "created_at": item.created_at.isoformat(),
        }

    def list_todos(self) -> List[dict]:
        """Return every item, in insertion order, as a plain dictionary."""
        return [self._serialize(item) for item in self.todos]

    def clear_all(self):
        """Drop all items and restart id numbering at 1."""
        self.todos, self.next_id = [], 1

Step 3: MCP Server Integration

Now wrap the TODO app with MCP:
src/hud_controller/server.py
import sys
import logging
from hud.server import MCPServer
from .todo_app import TodoApp

# Route log output to stderr.
# NOTE(review): presumably stdout is reserved for the MCP stdio transport
# (the tasks run the container with `docker run -i`) — confirm.
logging.basicConfig(
    format="[%(levelname)s] %(asctime)s | %(name)s | %(message)s",
    level=logging.INFO,
    stream=sys.stderr,
)

# Module-level singletons: the MCP server and the TODO state shared by all tools
mcp = MCPServer(name="TODO Environment")
app = TodoApp()

# Interaction tools (visible to agents)
@mcp.tool()
async def add_todo(title: str) -> dict:
    """Add a new TODO item to the list"""
    # NOTE(review): the docstring is presumably exposed as the tool
    # description, so it is kept verbatim.
    created = app.add_todo(title)
    summary = {"id": created.id, "title": created.title}
    # Report the new item's id so callers can complete or delete it later.
    return {"success": True, "todo": summary, "message": f"Added TODO: {title}"}

@mcp.tool()
async def complete_todo(todo_id: int) -> dict:
    """Mark a TODO item as completed"""
    # ``found`` is False when no stored item carries the requested id.
    found = app.complete_todo(todo_id)
    outcome = "Completed" if found else "Failed to complete"
    return {"success": found, "message": f"{outcome} TODO {todo_id}"}

@mcp.tool()
async def list_todos() -> dict:
    """List all TODO items"""
    items = app.list_todos()
    # Count the entries whose "completed" flag is set.
    finished = len([entry for entry in items if entry["completed"]])
    return {"todos": items, "total": len(items), "completed": finished}

@mcp.tool()
async def delete_todo(todo_id: int) -> dict:
    """Delete a TODO item"""
    # ``removed`` is False when no stored item carries the requested id.
    removed = app.delete_todo(todo_id)
    outcome = "Deleted" if removed else "Failed to delete"
    return {"success": removed, "message": f"{outcome} TODO {todo_id}"}

# Setup tool (hidden from agents)
@mcp.tool()
async def setup(initial_todos: list[str] | None = None) -> dict:
    """Initialize TODO list with optional items"""
    # The annotation spells out ``| None`` because the default is None;
    # ``list[str] = None`` is an implicit Optional, which type checkers reject.
    # NOTE(review): the docstring is presumably exposed as the tool
    # description, so it is kept verbatim.

    # Always start from an empty list so repeated setups do not accumulate items.
    app.clear_all()

    # Treat "no argument" and an empty list the same way.
    titles = initial_todos or []
    for title in titles:
        app.add_todo(title)

    return {
        "status": "success",
        "message": f"Initialized with {len(titles)} TODOs"
    }

# Evaluation tool (hidden from agents)
@mcp.tool()
async def evaluate(target_completed: int | None = None) -> dict:
    """Evaluate based on TODO completion"""
    # The annotation spells out ``| None`` because the default is None;
    # ``int = None`` is an implicit Optional, which type checkers reject.
    # NOTE(review): the docstring is presumably exposed as the tool
    # description, so it is kept verbatim.
    todos = app.list_todos()
    total = len(todos)
    completed = sum(1 for t in todos if t["completed"])

    if target_completed is not None:
        # Specific target: full reward once the target is reached, otherwise
        # partial credit. The division only runs when completed < target,
        # which (completed being >= 0) implies target > 0, so it cannot
        # divide by zero.
        reward = 1.0 if completed >= target_completed else completed / target_completed
    else:
        # General completion rate; an empty list scores 0.0.
        reward = completed / total if total > 0 else 0.0

    return {
        "reward": reward,
        "done": reward >= 1.0,
        "info": {
            "completed": completed,
            "total": total,
            "details": f"{completed}/{total} TODOs completed"
        }
    }

# Initialize handler
@mcp.initialize
async def on_initialize(ctx):
    """Log a readiness message once the MCP connection is initialized."""
    # ``ctx`` is accepted but not used here.
    logging.log(logging.INFO, "TODO environment ready!")

# Start the server when this module is executed directly, e.g. with
# `python -m hud_controller.server` as the Dockerfile's CMD does.
if __name__ == "__main__":
    mcp.run()

Step 4: Package Configuration

Create a pyproject.toml file:
pyproject.toml
# Package metadata; the Dockerfile installs this project with `pip install -e .`
[project]
name = "todo-env"
version = "0.1.0"
description = "TODO app MCP environment"
requires-python = ">=3.11"
# Presumably provides the `hud` package that server.py imports — confirm.
dependencies = [
    "hud-python>=0.5.0",
]

# Build the package with setuptools
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

Step 5: Docker Configuration

Create the Dockerfile:
Dockerfile
# Image for the TODO MCP environment
FROM python:3.11-slim

# Prevent Python from buffering output and from writing .pyc files
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /app

# Copy package files
COPY pyproject.toml ./
COPY src/ ./src/

# Install in editable mode for development
RUN pip install --no-cache-dir -e .

# Run as a module; server.py's __main__ guard calls mcp.run()
CMD ["python", "-m", "hud_controller.server"]

Step 6: Build and Test

Option 1: Traditional Build & Test

Build your Docker image:
docker build -t todo-env .
Test with HUD CLI:
# Run the 5-phase debug check
hud debug todo-env

# If that passes, analyze the tools
hud analyze todo-env
Option 2: Hot-Reload Development

Use the development proxy for instant feedback:
# Start development server with hot-reload
hud dev --build

# In another terminal, test your environment
hud analyze hud-todo-env:dev
Now you can edit src/hud_controller/server.py and see changes instantly without rebuilding!
Your environment should pass all 5 debug phases and show the available tools.

Step 7: Create Tasks

Now create tasks for agents to solve:
from hud import Task

# Each task starts the locally built image over stdio (`docker run -i`),
# calls the `setup` tool first and scores the run with the `evaluate` tool.
tasks = [
    Task(
        # The evaluate tool counts *completed* TODOs, so the prompt must ask
        # the agent to complete the items, not only to add them.
        prompt=(
            "Add three TODOs: buy groceries, call mom, finish report. "
            "Then mark all three as completed."
        ),
        mcp_config={
            "local": {
                "command": "docker",
                "args": ["run", "--rm", "-i", "todo-env:latest"]
            }
        },
        # No arguments: start from an empty list
        setup_tool={"name": "setup"},
        evaluate_tool={
            "name": "evaluate",
            "arguments": {"target_completed": 3}
        }
    ),
    Task(
        prompt="Complete all TODOs that contain the word 'urgent'",
        mcp_config={
            "local": {
                "command": "docker",
                "args": ["run", "--rm", "-i", "todo-env:latest"]
            }
        },
        setup_tool={
            "name": "setup",
            "arguments": {
                "initial_todos": [
                    "urgent: fix bug",
                    "buy coffee",
                    "urgent: submit report"
                ]
            }
        },
        # Two of the three seeded items contain 'urgent'
        evaluate_tool={
            "name": "evaluate",
            "arguments": {"target_completed": 2}
        }
    )
]

Step 8: Test with an Agent

Run your first evaluation:
import asyncio
import hud
from hud.agents import ClaudeAgent

async def test_todo_env():
    """Run the first task with a ClaudeAgent inside a trace and print the outcome."""
    first_task = tasks[0]

    with hud.trace("todo-test"):
        outcome = await ClaudeAgent().run(first_task)

        # A reward of exactly 1.0 counts as success
        succeeded = outcome.reward == 1.0
        print(f"Success: {succeeded}")
        print(f"Details: {outcome.info}")

asyncio.run(test_todo_env())

Publishing Your Environment

Once tested, publish to Docker Hub:
# Tag your image
docker tag todo-env your-dockerhub/todo-env:v1.0

# Push to registry
docker push your-dockerhub/todo-env:v1.0

Using Your Environment Remotely

Others can now use your environment at scale:
import asyncio
import os

from hud import Task
from hud.agents import ClaudeAgent

# Remote MCP configuration: the image to run is named in the "Mcp-Image"
# header and the API key is read from the HUD_API_KEY environment variable.
task = Task(
    prompt="Create 3 todos and complete the urgent ones",
    mcp_config={
        "hud": {
            "url": "https://mcp.hud.so/v3/mcp",
            "headers": {
                "Authorization": f"Bearer {os.getenv('HUD_API_KEY')}",
                "Mcp-Image": "your-dockerhub/todo-env:v1.0"
            }
        }
    },
    setup_tool={"name": "setup"},
    evaluate_tool={"name": "evaluate", "arguments": {"target_completed": 2}}
)


async def main():
    """Run the remote task with an agent and return the result."""
    # Run with any agent
    agent = ClaudeAgent()
    return await agent.run(task)


# `await` is only valid inside a coroutine in a regular script, so the run
# is driven through asyncio.run() instead of a top-level await.
result = asyncio.run(main())

Next Steps

This tutorial covered the basics. For production environments, explore the official environments README, which contains comprehensive implementation details, troubleshooting tips, and reference examples.