import asyncio
from pathlib import Path
from typing import Any

from hud import Task, gym, run_job
from hud.types import CustomGym
from hud.agent.base import Agent  # Import base Agent
from hud.adapters.common.types import ResponseAction  # For submitting text
from hud.utils.common import Observation  # For type hinting

# For Langchain LLM
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Define the CustomGym for our file organizer environment
file_organizer_gym = CustomGym(
    location="local",
    image_or_build_context=Path("./environments/file_organizer_env")
)
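
# The build context above is expected to contain a Dockerfile plus the controller
# code implementing the custom functions referenced in the task below
# (setup_file_task and evaluate_file_structure). The exact layout is an
# assumption here, e.g.:
#   environments/file_organizer_env/
#     Dockerfile
#     <controller source with setup/evaluate functions>
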
# Define a task for this environment
create_project_task = Task(
    prompt=(
        "Create a project structure: a 'docs' directory with a README.md inside it containing "
        "'Project Documentation', and a 'src' directory with a 'main.py' file inside it containing "
        "'print(\"Hello Project!\")'. All files and directories should be inside /workspace/task_data."
    ),
    gym=file_organizer_gym,
    setup=("setup_file_task", "File organization area prepared."),
    evaluate=(
        "evaluate_file_structure",
        {
            "directories": ["docs", "src"],
            "files": {
                "docs/README.md": "Project Documentation",
                "src/main.py": "print(\"Hello Project!\")"
            }
        }
    )
)
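
# The function below is only a minimal sketch of what `evaluate_file_structure`
# might look like; the real implementation lives in the environment's build
# context and runs inside the container via the controller. It is shown here to
# illustrate how the evaluate config above could be consumed; the body is an
# assumption, not the shipped implementation.
import os  # used only by this illustrative sketch


def evaluate_file_structure(config: dict, base_dir: str = "/workspace/task_data") -> float:
    """Return 1.0 if all expected directories and file contents exist, else 0.0."""
    checks: list[bool] = []
    for directory in config.get("directories", []):
        checks.append(os.path.isdir(os.path.join(base_dir, directory)))
    for rel_path, expected_content in config.get("files", {}).items():
        file_path = os.path.join(base_dir, rel_path)
        try:
            with open(file_path) as f:
                checks.append(expected_content in f.read())
        except OSError:
            checks.append(False)
    return 1.0 if checks and all(checks) else 0.0
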
# Agent that uses Langchain OpenAI to generate shell commands
class FileSystemAgentLLM(Agent[Any, Any]):  # Generic types for client and raw action
    def __init__(self, llm_model_name: str = "gpt-3.5-turbo"):
        super().__init__(client=None, adapter=None)  # No HUD client/adapter needed for this agent's core logic
        self.llm = ChatOpenAI(model=llm_model_name, temperature=0)
        self.prompt_template = ChatPromptTemplate.from_messages([
            ("system", "You are an expert at generating shell commands for Linux to manage files and directories. Given a natural language instruction, provide a sequence of shell commands (one command per line) to achieve the objective. The working directory will be /workspace/task_data. Only output the shell commands."),
            ("human", "Instruction: {instruction}")
        ])
        self.parser = StrOutputParser()
        self.chain = self.prompt_template | self.llm | self.parser
        self.env_client = None  # To be set if we want to execute commands

    async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
        instruction = observation.text  # The task prompt
        if not instruction:
            return [{"type": "response", "text": "No instruction provided."}], True

        print(f"Agent received instruction: {instruction}")
        shell_commands_str = await self.chain.ainvoke({"instruction": instruction})
        print(f"LLM generated shell commands:\n{shell_commands_str}")

        # For this example the agent simply returns the commands as text in a
        # response action; a full system would parse and execute them, or define a
        # custom action type such as {"type": "shell_commands", "commands": [...]}.
        # The agent considers the task "done" after generating the commands.
        return [{"type": "response", "text": f"# Shell commands to execute:\n{shell_commands_str}"}], True

async def main():
    env = await gym.make(create_project_task)

    # Initialize the LLM-based agent.
    # Make sure OPENAI_API_KEY is set in your environment for ChatOpenAI.
    try:
        agent = FileSystemAgentLLM(llm_model_name="gpt-4o")  # or "gpt-3.5-turbo"
    except Exception as e:
        print(f"Failed to initialize FileSystemAgentLLM (check OpenAI API key): {e}")
        await env.close()
        return

    # This agent doesn't directly use env.client in this example, but a real one might.
    # If you had env.client.execute() and wanted the agent to use it:
    # agent.env_client = env.client

    print("Resetting environment (runs setup)...")
    obs, _ = await env.reset()
    print(f"Initial observation: {obs}")  # Contains output from setup_file_task

    print("Agent predicting actions...")
    # The agent's predict method (via fetch_response) calls the LLM and returns a
    # response action containing the shell commands as text.
    actions, done = await agent.predict(obs)

    # This example does not execute the shell commands returned by the agent. In a
    # real setup, an orchestrator would take the response text (or a structured
    # action), extract the commands, and run them inside the container, e.g. via
    # env.client.execute() if your client exposes one.
    if actions and isinstance(actions[0], dict) and actions[0].get("type") == "response":
        print(f"Agent proposed actions (shell commands as text):\n{actions[0].get('text')}")

    # --- Simulating successful execution for evaluation testing ---
    # To verify `evaluate_file_structure`, the expected files must exist inside the
    # container at /workspace/task_data; we cannot simply os.makedirs on the host.
    # If env.client.execute() were available, the generated commands could be run like:
    # base_dir = "/workspace/task_data"  # path INSIDE the Docker container
    # await env.client.execute(["mkdir", "-p", os.path.join(base_dir, "docs")])
    # await env.client.execute(["mkdir", "-p", os.path.join(base_dir, "src")])
    # await env.client.execute(["sh", "-c", f"echo 'Project Documentation' > {os.path.join(base_dir, 'docs/README.md')}"])
    # await env.client.execute(["sh", "-c", f"echo 'print(\"Hello Project!\")' > {os.path.join(base_dir, 'src/main.py')}"])
    print("\n(Skipping actual file creation in this example script.)")
    print("To make the evaluation pass, create these files inside the running container")
    print("(e.g. via env.client.execute()) or mock the evaluation step; otherwise the")
    print("evaluation below will likely return 0.0.")
print("\nEvaluating task...")
# The evaluate function will run INSIDE the Docker container via the controller.
# If the files were not created (e.g. by the agent via env.client.execute()), it will likely return 0.0.
result = await env.evaluate()
print(f"Evaluation Result: {result}")
await env.close()

if __name__ == "__main__":
    # Ensure OPENAI_API_KEY is set in your .env file or environment variables.
    asyncio.run(main())