Improve agent performance with reinforcement learning
# Example script: load the "hud-vf-gym" environment with the 2048 taskset and
# fine-tune Qwen2.5-3B-Instruct on it using the GRPO trainer from `verifiers`.
import verifiers as vf

# Load environment
env = vf.load_environment(
    env_id="hud-vf-gym",
    taskset="hud-evals/2048-taskset",
    config_path="configs/2048.yaml",
)

# Train with GRPO
# NOTE(review): `tokenizer` is returned here but not passed to the trainer
# below — confirm against the verifiers docs whether GRPOTrainer needs it.
model, tokenizer = vf.get_model_and_tokenizer("Qwen/Qwen2.5-3B-Instruct")
trainer = vf.GRPOTrainer(
    model=model,
    env=env,
    args=vf.grpo_defaults(),
    peft_config=vf.lora_defaults(),  # LoRA for efficiency
)
trainer.train()
Was this page helpful?