软件测试和质量保证是确保AI代理系统可靠性和稳定性的关键环节。对于像OpenHands这样的复杂AI系统,测试尤其具有挑战性,因为需要验证系统在各种条件下的行为一致性。本笔记将探讨AI系统测试的独特策略,分析OpenHands的测试需求,并通过实践为关键模块构建测试套件。
| 测试类型 | 传统软件 | AI系统调整 |
|---|---|---|
| 单元测试 | 测试独立组件 | 测试AI组件接口和边界条件 |
| 集成测试 | 测试组件间交互 | 测试AI组件与其他系统的交互 |
| 系统测试 | 测试整个系统 | 测试完整AI流水线和决策链 |
| 端到端测试 | 测试用户场景 | 测试真实用户交互和完整任务 |
确定性测试:
概率性测试:
对抗性测试:
功能性测试:
非功能性测试:
基于README_CN.md和项目性质,我们可以推断OpenHands需要以下测试策略:
AI代理能力测试:
系统集成测试:
安全性测试:
性能与可靠性测试:
测试技术栈:
项目测试结构:
tests/
├── unit/ # 单元测试
│ ├── services/ # 服务层测试
│ ├── utils/ # 工具函数测试
│ └── controllers/ # 控制器测试
├── integration/ # 集成测试
│ ├── api/ # API端点测试
│ └── services/ # 服务集成测试
├── e2e/ # 端到端测试
│ ├── scenarios/ # 用例场景测试
│ └── workflows/ # 工作流程测试
├── fixtures/ # 测试数据和固定资源
│ ├── responses/ # 模拟API响应
│ ├── commands/ # 命令执行样本
│ └── prompts/ # 提示模板样本
└── helpers/ # 测试辅助函数
    ├── mocks.js # 模拟对象
    ├── stubs.js # 存根函数
    └── utils.js # 测试工具函数
Jest配置 (jest.config.js):
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
roots: ['/src' , '/tests' ],
testMatch: [
'**/tests/unit/**/*.test.ts',
'**/tests/integration/**/*.test.ts'
],
collectCoverageFrom: [
'src/**/*.ts',
'!src/**/*.d.ts',
'!**/node_modules/**'
],
coveragePathIgnorePatterns: [
'/node_modules/',
'/tests/',
'/dist/'
],
coverageThreshold: {
global: {
branches: 70,
functions: 80,
lines: 80,
statements: 80
}
},
setupFilesAfterEnv: ['/tests/helpers/setup.ts' ],
moduleNameMapper: {
'^@/(.*)$': '/src/$1'
}
};
LLM服务模拟 (tests/helpers/mockLLM.ts):
// tests/helpers/mockLLM.ts
import nock from 'nock';
import path from 'path';
import fs from 'fs';
/**
 * Options accepted by `mockLLMResponse`. Every field is optional.
 */
export interface MockLLMOptions {
  /** Response body to reply with; when truthy it wins over `responseFile`. */
  customResponse?: any;
  /** Name of a JSON fixture under `../fixtures/responses` to use as the body. */
  responseFile?: string;
  /** HTTP status code of the mocked reply. */
  statusCode?: number;
  /** Delay value forwarded to the nock interceptor's `delay()`. */
  delay?: number;
  /** When true the interceptor replies with an error instead of a body. */
  error?: boolean;
}
/**
 * Registers a nock interceptor for `POST https://api.anthropic.com/v1/messages`.
 *
 * The reply body is chosen in this order: a truthy `customResponse`, then the
 * JSON fixture named by `responseFile` (read synchronously from
 * `../fixtures/responses` relative to this file), then a built-in default body.
 * The body is resolved before the interceptor is created, even when `error`
 * is set.
 *
 * @param options See `MockLLMOptions`; defaults are status 200, delay 0, no error.
 * @returns The value returned by nock's `reply` / `replyWithError`.
 */
export function mockLLMResponse(options: MockLLMOptions = {}) {
  const {
    statusCode = 200,
    delay = 0,
    responseFile,
    customResponse,
    error = false
  } = options;

  // Parses a fixture file from the shared responses directory.
  const loadFixture = (fileName: string): any => {
    const fixturePath = path.join(__dirname, '../fixtures/responses', fileName);
    return JSON.parse(fs.readFileSync(fixturePath, 'utf8'));
  };

  // Fallback body used when neither a custom body nor a fixture is given.
  const buildDefaultBody = (): any => ({
    id: 'mock-response-123',
    object: 'chat.completion',
    created: Date.now(),
    model: 'mock-model',
    content: [
      {
        type: 'text',
        text: '这是一个模拟的LLM响应。'
      }
    ]
  });

  const responseBody: any = customResponse
    ? customResponse
    : responseFile
      ? loadFixture(responseFile)
      : buildDefaultBody();

  const interceptor = nock('https://api.anthropic.com')
    .post('/v1/messages')
    .delay(delay);

  return error
    ? interceptor.replyWithError({
        message: 'LLM API error',
        code: 'ENOTFOUND'
      })
    : interceptor.reply(statusCode, responseBody, {
        'Content-Type': 'application/json'
      });
}
/**
 * Registers a nock interceptor for `POST https://api.anthropic.com/v1/messages`
 * that replies 200 with a text part followed by a tool-call part.
 *
 * @param toolName Name placed in the tool-call part.
 * @param toolArgs Arguments for the tool; serialised with `JSON.stringify`.
 * @param textResponse Text of the leading text part; a falsy value selects the
 *   built-in default sentence.
 * @returns The value returned by nock's `reply`.
 */
export function mockLLMToolCallResponse(toolName: string, toolArgs: any, textResponse?: string) {
  const textPart = {
    type: 'text',
    text: textResponse || '我需要执行一个工具调用'
  };

  const toolCallPart = {
    type: 'tool_call',
    tool_call: {
      name: toolName,
      arguments: JSON.stringify(toolArgs)
    }
  };

  const replyBody = {
    id: 'mock-tool-call-123',
    object: 'chat.completion',
    created: Date.now(),
    model: 'mock-model',
    content: [textPart, toolCallPart]
  };

  const interceptor = nock('https://api.anthropic.com').post('/v1/messages');
  return interceptor.reply(200, replyBody);
}
/**
 * Removes every registered nock interceptor by calling `nock.cleanAll()`.
 */
export function clearLLMMocks(): void {
  nock.cleanAll();
}
命令执行模拟 (tests/helpers/mockCommand.ts):
// tests/helpers/mockCommand.ts
import childProcess from 'child_process';
import { exec } from 'child_process';
import * as sinon from 'sinon';
/**
 * Options accepted by `mockCommandExecution`. Every field is optional.
 */
interface MockCommandOptions {
  /** Code passed to the fake child's `close` listeners. */
  exitCode?: number;
  /** Text delivered to `stdout` `data` listeners; nothing is emitted when empty. */
  stdout?: string;
  /** Text delivered to `stderr` `data` listeners; nothing is emitted when empty. */
  stderr?: string;
  /** Milliseconds to wait before each fake event fires. */
  delay?: number;
}
/**
 * Replaces `child_process.exec` with a sinon stub that, when called with
 * `command` as its first argument, returns a fake child process.
 *
 * The fake child exposes `stdout.on`, `stderr.on` and `on`. Registering a
 * `data` listener on a stream schedules a single callback with the configured
 * text (only when that text is non-empty); registering a `close` listener
 * schedules a callback with `exitCode`. All callbacks fire after `delay` ms.
 *
 * The stub is installed on the module object (`sinon.stub(module, 'exec')`).
 * Calling `sinon.stub(exec)` on the bare function does not replace the module
 * export and does not return a stub that supports `withArgs`.
 *
 * NOTE(review): code that captured `exec` by destructuring before this runs
 * keeps the real function — confirm how the code under test imports it.
 * Call `clearCommandMocks()` between tests; sinon refuses to wrap `exec` twice.
 *
 * @param command Command string the stub should respond to.
 * @param options Output, exit code and delay of the fake execution.
 * @returns The sinon stub installed in place of `child_process.exec`.
 */
export function mockCommandExecution(command: string, options: MockCommandOptions = {}) {
  const {
    stdout = '',
    stderr = '',
    exitCode = 0,
    delay = 0
  } = options;

  // Minimal shape of the emitters handed back to the code under test.
  type FakeEmitter = { on: sinon.SinonStub };

  // Builds a fake readable stream that emits `payload` once to 'data' listeners.
  const makeStream = (payload: string): FakeEmitter => {
    const stream: FakeEmitter = {
      on: sinon.stub().callsFake((event: string, callback: (chunk: string) => void) => {
        if (event === 'data' && payload) {
          setTimeout(() => callback(payload), delay);
        }
        return stream;
      })
    };
    return stream;
  };

  const fakeChild: FakeEmitter & { stdout: FakeEmitter; stderr: FakeEmitter } = {
    stdout: makeStream(stdout),
    stderr: makeStream(stderr),
    on: sinon.stub().callsFake((event: string, callback: (code: number) => void) => {
      if (event === 'close') {
        setTimeout(() => callback(exitCode), delay);
      }
      return fakeChild;
    })
  };

  const execStub = sinon.stub(childProcess, 'exec');
  // Only calls whose first argument equals `command` receive the fake child.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- test double, intentionally not a full ChildProcess
  execStub.withArgs(command).returns(fakeChild as any);
  return execStub;
}
/**
 * Undoes the command mocks by calling `sinon.restore()`.
 */
export function clearCommandMocks(): void {
  sinon.restore();
}
工具服务测试 (tests/unit/services/toolService.test.ts):
// tests/unit/services/toolService.test.ts
import { expect } from 'chai';
import * as sinon from 'sinon';
import toolService from '../../../src/services/toolService';
import { mockCommandExecution, clearCommandMocks } from '../../helpers/mockCommand';
// Unit tests for toolService: command execution, argument validation,
// dangerous-command rejection and tool listing.
describe('Tool Service', () => {
  /**
   * Awaits `action` and returns the Error it rejects with.
   * If `action` resolves, the test fails with `failureMessage`. The failure is
   * raised outside the try block so it is never mistaken for the expected error.
   */
  async function captureError(
    action: () => Promise<unknown>,
    failureMessage: string
  ): Promise<Error> {
    try {
      await action();
    } catch (error) {
      if (error instanceof Error) {
        return error;
      }
      // Non-Error rejections are unexpected; surface them unchanged.
      throw error;
    }
    return expect.fail(failureMessage);
  }

  afterEach(() => {
    clearCommandMocks();
    sinon.restore();
  });

  describe('executeTool', () => {
    it('应该成功执行命令执行工具', async () => {
      // Arrange
      const toolName = 'execute_command';
      const toolArgs = { command: 'echo "hello world"' };
      const expectedOutput = 'hello world\n';
      mockCommandExecution('echo "hello world"', {
        stdout: expectedOutput,
        exitCode: 0
      });

      // Act
      const result = await toolService.executeTool(toolName, toolArgs, {
        sessionId: 'test-session'
      });

      // Assert
      expect(result).to.have.property('stdout', expectedOutput);
      expect(result).to.have.property('success', true);
    });

    it('应该处理命令执行错误', async () => {
      // Arrange
      const toolName = 'execute_command';
      const toolArgs = { command: 'invalid_command' };
      const expectedError = 'command not found: invalid_command';
      mockCommandExecution('invalid_command', {
        stderr: expectedError,
        exitCode: 127
      });

      // Act
      const result = await toolService.executeTool(toolName, toolArgs, {
        sessionId: 'test-session'
      });

      // Assert
      expect(result).to.have.property('stderr').that.includes(expectedError);
      expect(result).to.have.property('success', false);
      expect(result).to.have.property('code', 127);
    });

    it('应该验证工具参数', async () => {
      // Arrange: the required `command` argument is missing.
      const toolName = 'execute_command';
      const toolArgs = {};

      // Act
      const error = await captureError(
        () => toolService.executeTool(toolName, toolArgs, { sessionId: 'test-session' }),
        '应该抛出参数验证错误'
      );

      // Assert
      expect(error.message).to.include('command is required');
    });

    it('应该拒绝执行危险命令', async () => {
      // Arrange
      const toolName = 'execute_command';
      const toolArgs = { command: 'rm -rf /' };

      // Act
      const error = await captureError(
        () => toolService.executeTool(toolName, toolArgs, { sessionId: 'test-session' }),
        '应该拒绝危险命令'
      );

      // Assert
      expect(error.message).to.include('危险命令');
    });
  });

  describe('getAvailableTools', () => {
    it('应该返回可用工具列表', async () => {
      // Act
      const tools = toolService.getAvailableTools();

      // Assert
      expect(tools).to.be.an('array');
      expect(tools.length).to.be.greaterThan(0);
      expect(tools[0]).to.have.property('name');
      expect(tools[0]).to.have.property('description');
    });
  });
});
LLM服务测试 (tests/unit/services/llmService.test.ts):
// tests/unit/services/llmService.test.ts
import { expect } from 'chai';
import * as sinon from 'sinon';
import llmService from '../../../src/services/llmService';
import { mockLLMResponse, mockLLMToolCallResponse, clearLLMMocks } from '../../helpers/mockLLM';
// Unit tests for llmService: plain completions, API error handling,
// tool-call responses and the default provider.
describe('LLM Service', () => {
  /**
   * Awaits `action` and returns the Error it rejects with.
   * If `action` resolves, the test fails with `failureMessage`. The failure is
   * raised outside the try block so it is never mistaken for the expected error.
   */
  async function captureError(
    action: () => Promise<unknown>,
    failureMessage: string
  ): Promise<Error> {
    try {
      await action();
    } catch (error) {
      if (error instanceof Error) {
        return error;
      }
      // Non-Error rejections are unexpected; surface them unchanged.
      throw error;
    }
    return expect.fail(failureMessage);
  }

  afterEach(() => {
    clearLLMMocks();
    sinon.restore();
  });

  describe('completeChat', () => {
    it('应该成功获取LLM响应', async () => {
      // Arrange
      const input = '你好,请生成一个简单的Python函数';
      const expectedResponse = '```python\ndef hello_world():\n print("Hello, World!")\n```';
      mockLLMResponse({
        customResponse: {
          id: 'test-response-123',
          content: [{ type: 'text', text: expectedResponse }]
        }
      });

      // Act
      const result = await llmService.completeChat(
        'claude',
        input,
        { systemPrompt: '你是一个编程助手' },
        {}
      );

      // Assert
      expect(result).to.have.property('content', expectedResponse);
      expect(result).to.have.property('id', 'test-response-123');
    });

    it('应该处理LLM错误响应', async () => {
      // Arrange: the API answers 429 with a rate-limit error body.
      const input = '生成代码';
      mockLLMResponse({
        statusCode: 429,
        customResponse: {
          error: {
            type: 'rate_limit_exceeded',
            message: 'Rate limit exceeded'
          }
        }
      });

      // Act
      const error = await captureError(
        () => llmService.completeChat(
          'claude',
          input,
          { systemPrompt: '你是一个编程助手' },
          {}
        ),
        '应该抛出API错误'
      );

      // Assert
      expect(error.message).to.include('Rate limit');
    });

    it('应该正确处理工具调用响应', async () => {
      // Arrange
      const input = '列出当前目录文件';
      const toolName = 'execute_command';
      const toolArgs = { command: 'ls -la' };
      mockLLMToolCallResponse(toolName, toolArgs, '我将帮你列出文件');

      // Act
      const result = await llmService.completeChat(
        'claude',
        input,
        {
          systemPrompt: '你是一个助手',
          availableTools: [
            { name: 'execute_command', description: '执行命令' }
          ]
        },
        {}
      );

      // Assert
      expect(result).to.have.property('toolCalls').that.is.an('array');
      expect(result.toolCalls[0]).to.have.property('name', toolName);
      expect(result.toolCalls[0]).to.have.property('arguments');
      expect(JSON.parse(result.toolCalls[0].arguments)).to.deep.equal(toolArgs);
    });
  });

  describe('getDefaultProvider', () => {
    it('应该返回默认LLM提供商', () => {
      // Act
      const provider = llmService.getDefaultProvider();

      // Assert
      expect(provider).to.be.a('string');
      expect(provider).to.equal('claude'); // assumes the default provider is Claude
    });
  });
});
代理服务集成测试 (tests/integration/services/agentService.test.ts):
// tests/integration/services/agentService.test.ts
import { expect } from 'chai';
import * as sinon from 'sinon';
import agentService from '../../../src/services/agentService';
import llmService from '../../../src/services/llmService';
import toolService from '../../../src/services/toolService';
import { mockLLMResponse, mockLLMToolCallResponse, clearLLMMocks } from '../../helpers/mockLLM';
import { mockCommandExecution, clearCommandMocks } from '../../helpers/mockCommand';
// Integration tests for agentService with the LLM HTTP API and command
// execution mocked out.
describe('Agent Service Integration', () => {
  let sessionId: string;

  // jest.config.js runs tests/integration under Jest, whose suite-level hooks
  // are `beforeAll` / `afterAll`; Mocha's `before` / `after` do not exist there.
  beforeAll(async () => {
    // Initialise the service once for the whole suite.
    await agentService.initialize();
  });

  beforeEach(async () => {
    // Every test gets a fresh session.
    sessionId = await agentService.createSession({
      llmProvider: 'claude',
      systemPrompt: '你是OpenHands AI开发助手',
      useSandbox: false // sandbox disabled in tests
    });
  });

  afterEach(() => {
    clearLLMMocks();
    clearCommandMocks();
    sinon.restore();
  });

  afterAll(async () => {
    // Shut the service down after the last test.
    await agentService.shutdown();
  });

  describe('processInput', () => {
    it('应该处理文本响应', async () => {
      // Arrange
      const userInput = '你好,请介绍自己';
      const expectedResponse = '我是OpenHands AI开发助手,可以帮助你编写代码、运行命令、解决问题。';
      mockLLMResponse({
        customResponse: {
          content: [{ type: 'text', text: expectedResponse }]
        }
      });

      // Act
      const result = await agentService.processInput(sessionId, userInput);

      // Assert
      expect(result).to.have.property('type', 'text');
      expect(result).to.have.property('content', expectedResponse);
      expect(result).to.have.property('sessionId', sessionId);

      // The session should be idle again afterwards.
      const sessionState = agentService.getSessionState(sessionId);
      expect(sessionState).to.have.property('state', 'idle');
    });

    it('应该处理工具调用并执行命令', async () => {
      // Arrange
      const userInput = '列出当前目录文件';
      const toolName = 'execute_command';
      const toolArgs = { command: 'ls -la' };
      const commandOutput = 'total 20\ndrwxr-xr-x 2 user user 4096 Jan 1 12:00 .\n-rw-r--r-- 1 user user 123 Jan 1 12:00 file.txt\n';

      // First LLM reply: request the tool call.
      mockLLMToolCallResponse(toolName, toolArgs, '我将帮你列出文件');

      // Fake the command execution.
      mockCommandExecution('ls -la', {
        stdout: commandOutput,
        exitCode: 0
      });

      // Second LLM reply: the model's response to the command result.
      mockLLMResponse({
        customResponse: {
          content: [{ type: 'text', text: '当前目录包含一个名为file.txt的文件。' }]
        }
      });

      // Act
      const result = await agentService.processInput(sessionId, userInput);

      // Assert
      expect(result).to.have.property('type', 'tool_execution');
      expect(result).to.have.property('toolResults').that.is.an('array');
      expect(result.toolResults[0]).to.have.property('tool', toolName);
      expect(result.toolResults[0]).to.have.property('status', 'success');
      expect(result.toolResults[0].result).to.have.property('stdout', commandOutput);

      // The session should be idle again afterwards.
      const sessionState = agentService.getSessionState(sessionId);
      expect(sessionState).to.have.property('state', 'idle');
    });

    it('应该处理工具执行错误', async () => {
      // Arrange
      const userInput = '运行不存在的命令';
      const toolName = 'execute_command';
      const toolArgs = { command: 'nonexistent_command' };
      const commandError = 'command not found: nonexistent_command';

      // First LLM reply: request the tool call.
      mockLLMToolCallResponse(toolName, toolArgs);

      // Fake a failing command execution.
      mockCommandExecution('nonexistent_command', {
        stderr: commandError,
        exitCode: 127
      });

      // Second LLM reply: the model's response to the failure.
      mockLLMResponse({
        customResponse: {
          content: [{ type: 'text', text: '命令执行失败,该命令不存在。' }]
        }
      });

      // Act
      const result = await agentService.processInput(sessionId, userInput);

      // Assert
      expect(result).to.have.property('type', 'tool_execution');
      expect(result.toolResults[0]).to.have.property('tool', toolName);
      expect(result.toolResults[0]).to.have.property('status', 'error');
      expect(result.toolResults[0].error).to.include(commandError);
    });
  });

  describe('executeCommand', () => {
    it('应该直接执行命令并返回结果', async () => {
      // Arrange
      const command = 'echo "test command"';
      const commandOutput = 'test command\n';

      // Fake the command execution.
      mockCommandExecution(command, {
        stdout: commandOutput,
        exitCode: 0
      });

      // Act
      const result = await agentService.executeCommand(sessionId, command);

      // Assert
      expect(result).to.have.property('type', 'command_execution');
      expect(result).to.have.property('command', command);
      expect(result.result).to.have.property('stdout', commandOutput);
      expect(result.result).to.have.property('success', true);
    });
  });
});
API端点测试 (tests/integration/api/chatRoutes.test.ts):
// tests/integration/api/chatRoutes.test.ts
import request from 'supertest';
import { expect } from 'chai';
import * as sinon from 'sinon';
import app from '../../../src/app';
import agentService from '../../../src/services/agentService';
import { mockLLMResponse, clearLLMMocks } from '../../helpers/mockLLM';
// HTTP-level integration tests for the chat and execute endpoints,
// driven through supertest against the Express app.
describe('Chat API Routes', () => {
  // jest.config.js runs tests/integration under Jest, whose suite-level hooks
  // are `beforeAll` / `afterAll`; Mocha's `before` / `after` do not exist there.
  beforeAll(async () => {
    // Make sure the service is initialised before any request is sent.
    await agentService.initialize();
  });

  afterEach(() => {
    clearLLMMocks();
    sinon.restore();
  });

  afterAll(async () => {
    await agentService.shutdown();
  });

  describe('POST /api/chat', () => {
    it('应该成功处理聊天请求', async () => {
      // Arrange
      const message = '你好,请介绍自己';
      const expectedResponse = '我是OpenHands AI开发助手';

      // Mock the LLM reply.
      mockLLMResponse({
        customResponse: {
          content: [{ type: 'text', text: expectedResponse }]
        }
      });

      // Act
      const response = await request(app)
        .post('/api/chat')
        .send({ message })
        .expect(200);

      // Assert
      expect(response.body).to.have.property('type', 'text');
      expect(response.body).to.have.property('content').that.includes(expectedResponse);
      expect(response.body).to.have.property('sessionId').that.is.a('string');
    });

    it('应该在带有会话ID的情况下继续对话', async () => {
      // Arrange: create a session first.
      const createSessionRes = await request(app)
        .post('/api/sessions')
        .expect(201);
      const sessionId = createSessionRes.body.id;

      // Mock the LLM reply.
      mockLLMResponse({
        customResponse: {
          content: [{ type: 'text', text: '我记得你之前问过问题' }]
        }
      });

      // Act
      const response = await request(app)
        .post('/api/chat')
        .send({
          message: '你还记得我吗?',
          sessionId
        })
        .expect(200);

      // Assert
      expect(response.body).to.have.property('sessionId', sessionId);
      expect(response.body.content).to.include('记得');
    });

    it('应该处理无效的请求', async () => {
      // Act: the request body has no message.
      const response = await request(app)
        .post('/api/chat')
        .send({})
        .expect(400);

      // Assert
      expect(response.body).to.have.property('error').that.includes('message is required');
    });
  });

  describe('POST /api/execute', () => {
    it('应该执行命令并返回结果', async () => {
      // Arrange: create a session first.
      const createSessionRes = await request(app)
        .post('/api/sessions')
        .expect(201);
      const sessionId = createSessionRes.body.id;

      // Stub the service so no real command runs.
      sinon.stub(agentService, 'executeCommand').resolves({
        type: 'command_execution',
        command: 'echo "test"',
        result: {
          stdout: 'test\n',
          stderr: '',
          success: true
        },
        sessionId
      });

      // Act
      const response = await request(app)
        .post('/api/execute')
        .send({
          command: 'echo "test"',
          sessionId
        })
        .expect(200);

      // Assert
      expect(response.body).to.have.property('type', 'command_execution');
      expect(response.body.result).to.have.property('stdout', 'test\n');
      expect(response.body.result).to.have.property('success', true);
    });
  });
});
场景测试 (tests/e2e/scenarios/codeGeneration.test.ts):
// tests/e2e/scenarios/codeGeneration.test.ts
import { expect } from 'chai';
import fs from 'fs/promises';
import path from 'path';
import { v4 as uuidv4 } from 'uuid';
import { TestAgent } from '../helpers/TestAgent';
import { setupE2ETest, cleanupE2ETest } from '../helpers/e2eSetup';
// End-to-end scenario: ask the agent to generate and to repair Python code,
// then verify the files it produced and the result of running them.
describe('代码生成场景 E2E测试', () => {
  let agent: TestAgent;
  let workDir: string;

  before(async () => {
    // Set up the test environment.
    const setup = await setupE2ETest();
    agent = setup.agent;
    workDir = setup.workDir;
  });

  after(async () => {
    // Tear the test environment down.
    await cleanupE2ETest(agent, workDir);
  });

  it('应该生成Python函数并验证其功能', async function() {
    this.timeout(30000); // the agent round-trips need a longer timeout

    // Step 1: ask for a Fibonacci function saved to fibonacci.py.
    const response1 = await agent.sendMessage(
      '请生成一个计算斐波那契数列的Python函数,并将其保存到fibonacci.py文件中'
    );
    // The reply should contain the Python code.
    expect(response1.content).to.include('def fibonacci');

    // Step 2: the file must have been created.
    const fileExists = await agent.fileExists('fibonacci.py');
    expect(fileExists).to.be.true;

    // Step 3: the file must contain the function.
    const fileContent = await agent.readFile('fibonacci.py');
    expect(fileContent).to.include('def fibonacci');

    // Step 4: ask for a test script.
    const response2 = await agent.sendMessage(
      '请编写一个测试脚本测试这个斐波那契函数,对于n=10应该返回55'
    );
    expect(response2.content).to.include('test');

    // Step 5: the test script must have been created.
    const testFileExists = await agent.fileExists('test_fibonacci.py');
    expect(testFileExists).to.be.true;

    // Step 6: run the test script.
    const execResult = await agent.executeCommand('python test_fibonacci.py');

    // Accept either an explicit PASS or the absence of FAIL. A chai assertion
    // throws on failure, so `expect(..) || expect(..)` would never reach the
    // second alternative; the alternatives are combined into one boolean.
    const testsPassed =
      execResult.stdout.includes('PASS') || !execResult.stdout.includes('FAIL');
    expect(testsPassed, '测试输出应包含PASS或不包含FAIL').to.be.true;
  });

  it('应该能够修复代码中的错误', async function() {
    this.timeout(30000);

    // Step 1: write a Python file with a bug. The body is indented so that it
    // is valid Python and fails with the intended division by zero.
    const buggyCode = `
def calculate_average(numbers):
    total = 0
    for num in numbers:
        total += num
    # 错误: 除以长度而不是检查空列表
    return total / len(numbers)
# 这将导致除零错误
result = calculate_average([])
print(f"The average is {result}")
`;
    await agent.writeFile('buggy.py', buggyCode);

    // Step 2: ask the agent to fix it.
    const response = await agent.sendMessage(
      '这个Python脚本(buggy.py)在尝试计算空列表的平均值时会出错。请修复它,确保它能处理空列表的情况。'
    );

    // The reply should mention the error or the fix (either wording is fine).
    const mentionsProblem =
      response.content.includes('除零') || response.content.includes('空列表');
    expect(mentionsProblem, '响应应提到除零或空列表').to.be.true;

    // Step 3: the file must now guard against the empty list (either idiom).
    const fixedCode = await agent.readFile('buggy.py');
    const hasEmptyGuard =
      fixedCode.includes('if len(numbers) == 0') || fixedCode.includes('if not numbers');
    expect(hasEmptyGuard, '修复后的代码应检查空列表').to.be.true;

    // Step 4: run the repaired script.
    const execResult = await agent.executeCommand('python buggy.py');

    // It must run without writing to stderr.
    expect(execResult.stderr).to.be.empty;
  });
});
工作流测试 (tests/e2e/workflows/projectCreation.test.ts):
// tests/e2e/workflows/projectCreation.test.ts
import { expect } from 'chai';
import fs from 'fs/promises';
import path from 'path';
import { TestAgent } from '../helpers/TestAgent';
import { setupE2ETest, cleanupE2ETest } from '../helpers/e2eSetup';
// End-to-end workflow: ask the agent to scaffold a Todo web application and
// verify the generated project structure, tests and README.
describe('项目创建工作流 E2E测试', () => {
  let agent: TestAgent;
  let workDir: string;

  before(async () => {
    // Set up the test environment.
    const setup = await setupE2ETest();
    agent = setup.agent;
    workDir = setup.workDir;
  });

  after(async () => {
    // Tear the test environment down.
    await cleanupE2ETest(agent, workDir);
  });

  it('应该能够创建完整的Web应用项目', async function() {
    this.timeout(120000); // project scaffolding takes several agent round-trips

    // Step 1: ask for a simple Todo application.
    const response1 = await agent.sendMessage(
      '请创建一个简单的Todo应用,使用React前端和Express后端。应该包含添加、删除和标记完成的功能。'
    );

    // The reply should describe a plan that mentions both technologies.
    expect(response1.content).to.include('React');
    expect(response1.content).to.include('Express');

    // Step 2: wait for the project skeleton. waitForFileCreation reports a
    // timeout by returning false, so its result has to be asserted.
    const packageJsonCreated = await agent.waitForFileCreation('package.json', 60000);
    expect(packageJsonCreated, '等待 package.json 创建超时').to.be.true;

    // Step 3: the key files must exist.
    const files = [
      'package.json',
      'src/App.js',
      'src/components',
      'server.js'
    ];
    for (const file of files) {
      const exists = await agent.fileExists(file);
      expect(exists, `文件 ${file} 应该存在`).to.be.true;
    }

    // Step 4: package.json must declare both dependencies.
    const packageJson = JSON.parse(await agent.readFile('package.json'));
    expect(packageJson.dependencies).to.have.property('react');
    expect(packageJson.dependencies).to.have.property('express');

    // Step 5: ask for unit tests; only the files produced are checked.
    await agent.sendMessage(
      '请为Todo组件添加Jest单元测试'
    );

    // The test directory must appear within the timeout.
    const testsDirCreated = await agent.waitForFileCreation('src/components/__tests__', 30000);
    expect(testsDirCreated, '等待 src/components/__tests__ 创建超时').to.be.true;

    // Step 6: the test directory must not be empty.
    const testFiles = await agent.listFiles('src/components/__tests__');
    expect(testFiles.length).to.be.greaterThan(0);

    // Step 7: a README must exist and cover the basics.
    const readmeExists = await agent.fileExists('README.md');
    expect(readmeExists).to.be.true;
    const readme = await agent.readFile('README.md');
    expect(readme).to.include('Todo');
    expect(readme).to.include('安装');
    expect(readme).to.include('使用');
  });
});
测试代理类 (tests/e2e/helpers/TestAgent.ts):
// tests/e2e/helpers/TestAgent.ts
import fs from 'fs/promises';
import path from 'path';
import { exec } from 'child_process';
import { promisify } from 'util';
import axios from 'axios';
// Promise-returning wrapper around child_process.exec.
// NOTE(review): nothing in the visible part of this file uses it.
const execAsync = promisify(exec);

/**
 * Test agent.
 * Wraps the HTTP calls used to talk to an OpenHands agent server and the
 * filesystem checks run against the agent's working directory.
 */
export class TestAgent {
  private readonly baseUrl: string;
  private sessionId: string | null = null;
  private readonly workDir: string;

  /**
   * @param baseUrl Base URL of the API server.
   * @param workDir Directory that relative file paths are resolved against.
   */
  constructor(baseUrl: string, workDir: string) {
    this.baseUrl = baseUrl;
    this.workDir = workDir;
  }

  /** Extracts a readable message from a caught value of unknown type. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Resolves a path relative to the working directory. */
  private resolvePath(relativePath: string): string {
    return path.join(this.workDir, relativePath);
  }

  /**
   * Creates an agent session via `POST /api/sessions` and remembers its id.
   * @returns The id of the new session.
   * @throws Error when the request fails.
   */
  async initialize(): Promise<string> {
    try {
      const response = await axios.post(`${this.baseUrl}/api/sessions`, {
        systemPrompt: '你是OpenHands AI开发助手,一个专业的软件开发代理。'
      });
      // Keep the id in a local so the declared `string` return type holds
      // even though the field itself is nullable.
      const sessionId: string = response.data.id;
      this.sessionId = sessionId;
      return sessionId;
    } catch (error) {
      throw new Error(`初始化代理会话失败: ${TestAgent.describeError(error)}`);
    }
  }

  /**
   * Sends a chat message via `POST /api/chat`, creating a session first when
   * none exists.
   * @param message Message text.
   * @returns The response body.
   * @throws Error when the request fails.
   */
  async sendMessage(message: string): Promise<any> {
    if (!this.sessionId) {
      await this.initialize();
    }
    try {
      const response = await axios.post(`${this.baseUrl}/api/chat`, {
        message,
        sessionId: this.sessionId
      });
      return response.data;
    } catch (error) {
      throw new Error(`发送消息失败: ${TestAgent.describeError(error)}`);
    }
  }

  /**
   * Runs a command via `POST /api/execute`, creating a session first when
   * none exists.
   * @param command Command to execute.
   * @returns The command's stdout and stderr; missing values become ''.
   * @throws Error when the request fails.
   */
  async executeCommand(command: string): Promise<{stdout: string, stderr: string}> {
    if (!this.sessionId) {
      await this.initialize();
    }
    try {
      const response = await axios.post(`${this.baseUrl}/api/execute`, {
        command,
        sessionId: this.sessionId
      });
      return {
        stdout: response.data.result.stdout || '',
        stderr: response.data.result.stderr || ''
      };
    } catch (error) {
      throw new Error(`执行命令失败: ${TestAgent.describeError(error)}`);
    }
  }

  /**
   * Reports whether a path inside the working directory is accessible.
   * @param filePath Path relative to the working directory.
   */
  async fileExists(filePath: string): Promise<boolean> {
    try {
      await fs.access(this.resolvePath(filePath));
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Reads a UTF-8 file from the working directory.
   * @param filePath Path relative to the working directory.
   * @throws Error when the file cannot be read.
   */
  async readFile(filePath: string): Promise<string> {
    try {
      return await fs.readFile(this.resolvePath(filePath), 'utf8');
    } catch (error) {
      throw new Error(`读取文件失败: ${TestAgent.describeError(error)}`);
    }
  }

  /**
   * Writes a UTF-8 file into the working directory, creating parent
   * directories as needed.
   * @param filePath Path relative to the working directory.
   * @param content File content.
   * @throws Error when the file cannot be written.
   */
  async writeFile(filePath: string, content: string): Promise<void> {
    const fullPath = this.resolvePath(filePath);
    try {
      await fs.mkdir(path.dirname(fullPath), { recursive: true });
      await fs.writeFile(fullPath, content, 'utf8');
    } catch (error) {
      throw new Error(`写入文件失败: ${TestAgent.describeError(error)}`);
    }
  }

  /**
   * Lists the entries of a directory inside the working directory.
   * @param dirPath Path relative to the working directory.
   * @throws Error when the directory cannot be read.
   */
  async listFiles(dirPath: string): Promise<string[]> {
    try {
      return await fs.readdir(this.resolvePath(dirPath));
    } catch (error) {
      throw new Error(`列出文件失败: ${TestAgent.describeError(error)}`);
    }
  }

  /**
   * Polls every 100 ms until a path exists or the timeout elapses.
   * The path is always checked at least once, so an existing file is reported
   * even when `timeout` is zero or negative.
   * @param filePath Path relative to the working directory.
   * @param timeout Maximum time to wait, in milliseconds.
   * @returns true when the path exists, false when the wait timed out.
   */
  async waitForFileCreation(filePath: string, timeout: number = 10000): Promise<boolean> {
    const deadline = Date.now() + timeout;
    for (;;) {
      if (await this.fileExists(filePath)) {
        return true;
      }
      if (Date.now() >= deadline) {
        return false;
      }
      await new Promise(resolve => setTimeout(resolve, 100));
    }
  }

  /**
   * Deletes the current session via `DELETE /api/sessions/:id`, if there is one.
   * Failures are logged rather than thrown. The stored id is cleared either
   * way, so a later call starts a new session instead of reusing a closed one.
   */
  async close(): Promise<void> {
    if (!this.sessionId) {
      return;
    }
    try {
      await axios.delete(`${this.baseUrl}/api/sessions/${this.sessionId}`);
    } catch (error) {
      console.error(`关闭代理会话失败: ${TestAgent.describeError(error)}`);
    } finally {
      this.sessionId = null;
    }
  }
}
E2E测试设置助手 (tests/e2e/helpers/e2eSetup.ts):
// tests/e2e/helpers/e2eSetup.ts
import fs from 'fs/promises';
import path from 'path';
import { v4 as uuidv4 } from 'uuid';
import { TestAgent } from './TestAgent';
import { startServer, stopServer } from './serverControl';
/**
 * Sets up the E2E test environment: a unique temporary working directory
 * under `<cwd>/tmp`, a running test server and an initialised TestAgent.
 *
 * If any step fails, everything created so far is released (the server is
 * stopped if it was started, the working directory is removed) and the
 * original error is rethrown.
 *
 * @returns The agent, the working directory path and the server info.
 */
export async function setupE2ETest() {
  const testId = uuidv4();
  const workDir = path.join(process.cwd(), 'tmp', `test-${testId}`);
  let serverStarted = false;

  try {
    await fs.mkdir(workDir, { recursive: true });

    // Start the test server.
    const serverInfo = await startServer();
    serverStarted = true;

    // Create the agent and open its session.
    const agent = new TestAgent(serverInfo.url, workDir);
    await agent.initialize();

    return {
      agent,
      workDir,
      serverInfo
    };
  } catch (error) {
    // Do not leave the server running when the agent could not be initialised.
    if (serverStarted) {
      try {
        await stopServer();
      } catch (stopError) {
        console.error('停止服务器失败:', stopError);
      }
    }
    // fs.rm replaces the deprecated recursive fs.rmdir; `force` tolerates a
    // directory that was never created.
    try {
      await fs.rm(workDir, { recursive: true, force: true });
    } catch (cleanupError) {
      console.error('清理工作目录失败:', cleanupError);
    }
    throw error;
  }
}
/**
 * Tears down the E2E test environment: closes the agent session, stops the
 * server and removes the temporary working directory.
 *
 * Each step runs even if an earlier one rejects; the first rejection still
 * propagates to the caller. A failure to remove the directory is only logged.
 *
 * @param agent Agent created by the E2E setup.
 * @param workDir Temporary working directory to remove.
 */
export async function cleanupE2ETest(agent: TestAgent, workDir: string) {
  try {
    // Close the agent session.
    await agent.close();
  } finally {
    try {
      // Stop the server.
      await stopServer();
    } finally {
      // Remove the working directory. fs.rm replaces the deprecated
      // recursive fs.rmdir; `force` tolerates an already-missing directory.
      try {
        await fs.rm(workDir, { recursive: true, force: true });
      } catch (error) {
        console.error('清理工作目录失败:', error);
      }
    }
  }
}
服务器控制助手 (tests/e2e/helpers/serverControl.ts):
// tests/e2e/helpers/serverControl.ts
import { spawn, ChildProcess } from 'child_process';
import axios from 'axios';
import { waitForPort } from './networkUtils';
// Handle of the spawned test server, or null when none is tracked.
let serverProcess: ChildProcess | null = null;
// Port of the test server. Environment values are strings, so convert to a
// number; unset, empty or non-numeric values fall back to 3001.
const SERVER_PORT = Number(process.env.TEST_SERVER_PORT) || 3001;
// Base URL of the test server.
const SERVER_URL = `http://localhost:${SERVER_PORT}`;
/**
 * Starts the test server (`node src/server.js`) unless one is already tracked.
 *
 * The promise resolves once the server has printed 'Server running on port'
 * and `waitForPort` has confirmed the port. It rejects when spawning fails,
 * the process exits before becoming ready, the port probe fails, or 10 s pass.
 * On every failure the child is killed and the module-level handle is cleared,
 * so a later call starts a fresh server instead of reporting a dead one.
 *
 * @returns The server URL and numeric port.
 */
export async function startServer() {
  // SERVER_PORT may originate from an environment string; expose a number.
  const port = Number(SERVER_PORT);

  if (serverProcess) {
    return { url: SERVER_URL, port };
  }

  return new Promise<{ url: string, port: number }>((resolve, reject) => {
    const child = spawn('node', ['src/server.js'], {
      env: {
        ...process.env,
        NODE_ENV: 'test',
        PORT: String(SERVER_PORT),
        LOG_LEVEL: 'error',
        SANDBOX_ENABLED: 'false' // sandbox disabled in tests
      },
      stdio: 'pipe'
    });
    serverProcess = child;

    let settled = false;
    let probeStarted = false;
    // Accumulated stdout, so a readiness line split across chunks still matches.
    let startupOutput = '';

    const startupTimer = setTimeout(() => {
      fail(new Error('启动服务器超时'));
    }, 10000);

    // Settles the promise successfully, exactly once.
    function succeed(): void {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(startupTimer);
      resolve({ url: SERVER_URL, port });
    }

    // Settles the promise with an error, exactly once, and releases the child.
    function fail(error: Error): void {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(startupTimer);
      if (serverProcess === child) {
        serverProcess = null;
      }
      child.kill();
      reject(error);
    }

    // Spawn failure.
    child.on('error', (err) => {
      fail(new Error(`启动服务器失败: ${err.message}`));
    });

    // Exit: a failure before readiness; afterwards just forget the handle.
    child.once('exit', (code, signal) => {
      if (serverProcess === child) {
        serverProcess = null;
      }
      fail(new Error(`启动服务器失败: 进程提前退出 (code=${code}, signal=${signal})`));
    });

    // Standard output: log it and watch for the readiness line.
    child.stdout?.on('data', (data) => {
      const output = data.toString();
      console.log(`[Server]: ${output}`);
      if (settled || probeStarted) {
        return;
      }
      startupOutput += output;
      if (startupOutput.includes('Server running on port')) {
        probeStarted = true;
        // Wait until the port is actually reachable.
        waitForPort(port, 5000)
          .then(succeed)
          .catch((err: unknown) => {
            fail(err instanceof Error ? err : new Error(String(err)));
          });
      }
    });

    // Standard error: log only.
    child.stderr?.on('data', (data) => {
      console.error(`[Server Error]: ${data.toString()}`);
    });
  });
}
/**
 * Stops the tracked test server, if any.
 *
 * Sends SIGTERM and resolves once the process emits `exit`; if it is still
 * alive after 5 s it is sent SIGKILL. When the tracked process has already
 * exited, the handle is simply cleared, because no further `exit` event would
 * arrive and waiting for one would hang.
 */
export async function stopServer() {
  const child = serverProcess;
  if (!child) {
    return;
  }

  // Already terminated: nothing to wait for.
  if (child.exitCode !== null || child.signalCode !== null) {
    serverProcess = null;
    return;
  }

  return new Promise<void>((resolve) => {
    // Escalate to a forced kill if the graceful shutdown takes too long.
    const forceKillTimeout = setTimeout(() => {
      console.warn('强制关闭服务器进程');
      child.kill('SIGKILL');
    }, 5000);

    // Register the listener before signalling so the exit cannot be missed.
    child.once('exit', () => {
      clearTimeout(forceKillTimeout);
      if (serverProcess === child) {
        serverProcess = null;
      }
      resolve();
    });

    // Graceful shutdown.
    child.kill('SIGTERM');
  });
}
/**
 * Probes `GET {SERVER_URL}/api/health`.
 * @returns true only when the request completes with status 200; any thrown
 *   error is reported as false.
 */
export async function checkServerHealth(): Promise<boolean> {
  const healthEndpoint = `${SERVER_URL}/api/health`;
  let healthy = false;
  try {
    const { status } = await axios.get(healthEndpoint);
    healthy = status === 200;
  } catch {
    healthy = false;
  }
  return healthy;
}
在为OpenHands这样的AI代理系统设计测试策略时,应考虑以下关键点:
分层测试策略:
测试数据管理:
模拟与存根策略:
测试覆盖目标:
持续集成实践:
AI系统测试面临的主要挑战之一是输出的不确定性。以下策略可以帮助管理这种不确定性:
输出验证策略:
统计测试方法:
模糊测试:
回归测试库:
将测试自动化纳入开发流程对于维护OpenHands的质量至关重要:
# .github/workflows/test.yml
# Lint, unit and integration tests run on every push and pull request to
# main/develop; E2E tests run only on pushes to those branches, after the
# `test` job has succeeded.
name: Test Suite

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Setup Node.js
        uses: actions/setup-node@v2
        with:
          node-version: '16'
      - name: Install dependencies
        run: npm ci
      - name: Run linting
        run: npm run lint
      - name: Run unit tests
        run: npm run test:unit
      - name: Run integration tests
        run: npm run test:integration
      - name: Upload coverage
        uses: codecov/codecov-action@v2
        with:
          file: ./coverage/lcov.info

  e2e-tests:
    runs-on: ubuntu-latest
    needs: test
    # Skipped for pull requests; only pushes to main or develop qualify.
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')
    steps:
      - uses: actions/checkout@v2
      - name: Setup Node.js
        uses: actions/setup-node@v2
        with:
          node-version: '16'
      - name: Install dependencies
        run: npm ci
      - name: Run E2E tests
        run: npm run test:e2e
测试报告与可视化:
自动化测试调度:
为了持续监控OpenHands的质量,应建立以下指标和监控机制:
关键质量指标:
性能指标:
用户体验指标:
监控与警报:
测试OpenHands这样的AI代理系统面临几个独特挑战:
确定性与可重现性:
测试范围与深度:
测试效率与资源消耗:
测试有效性评估:
在为OpenHands设计测试策略时,我们做出了以下权衡:
模拟vs真实LLM调用:
测试粒度:
测试环境隔离:
测试数据管理:
测试不应是孤立的活动,而应与整个开发流程紧密集成:
测试驱动开发(TDD)适应:
持续测试文化:
测试作为文档:
质量反馈循环:
为了进一步提高OpenHands的测试和质量保证能力,可以探索以下方向:
高级测试技术:
AI特定测试方法:
测试基础设施改进:
质量监控与分析:
测试自动化高级主题: