chore: clean spec tests (#5517)

This commit is contained in:
jif-oai
2025-10-22 18:30:33 +01:00
committed by GitHub
parent 3c90728a29
commit bac7acaa7c

View File

@@ -1019,21 +1019,24 @@ mod tests {
} }
} }
fn assert_eq_tool_names(tools: &[ConfiguredToolSpec], expected_names: &[&str]) { // Avoid order-based assertions; compare via set containment instead.
let tool_names = tools fn assert_contains_tool_names(tools: &[ConfiguredToolSpec], expected_subset: &[&str]) {
.iter() use std::collections::HashSet;
.map(|tool| tool_name(&tool.spec)) let mut names = HashSet::new();
.collect::<Vec<_>>(); let mut duplicates = Vec::new();
for name in tools.iter().map(|t| tool_name(&t.spec)) {
assert_eq!( if !names.insert(name) {
tool_names.len(), duplicates.push(name);
expected_names.len(), }
"tool_name mismatch, {tool_names:?}, {expected_names:?}", }
assert!(
duplicates.is_empty(),
"duplicate tool entries detected: {duplicates:?}"
); );
for (name, expected_name) in tool_names.iter().zip(expected_names.iter()) { for expected in expected_subset {
assert_eq!( assert!(
name, expected_name, names.contains(expected),
"tool_name mismatch, {name:?}, {expected_name:?}" "expected tool {expected} to be present; had: {names:?}"
); );
} }
} }
@@ -1056,8 +1059,105 @@ mod tests {
.unwrap_or_else(|| panic!("expected tool {expected_name}")) .unwrap_or_else(|| panic!("expected tool {expected_name}"))
} }
fn strip_descriptions_schema(schema: &mut JsonSchema) {
match schema {
JsonSchema::Boolean { description }
| JsonSchema::String { description }
| JsonSchema::Number { description } => {
*description = None;
}
JsonSchema::Array { items, description } => {
strip_descriptions_schema(items);
*description = None;
}
JsonSchema::Object {
properties,
required: _,
additional_properties,
} => {
for v in properties.values_mut() {
strip_descriptions_schema(v);
}
if let Some(AdditionalProperties::Schema(s)) = additional_properties {
strip_descriptions_schema(s);
}
}
}
}
fn strip_descriptions_tool(spec: &mut ToolSpec) {
match spec {
ToolSpec::Function(ResponsesApiTool { parameters, .. }) => {
strip_descriptions_schema(parameters);
}
ToolSpec::Freeform(_) | ToolSpec::LocalShell {} | ToolSpec::WebSearch {} => {}
}
}
#[test] #[test]
fn test_build_specs() { fn test_full_toolset_specs_for_gpt5_codex() {
let model_family = find_family_for_model("gpt-5-codex")
.expect("gpt-5-codex should be a valid model family");
let mut features = Features::with_defaults();
features.enable(Feature::UnifiedExec);
features.enable(Feature::WebSearchRequest);
features.enable(Feature::ViewImageTool);
let config = ToolsConfig::new(&ToolsConfigParams {
model_family: &model_family,
features: &features,
});
let (tools, _) = build_specs(&config, None).build();
// Build actual map name -> spec
use std::collections::BTreeMap;
use std::collections::HashSet;
let mut actual: BTreeMap<String, ToolSpec> = BTreeMap::new();
let mut duplicate_names = Vec::new();
for t in &tools {
let name = tool_name(&t.spec).to_string();
if actual.insert(name.clone(), t.spec.clone()).is_some() {
duplicate_names.push(name);
}
}
assert!(
duplicate_names.is_empty(),
"duplicate tool entries detected: {duplicate_names:?}"
);
// Build expected from the same helpers used by the builder.
let mut expected: BTreeMap<String, ToolSpec> = BTreeMap::new();
for spec in [
create_exec_command_tool(),
create_write_stdin_tool(),
create_shell_tool(),
create_list_mcp_resources_tool(),
create_list_mcp_resource_templates_tool(),
create_read_mcp_resource_tool(),
PLAN_TOOL.clone(),
create_apply_patch_freeform_tool(),
ToolSpec::WebSearch {},
create_view_image_tool(),
] {
expected.insert(tool_name(&spec).to_string(), spec);
}
// Exact name set match — this is the only test allowed to fail when tools change.
let actual_names: HashSet<_> = actual.keys().cloned().collect();
let expected_names: HashSet<_> = expected.keys().cloned().collect();
assert_eq!(actual_names, expected_names, "tool name set mismatch");
// Compare specs ignoring human-readable descriptions.
for name in expected.keys() {
let mut a = actual.get(name).expect("present").clone();
let mut e = expected.get(name).expect("present").clone();
strip_descriptions_tool(&mut a);
strip_descriptions_tool(&mut e);
assert_eq!(a, e, "spec mismatch for {name}");
}
}
#[test]
fn test_build_specs_contains_expected_basics() {
let model_family = find_family_for_model("codex-mini-latest") let model_family = find_family_for_model("codex-mini-latest")
.expect("codex-mini-latest should be a valid model family"); .expect("codex-mini-latest should be a valid model family");
let mut features = Features::with_defaults(); let mut features = Features::with_defaults();
@@ -1068,25 +1168,25 @@ mod tests {
features: &features, features: &features,
}); });
let (tools, _) = build_specs(&config, Some(HashMap::new())).build(); let (tools, _) = build_specs(&config, Some(HashMap::new())).build();
let tool_names = tools.iter().map(|t| t.spec.name()).collect::<Vec<_>>();
let mut expected = vec!["exec_command", "write_stdin"]; assert_eq!(
if let Some(shell_tool) = shell_tool_name(&config) { &tool_names,
expected.push(shell_tool); &[
} "exec_command",
expected.extend([ "write_stdin",
"list_mcp_resources", "local_shell",
"list_mcp_resource_templates", "list_mcp_resources",
"read_mcp_resource", "list_mcp_resource_templates",
"update_plan", "read_mcp_resource",
"web_search", "update_plan",
"view_image", "web_search",
]); "view_image",
]
assert_eq_tool_names(&tools, &expected); );
} }
#[test] #[test]
fn test_build_specs_default_shell() { fn test_build_specs_default_shell_present() {
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
let mut features = Features::with_defaults(); let mut features = Features::with_defaults();
features.enable(Feature::WebSearchRequest); features.enable(Feature::WebSearchRequest);
@@ -1097,20 +1197,12 @@ mod tests {
}); });
let (tools, _) = build_specs(&config, Some(HashMap::new())).build(); let (tools, _) = build_specs(&config, Some(HashMap::new())).build();
let mut expected = vec!["exec_command", "write_stdin"]; // Only check the shell variant and a couple of core tools.
let mut subset = vec!["exec_command", "write_stdin", "update_plan"];
if let Some(shell_tool) = shell_tool_name(&config) { if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool); subset.push(shell_tool);
} }
expected.extend([ assert_contains_tool_names(&tools, &subset);
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"web_search",
"view_image",
]);
assert_eq_tool_names(&tools, &expected);
} }
#[test] #[test]
@@ -1165,7 +1257,7 @@ mod tests {
} }
#[test] #[test]
fn test_build_specs_mcp_tools() { fn test_build_specs_mcp_tools_converted() {
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
let mut features = Features::with_defaults(); let mut features = Features::with_defaults();
features.enable(Feature::UnifiedExec); features.enable(Feature::UnifiedExec);
@@ -1213,22 +1305,6 @@ mod tests {
) )
.build(); .build();
let mut expected = vec!["exec_command", "write_stdin"];
if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
}
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"web_search",
"view_image",
"test_server/do_something_cool",
]);
assert_eq_tool_names(&tools, &expected);
let tool = find_tool(&tools, "test_server/do_something_cool"); let tool = find_tool(&tools, "test_server/do_something_cool");
assert_eq!( assert_eq!(
&tool.spec, &tool.spec,
@@ -1334,23 +1410,19 @@ mod tests {
]); ]);
let (tools, _) = build_specs(&config, Some(tools_map)).build(); let (tools, _) = build_specs(&config, Some(tools_map)).build();
// Expect exec_command/write_stdin first, followed by MCP tools sorted by fully-qualified name.
let mut expected = vec!["exec_command", "write_stdin"];
if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
}
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"view_image",
"test_server/cool",
"test_server/do",
"test_server/something",
]);
assert_eq_tool_names(&tools, &expected); // Only assert that the MCP tools themselves are sorted by fully-qualified name.
let mcp_names: Vec<_> = tools
.iter()
.map(|t| tool_name(&t.spec).to_string())
.filter(|n| n.starts_with("test_server/"))
.collect();
let expected = vec![
"test_server/cool".to_string(),
"test_server/do".to_string(),
"test_server/something".to_string(),
];
assert_eq!(mcp_names, expected);
} }
#[test] #[test]
@@ -1389,28 +1461,9 @@ mod tests {
) )
.build(); .build();
let mut expected = vec!["exec_command", "write_stdin"]; let tool = find_tool(&tools, "dash/search");
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"dash/search",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!( assert_eq!(
tools[if has_shell { 10 } else { 9 }].spec, tool.spec,
ToolSpec::Function(ResponsesApiTool { ToolSpec::Function(ResponsesApiTool {
name: "dash/search".to_string(), name: "dash/search".to_string(),
parameters: JsonSchema::Object { parameters: JsonSchema::Object {
@@ -1463,27 +1516,9 @@ mod tests {
) )
.build(); .build();
let mut expected = vec!["exec_command", "write_stdin"]; let tool = find_tool(&tools, "dash/paginate");
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"dash/paginate",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!( assert_eq!(
tools[if has_shell { 10 } else { 9 }].spec, tool.spec,
ToolSpec::Function(ResponsesApiTool { ToolSpec::Function(ResponsesApiTool {
name: "dash/paginate".to_string(), name: "dash/paginate".to_string(),
parameters: JsonSchema::Object { parameters: JsonSchema::Object {
@@ -1535,26 +1570,9 @@ mod tests {
) )
.build(); .build();
let mut expected = vec!["exec_command", "write_stdin"]; let tool = find_tool(&tools, "dash/tags");
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"dash/tags",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!( assert_eq!(
tools[if has_shell { 10 } else { 9 }].spec, tool.spec,
ToolSpec::Function(ResponsesApiTool { ToolSpec::Function(ResponsesApiTool {
name: "dash/tags".to_string(), name: "dash/tags".to_string(),
parameters: JsonSchema::Object { parameters: JsonSchema::Object {
@@ -1608,26 +1626,9 @@ mod tests {
) )
.build(); .build();
let mut expected = vec!["exec_command", "write_stdin"]; let tool = find_tool(&tools, "dash/value");
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"dash/value",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!( assert_eq!(
tools[if has_shell { 10 } else { 9 }].spec, tool.spec,
ToolSpec::Function(ResponsesApiTool { ToolSpec::Function(ResponsesApiTool {
name: "dash/value".to_string(), name: "dash/value".to_string(),
parameters: JsonSchema::Object { parameters: JsonSchema::Object {
@@ -1718,28 +1719,9 @@ mod tests {
) )
.build(); .build();
let mut expected = vec!["exec_command", "write_stdin"]; let tool = find_tool(&tools, "test_server/do_something_cool");
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"test_server/do_something_cool",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!( assert_eq!(
tools[if has_shell { 10 } else { 9 }].spec, tool.spec,
ToolSpec::Function(ResponsesApiTool { ToolSpec::Function(ResponsesApiTool {
name: "test_server/do_something_cool".to_string(), name: "test_server/do_something_cool".to_string(),
parameters: JsonSchema::Object { parameters: JsonSchema::Object {