clawbench/baselines/basic_usage_query_summary.json
2026-04-09 11:15:30 -07:00

127 lines
3.5 KiB
JSON

{
"source_dataset": "基础使用场景测试集.xlsx",
"source_version": "1.0",
"summary": {
"query_total": 72,
"primary_scene_total": 12,
"secondary_scene_total": 55,
"atomic_capability_total": 139,
"difficulty_distribution": {
"l1": 22,
"l2": 39,
"l3": 11
},
"design_principles": [
"mece_atomic_capabilities",
"parameterized_case_expansion",
"clear_and_ambiguous_query_variants",
"dual_channel_delivery_judging"
]
},
"scenario_catalog": [
{
"scenario": "file_system_ops",
"source_label_zh": "文件与系统操作",
"query_count": 8,
"weight": 0.13,
"difficulty_distribution": {"l1": 4, "l2": 4, "l3": 0}
},
{
"scenario": "web_info_ops",
"source_label_zh": "信息查询与网页操作",
"query_count": 6,
"weight": 0.1,
"difficulty_distribution": {"l1": 2, "l2": 3, "l3": 1}
},
{
"scenario": "calendar_reminders",
"source_label_zh": "日程与提醒",
"query_count": 5,
"weight": 0.08,
"difficulty_distribution": {"l1": 3, "l2": 2, "l3": 0}
},
{
"scenario": "communication_messaging",
"source_label_zh": "通讯与消息",
"query_count": 5,
"weight": 0.09,
"difficulty_distribution": {"l1": 0, "l2": 5, "l3": 0}
},
{
"scenario": "data_processing_analysis",
"source_label_zh": "数据处理与分析",
"query_count": 8,
"weight": 0.11,
"difficulty_distribution": {"l1": 2, "l2": 6, "l3": 0}
},
{
"scenario": "coding_dev_assist",
"source_label_zh": "编程与开发辅助",
"query_count": 7,
"weight": 0.09,
"difficulty_distribution": {"l1": 3, "l2": 4, "l3": 0}
},
{
"scenario": "personal_life_assistant",
"source_label_zh": "个人生活助理",
"query_count": 5,
"weight": 0.06,
"difficulty_distribution": {"l1": 4, "l2": 1, "l3": 0}
},
{
"scenario": "multi_step_compound",
"source_label_zh": "多步骤复合任务",
"query_count": 7,
"weight": 0.12,
"difficulty_distribution": {"l1": 0, "l2": 0, "l3": 7}
},
{
"scenario": "context_continuation",
"source_label_zh": "上下文理解与连续对话",
"query_count": 7,
"weight": 0.05,
"difficulty_distribution": {"l1": 0, "l2": 5, "l3": 2}
},
{
"scenario": "error_boundary_cases",
"source_label_zh": "错误处理与边界情况",
"query_count": 6,
"weight": 0.05,
"difficulty_distribution": {"l1": 3, "l2": 2, "l3": 1}
},
{
"scenario": "skill_calling",
"source_label_zh": "Skill调用",
"query_count": 4,
"weight": 0.07,
"difficulty_distribution": {"l1": 0, "l2": 4, "l3": 0}
},
{
"scenario": "system_capabilities",
"source_label_zh": "系统能力",
"query_count": 4,
"weight": 0.05,
"difficulty_distribution": {"l1": 1, "l2": 3, "l3": 0}
}
],
"current_corpus_alignment": {
"mapped_task_total": 20,
"covered_scenarios": {
"coding_dev_assist": 9,
"data_processing_analysis": 2,
"web_info_ops": 2,
"multi_step_compound": 3,
"context_continuation": 1,
"error_boundary_cases": 2,
"system_capabilities": 1
},
"missing_scenarios": [
"file_system_ops",
"calendar_reminders",
"communication_messaging",
"personal_life_assistant",
"skill_calling"
]
}
}