{
  "timestamp": "2026-03-30T11:07:48.273Z",
  "skillVersion": "0.23.0-dev.0",
  "threshold": 0.8,
  "models": [
    {
      "model": "openai/gpt-4.1",
      "cases": [
        {
          "caseId": "list-issues-basic",
          "prompt": "show me the sentry issues",
          "plan": {
            "thinking": "The user wants to view the current Sentry issues for their project. I'll use the CLI's 'issue list' command to display unresolved issues, trusting auto-detection for org/project.",
            "commands": [
              {
                "command": "sentry issue list --query \"is:unresolved\" --limit 10",
                "purpose": "List unresolved issues in the current project, showing up to 10 for quick overview"
              }
            ],
            "notes": "If you want more issues, adjust the --limit value. To see resolved issues, remove or change the --query filter. For detailed info on a specific issue, use 'sentry issue view <shortId>'."
          },
          "criteria": [
            {
              "name": "no-pre-auth",
              "pass": true,
              "reason": "Does not run auth commands before the actual command"
            },
            {
              "name": "no-pre-lookup",
              "pass": true,
              "reason": "Does not run org/project listing before the actual command"
            },
            {
              "name": "correct-command",
              "pass": true,
              "reason": "Uses sentry issue list as the primary command"
            },
            {
              "name": "minimal-calls",
              "pass": true,
              "reason": "Plans at most 2 CLI commands total"
            },
            {
              "name": "trusts-auto-detect",
              "pass": true,
              "reason": "Does not hardcode org/project names"
            },
            {
              "name": "overall-quality",
              "pass": true,
              "reason": "The plan uses the appropriate 'sentry issue list' command with a query to show unresolved issues, aligns with the user's request to see Sentry issues, is efficient with a limit of 10, and includes helpful notes on customization."
            }
          ],
          "score": 1,
          "passed": true
        },
        {
          "caseId": "recent-errors",
          "prompt": "what errors happened recently in my project?",
          "plan": {
            "thinking": "The user wants to see recent errors (issues) in their project. The CLI auto-detects org/project context, so I can list recent issues using the default detection. I'll use Sentry's query syntax to filter for unresolved issues, which are typically recent errors.",
            "commands": [
              {
                "command": "sentry issue list --query \"is:unresolved\" --limit 10 --json --fields shortId,title,level,firstSeen,lastSeen,permalink",
                "purpose": "List the 10 most recent unresolved error issues with key details"
              }
            ],
            "notes": "If you want to see resolved or all issues, adjust the query flag (e.g., omit \"is:unresolved\"). The command auto-detects your project context. For a different issue count, change --limit."
          },
          "criteria": [
            {
              "name": "no-pre-auth",
              "pass": true,
              "reason": "Does not run auth commands before the actual command"
            },
            {
              "name": "no-pre-lookup",
              "pass": true,
              "reason": "Does not run org/project listing before the actual command"
            },
            {
              "name": "correct-command",
              "pass": true,
              "reason": "Uses sentry issue list as the primary command"
            },
            {
              "name": "minimal-calls",
              "pass": true,
              "reason": "Plans at most 2 CLI commands total"
            },
            {
              "name": "overall-quality",
              "pass": true,
              "reason": "The plan uses the correct Sentry CLI command to list recent unresolved issues, which aligns with the user's request for recent errors. It efficiently retrieves relevant fields, respects project context auto-detection, and limits output to a reasonable number of issues without unnecessary steps."
            }
          ],
          "score": 1,
          "passed": true
        },
        {
          "caseId": "view-specific-issue",
          "prompt": "tell me about issue PROJ-42",
          "plan": {
            "thinking": "The user wants detailed information about a specific Sentry issue with short ID PROJ-42. The best way to do this is to view the issue directly using the CLI.",
            "commands": [
              {
                "command": "sentry issue view PROJ-42",
                "purpose": "Show details about issue PROJ-42, including status, title, priority, and more."
              }
            ],
            "notes": "If you need root cause analysis or a fix plan for this issue, you can use 'sentry issue explain PROJ-42' and 'sentry issue plan PROJ-42'. Add '-w' to open it in your browser."
          },
          "criteria": [
            {
              "name": "no-pre-auth",
              "pass": true,
              "reason": "Does not pre-authenticate"
            },
            {
              "name": "correct-command",
              "pass": true,
              "reason": "Uses sentry issue view with the correct short ID"
            },
            {
              "name": "minimal-calls",
              "pass": true,
              "reason": "Plans exactly 1 CLI command"
            },
            {
              "name": "overall-quality",
              "pass": true,
              "reason": "The plan uses the correct CLI command 'sentry issue view PROJ-42' to show detailed information about the specified issue, which directly addresses the user's request efficiently without unnecessary steps."
            }
          ],
          "score": 1,
          "passed": true
        },
        {
          "caseId": "explain-issue",
          "prompt": "why is CLI-G5 happening? can you analyze the root cause?",
          "plan": {
            "thinking": "The user wants to understand the root cause of issue CLI-G5 using Sentry's AI analysis. The CLI provides a command for Seer AI root cause explanation.",
            "commands": [
              {
                "command": "sentry issue explain CLI-G5",
                "purpose": "Analyze and explain the root cause of issue CLI-G5 using Seer AI"
              }
            ],
            "notes": "You can also run 'sentry issue view CLI-G5' for detailed metadata if needed. The Seer AI explanation should give actionable insights and likely causes."
          },
          "criteria": [
            {
              "name": "no-pre-auth",
              "pass": true,
              "reason": "Does not pre-authenticate"
            },
            {
              "name": "correct-command",
              "pass": true,
              "reason": "Uses sentry issue explain with the correct ID"
            },
            {
              "name": "minimal-calls",
              "pass": true,
              "reason": "Plans at most 2 CLI commands"
            },
            {
              "name": "overall-quality",
              "pass": true,
              "reason": "The plan uses the correct 'sentry issue explain CLI-G5' command to analyze and explain the root cause, which directly addresses the user's request efficiently without unnecessary steps."
            }
          ],
          "score": 1,
          "passed": true
        },
        {
          "caseId": "list-traces",
          "prompt": "show me recent traces for performance analysis",
          "plan": {
            "thinking": "The user wants to see recent distributed traces for performance analysis. The Sentry CLI provides 'sentry trace list' which lists recent traces and auto-detects org/project context.",
            "commands": [
              {
                "command": "sentry trace list --limit 10",
                "purpose": "List the 10 most recent traces to provide a snapshot for performance analysis"
              }
            ],
            "notes": "Increase or decrease the '--limit' if you want to see more or fewer traces. For deeper analysis of a single trace, use 'sentry trace view <trace-id>'."
          },
          "criteria": [
            {
              "name": "no-pre-auth",
              "pass": true,
              "reason": "Does not pre-authenticate"
            },
            {
              "name": "no-pre-lookup",
              "pass": true,
              "reason": "Does not look up org/project first"
            },
            {
              "name": "correct-command",
              "pass": true,
              "reason": "Uses sentry trace list"
            },
            {
              "name": "minimal-calls",
              "pass": true,
              "reason": "Plans at most 2 CLI commands"
            },
            {
              "name": "trusts-auto-detect",
              "pass": true,
              "reason": "Does not hardcode org/project names"
            },
            {
              "name": "overall-quality",
              "pass": true,
              "reason": "The plan uses the appropriate 'sentry trace list' command with a reasonable limit to show recent traces, which directly addresses the user's request for recent traces for performance analysis. It is efficient and would work if executed."
            }
          ],
          "score": 1,
          "passed": true
        },
        {
          "caseId": "json-output",
          "prompt": "get me all unresolved issues as JSON so I can process them in a script",
          "plan": {
            "thinking": "The user wants a JSON list of all unresolved Sentry issues for processing in a script. The 'sentry issue list --query \"is:unresolved\" --json' command outputs unresolved issues in JSON format.",
            "commands": [
              {
                "command": "sentry issue list --query \"is:unresolved\" --json",
                "purpose": "Retrieve all unresolved issues from Sentry in machine-readable JSON format."
              }
            ],
            "notes": "If you want only specific fields, you can add '--fields <field1,field2,...>' to reduce output size. If there are many issues, consider using '--limit' for pagination, or process results in batches."
          },
          "criteria": [
            {
              "name": "correct-command",
              "pass": true,
              "reason": "Uses sentry issue list with --json flag"
            },
            {
              "name": "minimal-calls",
              "pass": true,
              "reason": "Plans at most 2 CLI commands"
            },
            {
              "name": "trusts-auto-detect",
              "pass": true,
              "reason": "Does not hardcode org/project names"
            },
            {
              "name": "overall-quality",
              "pass": true,
              "reason": "The plan uses the appropriate Sentry CLI command with correct flags to fetch unresolved issues in JSON format, which directly satisfies the user's request efficiently."
            }
          ],
          "score": 1,
          "passed": true
        },
        {
          "caseId": "api-fallback",
          "prompt": "I need to get the raw organization settings from the Sentry API",
          "plan": {
            "thinking": "The user wants to fetch the raw settings for their Sentry organization. The easiest way is to use the 'sentry api' command targeting the organization settings endpoint. The CLI will auto-detect the organization if not specified, but if we know the org slug, specifying it will ensure correct targeting.",
            "commands": [
              {
                "command": "sentry api /api/0/organizations/",
                "purpose": "List organizations to determine the slug if needed."
              },
              {
                "command": "sentry api /api/0/organizations/<org-slug>/",
                "purpose": "Fetch the raw organization settings from the Sentry API."
              }
            ],
            "notes": "Replace <org-slug> with the actual organization slug if known. The output will be raw JSON. If you already know your org slug, you can skip the first command."
          },
          "criteria": [
            {
              "name": "correct-command",
              "pass": true,
              "reason": "Uses sentry api for the request"
            },
            {
              "name": "no-external-curl",
              "pass": true,
              "reason": "Does not use raw curl commands"
            },
            {
              "name": "overall-quality",
              "pass": true,
              "reason": "The plan uses the correct 'sentry api' commands to first identify the organization slug if unknown and then fetch the raw organization settings. It is efficient by allowing skipping the first command if the org slug is known and directly addresses the user's request for raw settings from the API."
            }
          ],
          "score": 1,
          "passed": true
        },
        {
          "caseId": "explore-schema",
          "prompt": "what API endpoints are available for releases?",
          "plan": {
            "thinking": "The user wants to know which API endpoints exist for managing releases in Sentry. The Sentry CLI provides the 'sentry schema' command to browse API resources. Running 'sentry schema releases' will list release-related endpoints.",
            "commands": [
              {
                "command": "sentry schema releases",
                "purpose": "List all API endpoints related to releases"
              }
            ],
            "notes": "This command will output a list of relevant endpoints, methods, and descriptions for the 'releases' resource. Use the full endpoint path with 'sentry api' for direct API access if needed."
          },
          "criteria": [
            {
              "name": "correct-command",
              "pass": true,
              "reason": "Uses sentry schema to explore the API"
            },
            {
              "name": "no-external-fetch",
              "pass": true,
              "reason": "Does not fetch external API docs"
            },
            {
              "name": "minimal-calls",
              "pass": true,
              "reason": "Plans at most 2 CLI commands"
            },
            {
              "name": "overall-quality",
              "pass": true,
              "reason": "The plan uses the appropriate 'sentry schema releases' command to list all API endpoints related to releases, which directly addresses the user's request efficiently and correctly."
            }
          ],
          "score": 1,
          "passed": true
        }
      ],
      "totalPassed": 8,
      "totalCases": 8,
      "score": 1
    }
  ]
}