mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-05-14 02:10:07 +08:00
docs: add evaluator billing readiness scenario (#1825)
This commit is contained in:
parent
dcf5668b27
commit
863519eecf
@ -199,7 +199,7 @@ is not complete unless the evidence column exists and has been freshly verified.
|
|||||||
| AgentShield enterprise iteration | Policy gates, SARIF, packs, provenance, corpus, HTML reports, exception lifecycle audit | PRs #53, #55-#62 landed with test evidence | Needs PDF/export decision or next enterprise signal |
|
| AgentShield enterprise iteration | Policy gates, SARIF, packs, provenance, corpus, HTML reports, exception lifecycle audit | PRs #53, #55-#62 landed with test evidence | Needs PDF/export decision or next enterprise signal |
|
||||||
| ECC Tools next-level app | Billing audit, PR checks, deep analyzer, sync backlog | PRs #26-#39 landed with test evidence | Needs capacity-backed Linear rollout / broader evaluator corpus |
|
| ECC Tools next-level app | Billing audit, PR checks, deep analyzer, sync backlog | PRs #26-#39 landed with test evidence | Needs capacity-backed Linear rollout / broader evaluator corpus |
|
||||||
| GitGuardian/Dependabot/CodeRabbit-style checks | Non-blocking taxonomy and deterministic follow-up checks | ECC-Tools risk taxonomy check plus follow-up signals landed, including Skill Quality, Deep Analyzer Evidence, Analyzer Corpus Evidence, RAG/Evaluator Evidence, and PR Review/Salvage Evidence | Partially complete |
|
| GitGuardian/Dependabot/CodeRabbit-style checks | Non-blocking taxonomy and deterministic follow-up checks | ECC-Tools risk taxonomy check plus follow-up signals landed, including Skill Quality, Deep Analyzer Evidence, Analyzer Corpus Evidence, RAG/Evaluator Evidence, and PR Review/Salvage Evidence | Partially complete |
|
||||||
| Harness-agnostic learning system | Audit, adapter matrix, observability, traces, promotion loop | Audit/adapters/observability gates plus `docs/architecture/evaluator-rag-prototype.md` and `examples/evaluator-rag-prototype/` define the first read-only scenario, trace, report, playbook, and verifier result | Needs broader evaluator corpus |
|
| Harness-agnostic learning system | Audit, adapter matrix, observability, traces, promotion loop | Audit/adapters/observability gates plus `docs/architecture/evaluator-rag-prototype.md` and `examples/evaluator-rag-prototype/` define read-only stale-salvage and billing-readiness scenarios with trace, report, playbook, and verifier result artifacts | Needs broader evaluator corpus |
|
||||||
| Linear roadmap is detailed | Linear project status plus repo mirror | Repo mirror exists; issue creation was retried on 2026-05-12 and remains blocked by the workspace free issue limit | Needs recurring status updates after each merge batch |
|
| Linear roadmap is detailed | Linear project status plus repo mirror | Repo mirror exists; issue creation was retried on 2026-05-12 and remains blocked by the workspace free issue limit | Needs recurring status updates after each merge batch |
|
||||||
| Flow separation and progress tracking | Flow lanes with owner artifacts and update cadence | This roadmap defines lanes below | Active |
|
| Flow separation and progress tracking | Flow lanes with owner artifacts and update cadence | This roadmap defines lanes below | Active |
|
||||||
| Realtime Linear sync | Project updates while issue limit is blocked; issues later | ECC-Tools #39 implements opt-in Linear API sync for deferred follow-up backlog items | Needs workspace capacity/config rollout |
|
| Realtime Linear sync | Project updates while issue limit is blocked; issues later | ECC-Tools #39 implements opt-in Linear API sync for deferred follow-up backlog items | Needs workspace capacity/config rollout |
|
||||||
@ -218,7 +218,7 @@ back to the repo evidence and merge commits.
|
|||||||
| Queue hygiene and salvage | GitHub PR/issue state, salvage ledger | Append ledger entries for any future stale closures | Every cleanup batch |
|
| Queue hygiene and salvage | GitHub PR/issue state, salvage ledger | Append ledger entries for any future stale closures | Every cleanup batch |
|
||||||
| Release and publication | rc.1 release docs, publication readiness doc | Naming matrix and plugin submission/contact checklist | Before any tag |
|
| Release and publication | rc.1 release docs, publication readiness doc | Naming matrix and plugin submission/contact checklist | Before any tag |
|
||||||
| Harness OS core | Audit, adapter matrix, observability docs, `ecc2/` | HUD/session-control acceptance spec | Weekly until GA |
|
| Harness OS core | Audit, adapter matrix, observability docs, `ecc2/` | HUD/session-control acceptance spec | Weekly until GA |
|
||||||
| Evaluation and RAG | Reference-set validation, harness audit, traces | Read-only evaluator/RAG prototype plus fixture contract | Expand to CI, billing, harness-config, and AgentShield scenarios |
|
| Evaluation and RAG | Reference-set validation, harness audit, traces | Read-only evaluator/RAG prototype plus stale-salvage and billing-readiness fixtures | Expand to CI, harness-config, and AgentShield scenarios |
|
||||||
| AgentShield enterprise | AgentShield PR evidence and roadmap notes | PDF-export decision or next enterprise signal | After value decision |
|
| AgentShield enterprise | AgentShield PR evidence and roadmap notes | PDF-export decision or next enterprise signal | After value decision |
|
||||||
| ECC Tools app | ECC-Tools PR evidence, billing audit, risk taxonomy | Capacity-backed Linear rollout or broader evaluator/RAG corpus slice | Next implementation batch |
|
| ECC Tools app | ECC-Tools PR evidence, billing audit, risk taxonomy | Capacity-backed Linear rollout or broader evaluator/RAG corpus slice | Next implementation batch |
|
||||||
| Linear progress | Linear project status updates and this mirror | Status update with queue/evidence/missing gates | Every significant merge batch |
|
| Linear progress | Linear project status updates and this mirror | Status update with queue/evidence/missing gates | Every significant merge batch |
|
||||||
@ -356,6 +356,11 @@ Acceptance:
|
|||||||
Manifest Integrity, CI/CD Recommendation, Cost/Token Risk, Reference Set
|
Manifest Integrity, CI/CD Recommendation, Cost/Token Risk, Reference Set
|
||||||
Validation, Deep Analyzer Evidence, RAG/Evaluator Evidence,
|
Validation, Deep Analyzer Evidence, RAG/Evaluator Evidence,
|
||||||
PR Review/Salvage Evidence, Skill Quality, and Agent Config Review.
|
PR Review/Salvage Evidence, Skill Quality, and Agent Config Review.
|
||||||
|
- Evaluator/RAG billing readiness fixture
|
||||||
|
`examples/evaluator-rag-prototype/billing-marketplace-readiness/` records the
|
||||||
|
read-only claim-verification path for Marketplace, App, subscription, seat,
|
||||||
|
entitlement, and plan language before launch copy can treat those claims as
|
||||||
|
live.
|
||||||
- Cost/token-risk predictive follow-ups flag AI routing, model-call, usage,
|
- Cost/token-risk predictive follow-ups flag AI routing, model-call, usage,
|
||||||
quota, and budget changes when budget evidence is missing.
|
quota, and budget changes when budget evidence is missing.
|
||||||
- Reference-set validation follow-ups flag analyzer, skill, agent, command, and
|
- Reference-set validation follow-ups flag analyzer, skill, agent, command, and
|
||||||
@ -412,6 +417,6 @@ Acceptance:
|
|||||||
executive report, corpus benchmark output, and exception lifecycle audit.
|
executive report, corpus benchmark output, and exception lifecycle audit.
|
||||||
2. Enable/configure the merged Linear backlog sync path after workspace issue
|
2. Enable/configure the merged Linear backlog sync path after workspace issue
|
||||||
capacity clears or the Linear workspace is upgraded.
|
capacity clears or the Linear workspace is upgraded.
|
||||||
3. Expand the evaluator/RAG corpus beyond the first stale-salvage prototype to
|
3. Expand the evaluator/RAG corpus beyond the stale-salvage and billing
|
||||||
CI failure diagnosis, harness-config drift, billing readiness, and
|
prototypes to CI failure diagnosis, harness-config drift, and AgentShield
|
||||||
AgentShield policy exception scenarios.
|
policy exception scenarios.
|
||||||
|
|||||||
@ -7,9 +7,10 @@ that loop.
|
|||||||
|
|
||||||
The fixture set lives in
|
The fixture set lives in
|
||||||
[`examples/evaluator-rag-prototype/`](../../examples/evaluator-rag-prototype/).
|
[`examples/evaluator-rag-prototype/`](../../examples/evaluator-rag-prototype/).
|
||||||
It uses the May 2026 stale-PR cleanup and salvage lane as the first concrete
|
It started with the May 2026 stale-PR cleanup and salvage lane because that
|
||||||
scenario because that lane has real inputs, real accepted work, and real
|
lane has real inputs, real accepted work, and real rejected work. The corpus now
|
||||||
rejected work.
|
also includes a billing/Marketplace readiness scenario so launch copy cannot
|
||||||
|
treat dry-run release evidence or roadmap intent as live billing state.
|
||||||
|
|
||||||
## Reference Pressure
|
## Reference Pressure
|
||||||
|
|
||||||
@ -83,6 +84,19 @@ The verifier rejects a blind cherry-pick proposal that:
|
|||||||
- lacks tests or ledger updates;
|
- lacks tests or ledger updates;
|
||||||
- mutates release or plugin publication state.
|
- mutates release or plugin publication state.
|
||||||
|
|
||||||
|
## Corpus Fixtures
|
||||||
|
|
||||||
|
The root fixture files preserve the original
|
||||||
|
`stale-pr-salvage-maintainer-branch` prototype. Additional scenarios can live in
|
||||||
|
subdirectories when they reuse the same five-artifact contract.
|
||||||
|
|
||||||
|
Current corpus:
|
||||||
|
|
||||||
|
- `stale-pr-salvage-maintainer-branch`: recovers useful closed PR work through
|
||||||
|
maintainer-owned branches with attribution and validation.
|
||||||
|
- `billing-marketplace-readiness`: verifies billing, App, and Marketplace
|
||||||
|
launch claims before public copy says they are live.
|
||||||
|
|
||||||
## ECC Tools Mapping
|
## ECC Tools Mapping
|
||||||
|
|
||||||
ECC Tools already flags missing RAG/evaluator evidence for retrieval,
|
ECC Tools already flags missing RAG/evaluator evidence for retrieval,
|
||||||
@ -117,6 +131,4 @@ The next evaluator/RAG corpus should add:
|
|||||||
|
|
||||||
- a CI-failure diagnosis scenario with captured logs and a known fix;
|
- a CI-failure diagnosis scenario with captured logs and a known fix;
|
||||||
- a harness-config quality scenario covering MCP/plugin/hook drift;
|
- a harness-config quality scenario covering MCP/plugin/hook drift;
|
||||||
- a billing-readiness scenario that separates verified Marketplace claims from
|
|
||||||
launch-copy assumptions;
|
|
||||||
- an AgentShield policy exception scenario with SARIF and report evidence.
|
- an AgentShield policy exception scenario with SARIF and report evidence.
|
||||||
|
|||||||
@ -0,0 +1,41 @@
|
|||||||
|
# Billing Marketplace Readiness Playbook
|
||||||
|
|
||||||
|
Use this playbook when release copy or roadmap text mentions ECC Tools
|
||||||
|
billing, Marketplace availability, account recovery, plans, seats,
|
||||||
|
entitlements, or subscription state.
|
||||||
|
|
||||||
|
## Accepted Path
|
||||||
|
|
||||||
|
1. Start from `docs/releases/2.0.0-rc.1/publication-readiness.md`.
|
||||||
|
2. Check the current repo and public listing surfaces:
|
||||||
|
- `gh api repos/ECC-Tools/ECC-Tools`
|
||||||
|
- `https://github.com/marketplace/ecc-tools`
|
||||||
|
3. Classify every billing or Marketplace claim as:
|
||||||
|
- `verified`
|
||||||
|
- `blocked`
|
||||||
|
- `remove-before-publication`
|
||||||
|
4. Keep roadmap acceptance criteria separate from live product claims.
|
||||||
|
5. Update release copy only after the evidence points to a live URL or command
|
||||||
|
result.
|
||||||
|
6. Leave tag creation, npm publish, plugin submission, marketplace edits,
|
||||||
|
subscription changes, and announcement posting approval-gated.
|
||||||
|
|
||||||
|
## Rejected Path
|
||||||
|
|
||||||
|
Do not say billing is live because a roadmap item exists, a dry run passed, or a
|
||||||
|
Marketplace URL is known. Roadmap intent and dry-run publication evidence are
|
||||||
|
not evidence of live billing state.
|
||||||
|
|
||||||
|
Do not edit plan limits, subscriptions, seats, entitlements, or Marketplace
|
||||||
|
metadata from the evaluator run. Those are product/operator actions and require
|
||||||
|
their own approval path.
|
||||||
|
|
||||||
|
## Validation Gates
|
||||||
|
|
||||||
|
- `rg -n "billing|Billing|Marketplace|marketplace|subscription|seat|entitlement|plan" README.md docs/releases/2.0.0-rc.1 docs/ECC-2.0-GA-ROADMAP.md`
|
||||||
|
- `gh api repos/ECC-Tools/ECC-Tools`
|
||||||
|
- Manual live check of `https://github.com/marketplace/ecc-tools`
|
||||||
|
- `npx --yes markdownlint-cli docs/releases/2.0.0-rc.1/*.md docs/ECC-2.0-GA-ROADMAP.md`
|
||||||
|
- `git diff --check`
|
||||||
|
|
||||||
|
Record the evidence in a maintainer-owned PR before release copy is published.
|
||||||
@ -0,0 +1,35 @@
|
|||||||
|
{
|
||||||
|
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||||
|
"scenario_id": "billing-marketplace-readiness",
|
||||||
|
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
|
||||||
|
"result": "prototype_passed",
|
||||||
|
"read_only": true,
|
||||||
|
"scores": {
|
||||||
|
"claim_evidence": 0.82,
|
||||||
|
"publication_safety": 1,
|
||||||
|
"marketplace_specificity": 0.84,
|
||||||
|
"billing_scope_control": 1,
|
||||||
|
"announcement_safety": 1
|
||||||
|
},
|
||||||
|
"findings": [
|
||||||
|
{
|
||||||
|
"id": "billing-claim-gate-needed",
|
||||||
|
"severity": "warning",
|
||||||
|
"summary": "Release docs require a fresh ECC Tools billing/App/Marketplace check before launch copy can claim live billing readiness."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "dry-run-not-live-state",
|
||||||
|
"severity": "warning",
|
||||||
|
"summary": "May 12 evidence proves package/plugin dry runs and clean install smoke, but it does not prove a live Marketplace billing state."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "safe-next-action",
|
||||||
|
"severity": "info",
|
||||||
|
"summary": "The reusable next action is a read-only evidence checklist that classifies each launch-copy billing claim before publication."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"recommended_next_action": {
|
||||||
|
"candidate_id": "evidence-backed-billing-check",
|
||||||
|
"action": "Run the promoted billing/Marketplace claim-verification checklist before any launch copy, GitHub release text, or social copy says billing is live."
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,55 @@
|
|||||||
|
{
|
||||||
|
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||||
|
"scenario_id": "billing-marketplace-readiness",
|
||||||
|
"title": "Verify billing and Marketplace claims before launch copy",
|
||||||
|
"mode": "read_only_prototype",
|
||||||
|
"objective": "Given rc.1 release docs and ECC Tools billing roadmap evidence, separate verified Marketplace/App/billing state from assumptions before any announcement or publication action.",
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"kind": "repo_doc",
|
||||||
|
"path": "docs/releases/2.0.0-rc.1/publication-readiness.md",
|
||||||
|
"purpose": "Release gate that blocks billing and Marketplace claims until fresh evidence exists"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"kind": "repo_doc",
|
||||||
|
"path": "docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md",
|
||||||
|
"purpose": "Dry-run publication evidence and explicit remaining blocker list"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"kind": "roadmap",
|
||||||
|
"path": "docs/ECC-2.0-GA-ROADMAP.md",
|
||||||
|
"purpose": "ECC Tools billing audit acceptance criteria and remaining release blockers"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"kind": "github_api",
|
||||||
|
"command": "gh api repos/ECC-Tools/ECC-Tools",
|
||||||
|
"purpose": "Fresh repository access and app-surface evidence before launch claims"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"kind": "public_url",
|
||||||
|
"url": "https://github.com/marketplace/ecc-tools",
|
||||||
|
"purpose": "Marketplace listing that must be checked live before copy says billing is ready"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"retrieval_questions": [
|
||||||
|
"Which billing or Marketplace claims are already backed by repo evidence?",
|
||||||
|
"Which claims still need a live Marketplace, App, subscription, plan, or entitlement check?",
|
||||||
|
"Which announcement docs mention billing or Marketplace status?",
|
||||||
|
"Which publication actions remain approval-gated and must not run during this evaluator pass?"
|
||||||
|
],
|
||||||
|
"forbidden_actions": [
|
||||||
|
"creating or editing GitHub Marketplace listings",
|
||||||
|
"changing plan limits, subscriptions, seats, or entitlements",
|
||||||
|
"creating release tags",
|
||||||
|
"publishing packages or plugins",
|
||||||
|
"posting announcement copy",
|
||||||
|
"claiming live billing readiness from dry-run evidence alone"
|
||||||
|
],
|
||||||
|
"acceptance_gates": [
|
||||||
|
"launch-copy claims are classified as verified, blocked, or remove-before-publication",
|
||||||
|
"Marketplace and App checks name the exact URL or command needed",
|
||||||
|
"billing claims link to fresh evidence rather than roadmap intent",
|
||||||
|
"publication actions remain approval-gated",
|
||||||
|
"at least one overclaim candidate is rejected"
|
||||||
|
]
|
||||||
|
}
|
||||||
@ -0,0 +1,45 @@
|
|||||||
|
{
|
||||||
|
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||||
|
"scenario_id": "billing-marketplace-readiness",
|
||||||
|
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
|
||||||
|
"read_only": true,
|
||||||
|
"events": [
|
||||||
|
{
|
||||||
|
"phase": "observation",
|
||||||
|
"summary": "Publication readiness still marks ECC Tools billing references and announcement copy as pending. Dry-run publication evidence says billing/App/Marketplace claims must be verified before launch copy uses them.",
|
||||||
|
"evidence": [
|
||||||
|
"docs/releases/2.0.0-rc.1/publication-readiness.md",
|
||||||
|
"docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"phase": "retrieval",
|
||||||
|
"summary": "Retrieved the release gate, dry-run evidence, roadmap billing acceptance criteria, and the public Marketplace URL that requires a live operator check.",
|
||||||
|
"evidence": [
|
||||||
|
"docs/ECC-2.0-GA-ROADMAP.md",
|
||||||
|
"gh api repos/ECC-Tools/ECC-Tools",
|
||||||
|
"https://github.com/marketplace/ecc-tools"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"phase": "proposal",
|
||||||
|
"summary": "Generated two candidate playbooks: evidence-backed billing claim verification, and announcement-first billing copy that treats roadmap intent as live billing readiness.",
|
||||||
|
"candidate_ids": [
|
||||||
|
"evidence-backed-billing-check",
|
||||||
|
"announcement-first-billing-copy"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"phase": "verification",
|
||||||
|
"summary": "Accepted the evidence-backed check and rejected announcement-first copy because billing and Marketplace surfaces remain pending until verified by fresh URLs or API output.",
|
||||||
|
"evidence": [
|
||||||
|
"examples/evaluator-rag-prototype/billing-marketplace-readiness/verifier-result.json"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"phase": "promotion",
|
||||||
|
"summary": "Promoted only the read-only verification playbook. No Marketplace edits, subscription changes, tags, package publishes, plugin submission, or announcement posts are performed.",
|
||||||
|
"promoted_candidate_id": "evidence-backed-billing-check"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@ -0,0 +1,35 @@
|
|||||||
|
{
|
||||||
|
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||||
|
"scenario_id": "billing-marketplace-readiness",
|
||||||
|
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
|
||||||
|
"read_only": true,
|
||||||
|
"candidates": [
|
||||||
|
{
|
||||||
|
"candidate_id": "evidence-backed-billing-check",
|
||||||
|
"decision": "accepted",
|
||||||
|
"score": 0.91,
|
||||||
|
"reasons": [
|
||||||
|
"keeps the run read-only",
|
||||||
|
"requires fresh Marketplace or GitHub API evidence",
|
||||||
|
"classifies launch-copy claims before publication",
|
||||||
|
"separates roadmap intent from live billing state",
|
||||||
|
"keeps release, package, plugin, billing, and announcement actions approval-gated"
|
||||||
|
],
|
||||||
|
"rollback": "Remove or revert any release-copy edits that cite unverified billing claims; no live billing state is changed by this playbook."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"candidate_id": "announcement-first-billing-copy",
|
||||||
|
"decision": "rejected",
|
||||||
|
"score": 0.18,
|
||||||
|
"reasons": [
|
||||||
|
"treats roadmap acceptance criteria as live billing evidence",
|
||||||
|
"does not require a fresh Marketplace listing check",
|
||||||
|
"could publish announcement copy before release URLs exist",
|
||||||
|
"does not classify unsupported claims for removal",
|
||||||
|
"risks implying subscription or entitlement readiness without proof"
|
||||||
|
],
|
||||||
|
"rollback": "Do not publish this copy; keep billing and Marketplace language blocked until the evidence checklist passes."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"promoted_candidate_id": "evidence-backed-billing-check"
|
||||||
|
}
|
||||||
@ -30,6 +30,10 @@ function readJson(fileName) {
|
|||||||
return JSON.parse(fs.readFileSync(path.join(fixtureRoot, fileName), 'utf8'));
|
return JSON.parse(fs.readFileSync(path.join(fixtureRoot, fileName), 'utf8'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Load and parse a JSON fixture stored beneath `fixtureRoot`.
 *
 * @param {string} relativePath - Location of the JSON file relative to
 *   `fixtureRoot`; may include subdirectory segments such as
 *   `billing-marketplace-readiness/scenario.json`.
 * @returns {*} Whatever `JSON.parse` yields for the file's UTF-8 contents.
 */
function readFixtureJson(relativePath) {
  const fixturePath = path.join(fixtureRoot, relativePath);
  const rawText = fs.readFileSync(fixturePath, 'utf8');
  return JSON.parse(rawText);
}
|
||||||
|
|
||||||
console.log('\n=== Testing evaluator RAG prototype ===\n');
|
console.log('\n=== Testing evaluator RAG prototype ===\n');
|
||||||
|
|
||||||
test('architecture doc records the artifact contract and reference pressure', () => {
|
test('architecture doc records the artifact contract and reference pressure', () => {
|
||||||
@ -134,6 +138,43 @@ test('roadmap points to the evaluator RAG prototype and keeps broader corpus wor
|
|||||||
assert.ok(roadmap.includes('Needs broader evaluator corpus'));
|
assert.ok(roadmap.includes('Needs broader evaluator corpus'));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('billing readiness scenario rejects launch copy overclaims', () => {
  // Load the four JSON artifacts plus the Markdown playbook of the
  // billing/Marketplace readiness fixture.
  const scenario = readFixtureJson('billing-marketplace-readiness/scenario.json');
  const trace = readFixtureJson('billing-marketplace-readiness/trace.json');
  const report = readFixtureJson('billing-marketplace-readiness/report.json');
  const verifier = readFixtureJson('billing-marketplace-readiness/verifier-result.json');
  const playbook = read('examples/evaluator-rag-prototype/billing-marketplace-readiness/candidate-playbook.md');

  const runArtifacts = [trace, report, verifier];

  // The scenario pins the id; every run artifact must point back at it.
  assert.strictEqual(scenario.scenario_id, 'billing-marketplace-readiness');
  runArtifacts.forEach(artifact => {
    assert.strictEqual(artifact.scenario_id, scenario.scenario_id);
  });

  // Each run artifact must be flagged as read-only.
  runArtifacts.forEach(artifact => {
    assert.strictEqual(artifact.read_only, true);
  });

  // Actions the scenario has to list as forbidden for this evaluator pass.
  const requiredForbiddenActions = [
    'creating or editing GitHub Marketplace listings',
    'changing plan limits, subscriptions, seats, or entitlements',
    'posting announcement copy',
    'claiming live billing readiness from dry-run evidence alone'
  ];
  requiredForbiddenActions.forEach(blocked => {
    assert.ok(scenario.forbidden_actions.includes(blocked), `Missing billing forbidden action: ${blocked}`);
  });

  // Look up a verifier candidate by its id (undefined when absent).
  const findCandidate = id => verifier.candidates.find(candidate => candidate.candidate_id === id);
  const accepted = findCandidate('evidence-backed-billing-check');
  const rejected = findCandidate('announcement-first-billing-copy');

  assert.ok(accepted, 'Missing accepted billing evidence candidate');
  assert.ok(rejected, 'Missing rejected announcement-overclaim candidate');
  assert.strictEqual(accepted.decision, 'accepted');
  assert.strictEqual(rejected.decision, 'rejected');
  assert.strictEqual(verifier.promoted_candidate_id, accepted.candidate_id);

  // The rejection must cite the roadmap-as-evidence overclaim, and the
  // playbook must carry the removal classification and the listing URL.
  const rejectionText = rejected.reasons.join('\n');
  assert.ok(rejectionText.includes('roadmap acceptance criteria'));
  assert.ok(playbook.includes('remove-before-publication'));
  assert.ok(playbook.includes('https://github.com/marketplace/ecc-tools'));
});
|
||||||
|
|
||||||
if (failed > 0) {
|
if (failed > 0) {
|
||||||
console.log(`\nFailed: ${failed}`);
|
console.log(`\nFailed: ${failed}`);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user