fix: address remaining review nits

- Add top-level hooks wrapper to second JSON example (consistent with hooks.json format)
- Extract hardcoded thresholds into module-level constants (WALL_OF_TEXT_WORDS,
  SUMMARY_CHECK_WORDS, SUMMARY_CHECK_FIRST_N, TASK_OUTPUT_RATIO_HIGH/MEDIUM)

Skipped (not applicable):
- 'Scoring defaults to 5/5' — by design for the heuristic fallback; SKILL.md already
  documents pairing with an LLM judge for production use
- '--output silently ignored' — already fixed by the _read_input refactor (it now checks
  args.output directly instead of using elif args.task and args.output)
This commit is contained in:
Hawthorn 2026-06-10 18:27:27 +05:30
parent f65ab491be
commit 8d360fb466
2 changed files with 26 additions and 17 deletions

View File

@ -38,6 +38,7 @@ To integrate it into hooks, capture the last agent output to a file first, then
```json ```json
{ {
"hooks": {
"PostToolUse": [ "PostToolUse": [
{ {
"matcher": "Bash", "matcher": "Bash",
@ -51,6 +52,7 @@ To integrate it into hooks, capture the last agent output to a file first, then
} }
] ]
} }
}
``` ```
This avoids documenting unsupported command-expression matcher syntax. If your harness supports command-level matcher expressions, prefer a word-boundary regex such as `\b(pytest|npm test|go test)\b` rather than a broad `test` substring. This avoids documenting unsupported command-expression matcher syntax. If your harness supports command-level matcher expressions, prefer a word-boundary regex such as `\b(pytest|npm test|go test)\b` rather than a broad `test` substring.

View File

@ -24,6 +24,13 @@ import sys
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
# Tunable thresholds for evaluation heuristics
WALL_OF_TEXT_WORDS = 200
SUMMARY_CHECK_WORDS = 300
SUMMARY_CHECK_FIRST_N = 100
TASK_OUTPUT_RATIO_HIGH = 15
TASK_OUTPUT_RATIO_MEDIUM = 8
@dataclass @dataclass
class AxisScore: class AxisScore:
@ -144,8 +151,8 @@ def _check_jargon(text: str) -> tuple[int, list[str]]:
def _check_summary(text: str) -> tuple[int, list[str]]: def _check_summary(text: str) -> tuple[int, list[str]]:
"""Return clarity deduction when long output lacks an early summary.""" """Return clarity deduction when long output lacks an early summary."""
summary_terms = ["summary", "tldr", "overview", "in short"] summary_terms = ["summary", "tldr", "overview", "in short"]
has_early_summary = any(term in ' '.join(text.split()[:100]).lower() for term in summary_terms) has_early_summary = any(term in ' '.join(text.split()[:SUMMARY_CHECK_FIRST_N]).lower() for term in summary_terms)
if not has_early_summary and count_words(text) > 300: if not has_early_summary and count_words(text) > SUMMARY_CHECK_WORDS:
return 1, ["- No summary/TLDR in first 100 words (text is 300+ words)"] return 1, ["- No summary/TLDR in first 100 words (text is 300+ words)"]
return 0, [] return 0, []
@ -163,7 +170,7 @@ def check_clarity(text: str) -> AxisScore:
evidence.append("+ Uses bullet points") evidence.append("+ Uses bullet points")
for paragraph in [p for p in text.split("\n\n") if p.strip()]: for paragraph in [p for p in text.split("\n\n") if p.strip()]:
if count_words(paragraph) > 200: if count_words(paragraph) > WALL_OF_TEXT_WORDS:
deductions += 1 deductions += 1
evidence.append("- Wall-of-text paragraph (>200 words without break)") evidence.append("- Wall-of-text paragraph (>200 words without break)")
break break
@ -245,10 +252,10 @@ def check_conciseness(text: str, task: Optional[str] = None) -> AxisScore:
if task: if task:
task_wc = count_words(task) task_wc = count_words(task)
ratio = wc / max(task_wc, 1) ratio = wc / max(task_wc, 1)
if ratio > 15: if ratio > TASK_OUTPUT_RATIO_HIGH:
evidence.append(f"- Output is {ratio:.0f}x longer than task description (high ratio)") evidence.append(f"- Output is {ratio:.0f}x longer than task description (high ratio)")
score = min(score, 3) score = min(score, 3)
elif ratio > 8: elif ratio > TASK_OUTPUT_RATIO_MEDIUM:
evidence.append(f"- Output is {ratio:.0f}x longer than task description") evidence.append(f"- Output is {ratio:.0f}x longer than task description")
score = min(score, 4) score = min(score, 4)