mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-06-16 16:36:53 +08:00
fix: address remaining review nits
- Add top-level hooks wrapper to second JSON example (consistent with hooks.json format)
- Extract hardcoded thresholds as module-level constants (WALL_OF_TEXT_WORDS, SUMMARY_CHECK_WORDS, SUMMARY_CHECK_FIRST_N, TASK_OUTPUT_RATIO_HIGH/MEDIUM)

Skipped (not applicable):
- 'Scoring defaults to 5/5' — by design for heuristic fallback; SKILL.md already documents pairing with LLM judge for production use
- '--output silently ignored' — already fixed by _read_input refactor (checks args.output directly, not elif args.task and args.output)
This commit is contained in:
parent
f65ab491be
commit
8d360fb466
@ -38,6 +38,7 @@ To integrate it into hooks, capture the last agent output to a file first, then
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
"hooks": {
|
||||||
"PostToolUse": [
|
"PostToolUse": [
|
||||||
{
|
{
|
||||||
"matcher": "Bash",
|
"matcher": "Bash",
|
||||||
@ -50,6 +51,7 @@ To integrate it into hooks, capture the last agent output to a file first, then
|
|||||||
"description": "Remind agent to self-evaluate after shell verification"
|
"description": "Remind agent to self-evaluate after shell verification"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@ -24,6 +24,13 @@ import sys
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
# Tunable thresholds for evaluation heuristics
|
||||||
|
WALL_OF_TEXT_WORDS = 200
|
||||||
|
SUMMARY_CHECK_WORDS = 300
|
||||||
|
SUMMARY_CHECK_FIRST_N = 100
|
||||||
|
TASK_OUTPUT_RATIO_HIGH = 15
|
||||||
|
TASK_OUTPUT_RATIO_MEDIUM = 8
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class AxisScore:
|
class AxisScore:
|
||||||
@ -144,8 +151,8 @@ def _check_jargon(text: str) -> tuple[int, list[str]]:
|
|||||||
def _check_summary(text: str) -> tuple[int, list[str]]:
|
def _check_summary(text: str) -> tuple[int, list[str]]:
|
||||||
"""Return clarity deduction when long output lacks an early summary."""
|
"""Return clarity deduction when long output lacks an early summary."""
|
||||||
summary_terms = ["summary", "tldr", "overview", "in short"]
|
summary_terms = ["summary", "tldr", "overview", "in short"]
|
||||||
has_early_summary = any(term in ' '.join(text.split()[:100]).lower() for term in summary_terms)
|
has_early_summary = any(term in ' '.join(text.split()[:SUMMARY_CHECK_FIRST_N]).lower() for term in summary_terms)
|
||||||
if not has_early_summary and count_words(text) > 300:
|
if not has_early_summary and count_words(text) > SUMMARY_CHECK_WORDS:
|
||||||
return 1, ["- No summary/TLDR in first 100 words (text is 300+ words)"]
|
return 1, ["- No summary/TLDR in first 100 words (text is 300+ words)"]
|
||||||
return 0, []
|
return 0, []
|
||||||
|
|
||||||
@ -163,7 +170,7 @@ def check_clarity(text: str) -> AxisScore:
|
|||||||
evidence.append("+ Uses bullet points")
|
evidence.append("+ Uses bullet points")
|
||||||
|
|
||||||
for paragraph in [p for p in text.split("\n\n") if p.strip()]:
|
for paragraph in [p for p in text.split("\n\n") if p.strip()]:
|
||||||
if count_words(paragraph) > 200:
|
if count_words(paragraph) > WALL_OF_TEXT_WORDS:
|
||||||
deductions += 1
|
deductions += 1
|
||||||
evidence.append("- Wall-of-text paragraph (>200 words without break)")
|
evidence.append("- Wall-of-text paragraph (>200 words without break)")
|
||||||
break
|
break
|
||||||
@ -245,10 +252,10 @@ def check_conciseness(text: str, task: Optional[str] = None) -> AxisScore:
|
|||||||
if task:
|
if task:
|
||||||
task_wc = count_words(task)
|
task_wc = count_words(task)
|
||||||
ratio = wc / max(task_wc, 1)
|
ratio = wc / max(task_wc, 1)
|
||||||
if ratio > 15:
|
if ratio > TASK_OUTPUT_RATIO_HIGH:
|
||||||
evidence.append(f"- Output is {ratio:.0f}x longer than task description (high ratio)")
|
evidence.append(f"- Output is {ratio:.0f}x longer than task description (high ratio)")
|
||||||
score = min(score, 3)
|
score = min(score, 3)
|
||||||
elif ratio > 8:
|
elif ratio > TASK_OUTPUT_RATIO_MEDIUM:
|
||||||
evidence.append(f"- Output is {ratio:.0f}x longer than task description")
|
evidence.append(f"- Output is {ratio:.0f}x longer than task description")
|
||||||
score = min(score, 4)
|
score = min(score, 4)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user