diff --git a/skills/agent-self-evaluation/references/hook-integration.md b/skills/agent-self-evaluation/references/hook-integration.md
index e56455f3..2bb3c3ed 100644
--- a/skills/agent-self-evaluation/references/hook-integration.md
+++ b/skills/agent-self-evaluation/references/hook-integration.md
@@ -38,18 +38,20 @@ To integrate it into hooks, capture the last agent output to a file first, then
 
 ```json
 {
-  "PostToolUse": [
-    {
-      "matcher": "Bash",
-      "hooks": [
-        {
-          "type": "command",
-          "command": "echo '[Self-Eval] If this command completed verification for a non-trivial task, consider running agent-self-evaluation.'"
-        }
-      ],
-      "description": "Remind agent to self-evaluate after shell verification"
-    }
-  ]
+  "hooks": {
+    "PostToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "echo '[Self-Eval] If this command completed verification for a non-trivial task, consider running agent-self-evaluation.'"
+          }
+        ],
+        "description": "Remind agent to self-evaluate after shell verification"
+      }
+    ]
+  }
 }
 ```
 
diff --git a/skills/agent-self-evaluation/scripts/evaluate.py b/skills/agent-self-evaluation/scripts/evaluate.py
index f560dc98..2d129c40 100755
--- a/skills/agent-self-evaluation/scripts/evaluate.py
+++ b/skills/agent-self-evaluation/scripts/evaluate.py
@@ -24,6 +24,13 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional
 
+# Tunable thresholds for evaluation heuristics
+WALL_OF_TEXT_WORDS = 200  # a paragraph with more words than this costs one clarity deduction
+SUMMARY_CHECK_WORDS = 300  # texts with more words than this are checked for an early summary term
+SUMMARY_CHECK_FIRST_N = 100  # how many leading words are searched for a summary term
+TASK_OUTPUT_RATIO_HIGH = 15  # output/task word-count ratio above which conciseness is capped at 3
+TASK_OUTPUT_RATIO_MEDIUM = 8  # output/task word-count ratio above which conciseness is capped at 4
+
 
 @dataclass
 class AxisScore:
@@ -144,8 +151,8 @@ def _check_jargon(text: str) -> tuple[int, list[str]]:
 def _check_summary(text: str) -> tuple[int, list[str]]:
     """Return clarity deduction when long output lacks an early summary."""
     summary_terms = ["summary", "tldr", "overview", "in short"]
-    has_early_summary = any(term in ' '.join(text.split()[:100]).lower() for term in summary_terms)
-    if not has_early_summary and count_words(text) > 300:
-        return 1, ["- No summary/TLDR in first 100 words (text is 300+ words)"]
+    has_early_summary = any(term in ' '.join(text.split()[:SUMMARY_CHECK_FIRST_N]).lower() for term in summary_terms)
+    if not has_early_summary and count_words(text) > SUMMARY_CHECK_WORDS:
+        return 1, [f"- No summary/TLDR in first {SUMMARY_CHECK_FIRST_N} words (text is {SUMMARY_CHECK_WORDS}+ words)"]
     return 0, []
 
@@ -163,7 +170,7 @@ def check_clarity(text: str) -> AxisScore:
         evidence.append("+ Uses bullet points")
 
     for paragraph in [p for p in text.split("\n\n") if p.strip()]:
-        if count_words(paragraph) > 200:
+        if count_words(paragraph) > WALL_OF_TEXT_WORDS:
             deductions += 1
             evidence.append("- Wall-of-text paragraph (>200 words without break)")
             break
@@ -245,10 +252,10 @@ def check_conciseness(text: str, task: Optional[str] = None) -> AxisScore:
     if task:
         task_wc = count_words(task)
         ratio = wc / max(task_wc, 1)
-        if ratio > 15:
+        if ratio > TASK_OUTPUT_RATIO_HIGH:
             evidence.append(f"- Output is {ratio:.0f}x longer than task description (high ratio)")
             score = min(score, 3)
-        elif ratio > 8:
+        elif ratio > TASK_OUTPUT_RATIO_MEDIUM:
             evidence.append(f"- Output is {ratio:.0f}x longer than task description")
             score = min(score, 4)