Mirror of https://github.com/affaan-m/everything-claude-code.git
(synced 2026-06-19 11:20:48 +08:00)
fix(continuous-learning): eliminate _SECRET_RE catastrophic backtracking + orphaned-CPU hang (#2278)
The observe hook's secret-scrub regex used a generic ([A-Za-z]+\s+)? group
that overlapped the separator and value classes, causing exponential
backtracking on identifier-dense tool I/O. An orphaned Python child process
then pegged a core at ~100% CPU for days, because the async hook timed out
without killing it.
- Rewrite _SECRET_RE as a linear-time matcher: a bounded separator {1,8}, a
fixed set of auth schemes (bearer|basic|token|bot) instead of [A-Za-z]+, and
a bounded value {8,256}. Pathological input drops from a hang to <1 ms; real
secrets are still redacted (verified, including 'Bearer <token>').
- Add a signal.alarm(8) self-timeout to both scrub blocks so any runaway
child self-terminates before the 10s async-hook timeout can orphan it.
This commit is contained in:
Parent: 3cdc69a0ea
Commit: 51184b692e
@ -268,13 +268,25 @@ if [ "$PARSED_OK" != "True" ]; then
|
||||
echo "$INPUT_JSON" | "$PYTHON_CMD" -c '
|
||||
import json, sys, os, re
|
||||
|
||||
# Linear-time secret matcher. Bounded quantifiers and a fixed set of auth
|
||||
# schemes (instead of a generic [A-Za-z]+\s+ that overlapped the value class)
|
||||
# prevent the catastrophic backtracking that pegged python at 100% CPU (#2278).
|
||||
_SECRET_RE = re.compile(
|
||||
r"(?i)(api[_-]?key|token|secret|password|authorization|credentials?|auth)"
|
||||
r"""(["'"'"'\s:=]+)"""
|
||||
r"([A-Za-z]+\s+)?"
|
||||
r"([A-Za-z0-9_\-/.+=]{8,})"
|
||||
r"""(["'"'"'\s:=]{1,8})"""
|
||||
r"((?:bearer|basic|token|bot)\s+)?"
|
||||
r"([A-Za-z0-9_\-/.+=]{8,256})"
|
||||
)
|
||||
|
||||
import signal
|
||||
def _ecc_bail(*_):
|
||||
sys.exit(0)
|
||||
try:
|
||||
signal.signal(signal.SIGALRM, _ecc_bail)
|
||||
signal.alarm(8) # self-terminate before the async hook 10s timeout can orphan us (#2278)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
raw = sys.stdin.read()[:2000]
|
||||
raw = _SECRET_RE.sub(lambda m: m.group(1) + m.group(2) + (m.group(3) or "") + "[REDACTED]", raw)
|
||||
print(json.dumps({"timestamp": os.environ["TIMESTAMP"], "event": "parse_error", "raw": raw}))
|
||||
@ -302,6 +314,15 @@ export TIMESTAMP="$timestamp"
|
||||
|
||||
echo "$PARSED" | "$PYTHON_CMD" -c '
|
||||
import json, sys, os, re
|
||||
import signal
|
||||
|
||||
def _ecc_bail(*_):
|
||||
sys.exit(0)
|
||||
try:
|
||||
signal.signal(signal.SIGALRM, _ecc_bail)
|
||||
signal.alarm(8) # self-terminate before the async hook 10s timeout can orphan us (#2278)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
parsed = json.load(sys.stdin)
|
||||
observation = {
|
||||
@ -315,11 +336,14 @@ observation = {
|
||||
|
||||
# Scrub secrets: match common key=value, key: value, and key"value patterns
|
||||
# Includes optional auth scheme (e.g., "Bearer", "Basic") before token
|
||||
# Linear-time secret matcher. Bounded quantifiers and a fixed set of auth
|
||||
# schemes (instead of a generic [A-Za-z]+\s+ that overlapped the value class)
|
||||
# prevent the catastrophic backtracking that pegged python at 100% CPU (#2278).
|
||||
_SECRET_RE = re.compile(
|
||||
r"(?i)(api[_-]?key|token|secret|password|authorization|credentials?|auth)"
|
||||
r"""(["'"'"'\s:=]+)"""
|
||||
r"([A-Za-z]+\s+)?"
|
||||
r"([A-Za-z0-9_\-/.+=]{8,})"
|
||||
r"""(["'"'"'\s:=]{1,8})"""
|
||||
r"((?:bearer|basic|token|bot)\s+)?"
|
||||
r"([A-Za-z0-9_\-/.+=]{8,256})"
|
||||
)
|
||||
|
||||
def scrub(val):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user