Summary of this patch (free text ahead of the first file header):

  edit-text-normalization.ts
    HASHLINE_PREFIX_RE now tolerates optional whitespace on both sides of
    the "#" and only accepts a two-character tag drawn from the alphabet
    ZPMQVRWSNKTXJBYH (previously any two of A-Z).
    DIFF_PLUS_RE now strips a single leading "+" only; a leading "-" is
    no longer treated as a diff marker.

  hash-computation.ts
    computeLineHash no longer prefixes the line number to the hashed text.
    It hashes the whitespace-stripped content and passes a second argument
    to xxHash32: 0 when the content contains at least one Unicode letter or
    number, otherwise the line number. Identical "significant" lines thus
    hash the same wherever they sit, while punctuation-only lines still
    vary by position.

  hash-computation.test.ts
    The existing test is inverted to expect equal hashes for significant
    content, and a new test covers the punctuation-only case.

NOTE(review): the endsWith("\r") branch added to computeLineHash looks
redundant - /\s+/g already removes "\r", so both arms of the ternary yield
the same string. Consider simplifying in a follow-up.

NOTE(review): the line structure of this patch was restored from a copy in
which every newline and indent run had been collapsed to one space. Hunk
line counts match the @@ headers, but the indentation width (two spaces
assumed here) must be confirmed against the target files before applying.

diff --git a/src/tools/hashline-edit/edit-text-normalization.ts b/src/tools/hashline-edit/edit-text-normalization.ts
index b508bf57..beb6ac87 100644
--- a/src/tools/hashline-edit/edit-text-normalization.ts
+++ b/src/tools/hashline-edit/edit-text-normalization.ts
@@ -1,5 +1,5 @@
-const HASHLINE_PREFIX_RE = /^\s*(?:>>>|>>)?\s*\d+#[A-Z]{2}:/
-const DIFF_PLUS_RE = /^[+-](?![+-])/
+const HASHLINE_PREFIX_RE = /^\s*(?:>>>|>>)?\s*\d+\s*#\s*[ZPMQVRWSNKTXJBYH]{2}:/
+const DIFF_PLUS_RE = /^[+](?![+])/
 
 function equalsIgnoringWhitespace(a: string, b: string): boolean {
   if (a === b) return true
diff --git a/src/tools/hashline-edit/hash-computation.test.ts b/src/tools/hashline-edit/hash-computation.test.ts
index bbca1baa..d73a2db2 100644
--- a/src/tools/hashline-edit/hash-computation.test.ts
+++ b/src/tools/hashline-edit/hash-computation.test.ts
@@ -21,7 +21,7 @@ describe("computeLineHash", () => {
     expect(hash1).toMatch(/^[ZPMQVRWSNKTXJBYH]{2}$/)
   })
 
-  it("produces different hashes for same content on different lines", () => {
+  it("produces same hashes for significant content on different lines", () => {
     //#given
     const content = "function hello() {"
 
@@ -29,6 +29,18 @@ describe("computeLineHash", () => {
     const hash1 = computeLineHash(1, content)
     const hash2 = computeLineHash(2, content)
 
+    //#then
+    expect(hash1).toBe(hash2)
+  })
+
+  it("mixes line number for non-significant lines", () => {
+    //#given
+    const punctuationOnly = "{}"
+
+    //#when
+    const hash1 = computeLineHash(1, punctuationOnly)
+    const hash2 = computeLineHash(2, punctuationOnly)
+
     //#then
     expect(hash1).not.toBe(hash2)
   })
diff --git a/src/tools/hashline-edit/hash-computation.ts b/src/tools/hashline-edit/hash-computation.ts
index 374bd56d..8371887d 100644
--- a/src/tools/hashline-edit/hash-computation.ts
+++ b/src/tools/hashline-edit/hash-computation.ts
@@ -1,10 +1,12 @@
 import { HASHLINE_DICT } from "./constants"
 import { createHashlineChunkFormatter } from "./hashline-chunk-formatter"
 
+const RE_SIGNIFICANT = /[\p{L}\p{N}]/u
+
 export function computeLineHash(lineNumber: number, content: string): string {
-  const stripped = content.replace(/\s+/g, "")
-  const hashInput = `${lineNumber}:${stripped}`
-  const hash = Bun.hash.xxHash32(hashInput)
+  const stripped = content.endsWith("\r") ? content.slice(0, -1).replace(/\s+/g, "") : content.replace(/\s+/g, "")
+  const seed = RE_SIGNIFICANT.test(stripped) ? 0 : lineNumber
+  const hash = Bun.hash.xxHash32(stripped, seed)
   const index = hash % 256
   return HASHLINE_DICT[index]
 }