diff --git a/CHANGELOG.md b/CHANGELOG.md index e0a6830..a10be7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## Unreleased + +### Fixes + +- Harden temp filename prefixes, local-root reads, private store imports, durable queue reads, and regular-file byte caps against Deepsec-reported path traversal, symlink, and oversized-read races. +- Harden sidecar lock cleanup and stale-lock handling so stale third-party locks fail closed instead of being deleted by path. + +### Tests + +- Added Deepsec regression coverage for unsafe temp tokens, dangling symlinks, default read caps, private `copyIn()` races, symlinked queue entries, oversized queue entries, and fresh sidecar lock preservation. + ## 0.1.2 - 2026-05-06 ### Fixes diff --git a/docs/config.md b/docs/config.md index 83e5895..accf119 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1,22 +1,25 @@ --- title: Config -description: "Process-global configuration for the optional Python helper used by fs-safe on POSIX." +description: "Process-global defaults for optional fs-safe helpers." --- # `@openclaw/fs-safe/config` -Process-global configuration knobs for the optional persistent Python helper that backs POSIX fd-relative operations in `root()`. The whole helper policy is described in the [Python helper policy](python-helper.md); this page is the API reference. +Process-global configuration knobs for optional fs-safe helpers. The Python helper policy is described in the [Python helper policy](python-helper.md); this page is the API reference. ```ts import { configureFsSafePython, + configureFsSafeLocks, getFsSafePythonConfig, + getFsSafeLockConfig, + type FsSafeLockConfig, type FsSafePythonConfig, type FsSafePythonMode, } from "@openclaw/fs-safe/config"; ``` -`configureFsSafePython` is also re-exported from the main entry point, so `import { configureFsSafePython } from "@openclaw/fs-safe"` works too. Prefer the subpath when you only need helper configuration and want the smallest import surface. +These functions are also re-exported from the main entry point. Prefer the subpath when you only need helper configuration and want the smallest import surface. ## `configureFsSafePython(config)` @@ -47,6 +50,31 @@ function getFsSafePythonConfig(): FsSafePythonConfig; Return the effective configuration: programmatic overrides win, then env vars, then the package default (`auto`). +## `configureFsSafeLocks(config)` + +```ts +function configureFsSafeLocks(config: Partial): void; + +type FsSafeLockConfig = { + staleRecovery: "fail-closed"; + staleMs?: number; + timeoutMs?: number; + retry?: FileLockRetryOptions; +}; +``` + +Set process-wide defaults for sidecar lock options. This does **not** turn locking on globally; callers still need to pass `lock: true` or a lock options object for the specific JSON store/resource that needs cross-process coordination. + +`staleRecovery` currently supports `"fail-closed"` only. Stale third-party sidecars are not deleted by path because Node cannot atomically bind that deletion to the file that was inspected. + +## `getFsSafeLockConfig()` + +```ts +function getFsSafeLockConfig(): FsSafeLockConfig; +``` + +Return the current sidecar lock defaults. + ## Environment variables The same policy can be set without code: @@ -61,5 +89,6 @@ OpenClaw compatibility aliases are accepted: `OPENCLAW_FS_SAFE_PYTHON_MODE`, `OP ## Related pages - [Python helper policy](python-helper.md) — when to pick `auto`, `off`, or `require`, and what each mode protects. +- [File lock](sidecar-lock.md) — the per-resource lock API that consumes lock defaults. - [Root API](root.md) — the API whose POSIX hardening the helper backs. - [Errors](errors.md) — `helper-unavailable` and `helper-failed`. diff --git a/docs/index.md b/docs/index.md index 3fc2b26..fd4c8f0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -49,7 +49,7 @@ await fs.remove("notes/archive/today.txt"); | Surface | Use it for | |---|---| | [`root()`](root.md) | One boundary for read/write/move/remove inside a trusted directory. | -| [`@openclaw/fs-safe/config`](config.md) | Process-global Python helper configuration (`configureFsSafePython`, `getFsSafePythonConfig`). | +| [`@openclaw/fs-safe/config`](config.md) | Process-global Python helper and lock-option defaults. | | [Python helper policy](python-helper.md) | Choose `auto`, `off`, or `require` for POSIX fd-relative hardening. | | [`replaceFileAtomic`](atomic.md) | Sibling-temp + rename, fsync hooks, mode preservation, copy fallback. | | [`writeExternalFileWithinRoot`](output.md) | Stage external-library file output in private temp storage, then finalize under a root. | @@ -64,7 +64,7 @@ await fs.remove("notes/archive/today.txt"); | [`extractArchive`](archive.md) | ZIP/TAR extraction with size, count, link, and traversal limits. | | [Secret files](secret-file.md) | Mode-0600 credentials with size and TOCTOU defense. | | [Permissions](permissions.md) | POSIX mode and Windows ACL inspection/remediation helpers. | -| [`acquireFileLock`](sidecar-lock.md) | Cross-process file lock with retry and stale-lock recovery. | +| [`acquireFileLock`](sidecar-lock.md) | Cross-process file lock with retry and fail-closed stale-lock handling. | | [`FsSafeError`](errors.md) | Closed code union (with `policy` / `operational` category) you can branch on. | | [`pathScope()`](path-scope.md) | Lower-level absolute-path boundary helper; lives behind `@openclaw/fs-safe/advanced`. | | [`@openclaw/fs-safe/advanced`](advanced.md) | Directory of lower-level composition helpers (path scopes, regular-file I/O, install paths, sibling-temp writes, …). | diff --git a/docs/json-store.md b/docs/json-store.md index 1e83100..0c1057c 100644 --- a/docs/json-store.md +++ b/docs/json-store.md @@ -53,6 +53,7 @@ type JsonStoreLockOptions = { staleMs?: number; // default 30_000 timeoutMs?: number; // default 30_000 retry?: FileLockRetryOptions; + staleRecovery?: "fail-closed"; managerKey?: string; // default `fs-safe.json-store:` }; @@ -130,6 +131,7 @@ const counter = jsonStore<{ count: number }>({ lock: { staleMs: 60_000, timeoutMs: 10_000, + staleRecovery: "fail-closed", retry: { retries: 30, minTimeout: 100, maxTimeout: 5_000, randomize: true }, }, }); @@ -137,6 +139,8 @@ const counter = jsonStore<{ count: number }>({ When `lock` is falsy, `read` / `write` / `update` are unlocked. The `update` shape is still useful — it gives you a single function for the read-modify-write pattern — but it offers no concurrency guarantees if other processes also write to the file. +Process-wide lock defaults from `configureFsSafeLocks()` apply only after locking is explicitly enabled. They do not make JSON stores lock by default. + The default `managerKey` namespaces the in-process `FileLockManager` per absolute file path, so two `jsonStore` calls on the same file share lock state automatically. ## Common patterns diff --git a/docs/sidecar-lock.md b/docs/sidecar-lock.md index 3d1c8e9..4c72a9c 100644 --- a/docs/sidecar-lock.md +++ b/docs/sidecar-lock.md @@ -1,6 +1,6 @@ # File lock -`acquireFileLock()` and `withFileLock()` provide a cross-process file lock with retry, stale-lock reclaim, and process-exit cleanup. The lock is implemented as a sidecar file (e.g. `state.json` ↔ `state.json.lock`) — only one acquirer can create the sidecar with `O_CREAT | O_EXCL` at a time. +`acquireFileLock()` and `withFileLock()` provide a cross-process file lock with retry and process-exit cleanup. The lock is implemented as a sidecar file (e.g. `state.json` ↔ `state.json.lock`) — only one acquirer can create the sidecar with `O_CREAT | O_EXCL` at a time. ```ts import { acquireFileLock } from "@openclaw/fs-safe/file-lock"; @@ -19,9 +19,9 @@ try { ## Why sidecar? -The lock file sits next to the protected resource. If a process crashes mid-lock, the next acquirer notices the held entry, inspects its payload (PID, host, acquired-at timestamp), and decides — via `shouldReclaim` (defaulting to "is the lock older than `staleMs`?") — whether to take it over. +The lock file sits next to the protected resource. If a process crashes mid-lock, the next acquirer notices the held entry, inspects its payload (PID, host, acquired-at timestamp), and decides — via `shouldReclaim` (defaulting to "is the lock older than `staleMs`?") — whether it should keep waiting or fail. -The library installs a `process.on("exit")` handler that releases all currently-held locks synchronously, so well-behaved exits leave no stale sidecars. Crashes still need the reclaim path. +The library installs a `process.on("exit")` handler that releases all currently-held locks synchronously, so well-behaved exits leave no stale sidecars. Crashed holders leave their sidecar behind; remove those through an application-owned recovery path after you have proved the holder cannot still be writing. ## API @@ -48,9 +48,10 @@ function createFileLockManager(key: string): FileLockManager; type FileLockAcquireOptions> = { managerKey?: string; // optional in-process manager namespace lockPath?: string; // override; defaults to `${targetPath}.lock` - staleMs: number; // how long until a held lock is considered stale + staleMs?: number; // default 30_000 timeoutMs?: number; // overall acquire deadline; default unbounded retry?: FileLockRetryOptions; + staleRecovery?: "fail-closed"; // default allowReentrant?: boolean; // if this process already holds it, increment a count instead of failing payload: () => TPayload | Promise; shouldReclaim?: (params: { @@ -100,7 +101,7 @@ try { } ``` -If your process dies before `release()` runs and skips the exit handler, the next acquirer reclaims the lock once `staleMs` elapses (or your `shouldReclaim` returns true). +If your process dies before `release()` runs and skips the exit handler, the sidecar remains. Once `staleMs` elapses (or your `shouldReclaim` returns true), acquisition fails closed instead of deleting by path, because Node cannot atomically bind that deletion to the file that was inspected. ## `withFileLock` — common shape made one-liner @@ -139,9 +140,9 @@ await handle.release(); await locks.drain(); ``` -## Reclaim policy: `shouldReclaim` +## Stale policy: `shouldReclaim` -The default policy reclaims locks whose `acquiredAt` is older than `staleMs`. Pass a custom callback when you want a richer notion of "is the holder still alive": +The default policy treats locks whose `createdAt` is older than `staleMs` as stale. Pass a custom callback when you want a richer notion of "is the holder still alive": ```ts import { kill } from "node:process"; @@ -155,26 +156,26 @@ const handle = await acquireFileLock(targetPath, { if (!Number.isFinite(pid)) return true; try { kill(pid, 0); - return false; // process still alive — don't reclaim + return false; // process still alive — keep waiting } catch { - return true; // process gone — reclaim + return true; // process gone — fail closed for recovery } }, }); ``` -`heldByThisProcess` is true when this manager already holds the lock (relevant for the reentrant case). +`heldByThisProcess` is true when this manager already holds the lock (relevant for the reentrant case). A `true` result does not delete the sidecar; it lets the acquire loop stop waiting once the retry/timeout policy says to give up. ## What sidecar locks defend against - **Two processes writing the same file at once.** `acquire` serializes the critical section. -- **A crashed holder leaving a stale lock.** `staleMs` plus optional `shouldReclaim` recovers it. +- **Accidentally deleting a fresh lock during stale recovery.** Stale third-party locks fail closed because safe compare-and-unlink is not available through Node's path APIs. - **Race between simultaneous acquire attempts.** `O_CREAT | O_EXCL` ensures one wins. ## What they do **not** defend against - **Misbehaving holders that ignore the lock.** Locks are advisory — only callers that go through `acquire` are bound. -- **Holders that never call `release` and have no liveness check.** Without a real `shouldReclaim`, the lock relies on `staleMs` alone — pick a deadline that is comfortably longer than your real work but short enough to recover from crashes. +- **Automatic stale lock deletion.** If a process crashes, use the payload and your own supervisor/process table to decide when to remove the sidecar. - **Multi-host coordination over network filesystems.** Behavior depends on the underlying filesystem's `O_EXCL` semantics; treat as best-effort. ## Common patterns diff --git a/src/advanced.ts b/src/advanced.ts index 627db9b..764253b 100644 --- a/src/advanced.ts +++ b/src/advanced.ts @@ -34,6 +34,11 @@ export { trySafeFileURLToPath, } from "./local-file-access.js"; export { formatPosixMode } from "./mode.js"; +export { + configureFsSafeLocks, + getFsSafeLockConfig, + type FsSafeLockConfig, +} from "./lock-config.js"; export { assertNoHardlinkedFinalPath, assertNoPathAliasEscape, diff --git a/src/config.ts b/src/config.ts index 3e42f5a..52550e9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -4,3 +4,8 @@ export { type FsSafePythonConfig, type FsSafePythonMode, } from "./pinned-python-config.js"; +export { + configureFsSafeLocks, + getFsSafeLockConfig, + type FsSafeLockConfig, +} from "./lock-config.js"; diff --git a/src/file-lock.ts b/src/file-lock.ts index 65d6cdf..972c78f 100644 --- a/src/file-lock.ts +++ b/src/file-lock.ts @@ -4,15 +4,19 @@ import { type SidecarLockHandle, type SidecarLockHeldEntry, type SidecarLockRetryOptions, + type SidecarLockStaleRecovery, } from "./sidecar-lock.js"; +import { getFsSafeLockConfig } from "./lock-config.js"; export type FileLockRetryOptions = SidecarLockRetryOptions; +export type FileLockStaleRecovery = SidecarLockStaleRecovery; export type FileLockAcquireOptions> = Omit< SidecarLockAcquireOptions, - "targetPath" + "targetPath" | "staleMs" > & { managerKey?: string; + staleMs?: number; }; export type FileLockHandle = SidecarLockHandle; @@ -37,6 +41,19 @@ function resolveFileLockManagerKey(targetPath: string, managerKey?: string): str return managerKey ?? `fs-safe.file-lock:${targetPath}`; } +function withLockDefaults>( + options: FileLockAcquireOptions, +): Omit, "targetPath"> { + const defaults = getFsSafeLockConfig(); + return { + ...options, + retry: options.retry ?? defaults.retry, + staleMs: options.staleMs ?? defaults.staleMs ?? 30_000, + staleRecovery: options.staleRecovery ?? defaults.staleRecovery, + timeoutMs: options.timeoutMs ?? defaults.timeoutMs, + }; +} + export async function acquireFileLock>( targetPath: string, options: FileLockAcquireOptions, @@ -59,11 +76,11 @@ export function createFileLockManager(key: string): FileLockManager { return { acquire: async (targetPath, options) => { const { managerKey: _managerKey, ...acquireOptions } = options; - return await manager.acquire({ ...acquireOptions, targetPath }); + return await manager.acquire({ ...withLockDefaults(acquireOptions), targetPath }); }, withLock: async (targetPath, options, fn) => { const { managerKey: _managerKey, ...acquireOptions } = options; - return await manager.withLock({ ...acquireOptions, targetPath }, fn); + return await manager.withLock({ ...withLockDefaults(acquireOptions), targetPath }, fn); }, drain: manager.drain, reset: manager.reset, diff --git a/src/index.ts b/src/index.ts index 652eaa3..93c7f2f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -37,3 +37,8 @@ export { type ExternalFileWriteOptions, type ExternalFileWriteResult, } from "./output.js"; +export { + configureFsSafeLocks, + getFsSafeLockConfig, + type FsSafeLockConfig, +} from "./lock-config.js"; diff --git a/src/json-document-store.ts b/src/json-document-store.ts index 1218de1..a889db1 100644 --- a/src/json-document-store.ts +++ b/src/json-document-store.ts @@ -1,10 +1,13 @@ import type { FileLockRetryOptions } from "./file-lock.js"; +import { getFsSafeLockConfig } from "./lock-config.js"; import { createSidecarLockManager } from "./sidecar-lock.js"; +import type { SidecarLockStaleRecovery } from "./sidecar-lock.js"; export type JsonStoreLockOptions = { staleMs?: number; timeoutMs?: number; retry?: FileLockRetryOptions; + staleRecovery?: SidecarLockStaleRecovery; managerKey?: string; }; @@ -45,11 +48,13 @@ function resolveLockOptions( return null; } const lockOptions = options.lock === true ? {} : options.lock; + const defaults = getFsSafeLockConfig(); return { managerKey: lockOptions.managerKey ?? `fs-safe.json-store:${filePath}`, - retry: lockOptions.retry ?? {}, - staleMs: lockOptions.staleMs ?? 30_000, - timeoutMs: lockOptions.timeoutMs ?? 30_000, + retry: lockOptions.retry ?? defaults.retry ?? {}, + staleMs: lockOptions.staleMs ?? defaults.staleMs ?? 30_000, + staleRecovery: lockOptions.staleRecovery ?? defaults.staleRecovery, + timeoutMs: lockOptions.timeoutMs ?? defaults.timeoutMs ?? 30_000, }; } @@ -84,6 +89,7 @@ export function createJsonStore( staleMs: lockOptions.staleMs, timeoutMs: lockOptions.timeoutMs, retry: lockOptions.retry, + staleRecovery: lockOptions.staleRecovery, allowReentrant: true, payload: () => ({ pid: process.pid, createdAt: new Date().toISOString() }), }, diff --git a/src/lock-config.ts b/src/lock-config.ts new file mode 100644 index 0000000..c692686 --- /dev/null +++ b/src/lock-config.ts @@ -0,0 +1,25 @@ +import type { SidecarLockRetryOptions, SidecarLockStaleRecovery } from "./sidecar-lock.js"; + +export type FsSafeLockConfig = { + staleRecovery: SidecarLockStaleRecovery; + staleMs?: number; + timeoutMs?: number; + retry?: SidecarLockRetryOptions; +}; + +const DEFAULT_LOCK_CONFIG: FsSafeLockConfig = { + staleRecovery: "fail-closed", +}; + +let lockConfig: FsSafeLockConfig = { ...DEFAULT_LOCK_CONFIG }; + +export function configureFsSafeLocks(config: Partial): void { + // Process defaults only fill lock options after a caller explicitly enables + // locking for a resource; this must never turn sidecar locks on globally. + lockConfig = { ...lockConfig, ...config }; +} + +export function getFsSafeLockConfig(): FsSafeLockConfig { + return { ...lockConfig, retry: lockConfig.retry ? { ...lockConfig.retry } : undefined }; +} + diff --git a/src/sidecar-lock.ts b/src/sidecar-lock.ts index 1d6514f..482d2ee 100644 --- a/src/sidecar-lock.ts +++ b/src/sidecar-lock.ts @@ -1,7 +1,9 @@ import fsSync from "node:fs"; +import type { Stats } from "node:fs"; import type { FileHandle } from "node:fs/promises"; import fs from "node:fs/promises"; import path from "node:path"; +import { sameFileIdentity } from "./file-identity.js"; export type SidecarLockRetryOptions = { retries?: number; @@ -11,12 +13,15 @@ export type SidecarLockRetryOptions = { randomize?: boolean; }; +export type SidecarLockStaleRecovery = "fail-closed"; + export type SidecarLockAcquireOptions> = { targetPath: string; lockPath?: string; staleMs: number; timeoutMs?: number; retry?: SidecarLockRetryOptions; + staleRecovery?: SidecarLockStaleRecovery; allowReentrant?: boolean; payload: () => TPayload | Promise; shouldReclaim?: (params: { @@ -56,6 +61,7 @@ type HeldLock = { count: number; handle: FileHandle; lockPath: string; + snapshot: LockSnapshot; acquiredAt: number; metadata: Record; releasePromise?: Promise; @@ -88,17 +94,78 @@ function resolveManagerState(key: string): SidecarLockManagerState { return state; } -async function readJsonPayload(lockPath: string): Promise | null> { +type LockSnapshot = { + raw?: string; + payload: Record | null; + stat?: Stats; +}; + +async function readLockSnapshot(lockPath: string): Promise { try { - const parsed = JSON.parse(await fs.readFile(lockPath, "utf8")) as unknown; - return parsed && typeof parsed === "object" && !Array.isArray(parsed) - ? (parsed as Record) - : null; + const stat = await fs.lstat(lockPath); + const raw = await fs.readFile(lockPath, "utf8"); + try { + const parsed = JSON.parse(raw) as unknown; + const payload = + parsed && typeof parsed === "object" && !Array.isArray(parsed) + ? (parsed as Record) + : null; + return { raw, payload, stat }; + } catch { + return { raw, payload: null, stat }; + } } catch { return null; } } +function snapshotMatches(current: LockSnapshot, observed: LockSnapshot): boolean { + if (observed.stat && current.stat && !sameFileIdentity(observed.stat, current.stat)) { + return false; + } + if (observed.raw !== undefined) { + return current.raw === observed.raw; + } + return observed.stat !== undefined && current.stat !== undefined; +} + +async function removeLockIfUnchanged( + lockPath: string, + observed: LockSnapshot | null, +): Promise { + const current = await readLockSnapshot(lockPath); + if (!current || !observed) { + return false; + } + if (!snapshotMatches(current, observed)) { + // The lock changed after we decided it was stale. Leave the fresh holder's + // file alone; deleting by path here would break mutual exclusion. + return false; + } + await fs.rm(lockPath, { force: true }).catch(() => undefined); + return true; +} + +async function lockSnapshotStillPresent( + lockPath: string, + observed: LockSnapshot | null, +): Promise { + const current = await readLockSnapshot(lockPath); + return !!current && !!observed && snapshotMatches(current, observed); +} + +function snapshotMatchesSync(lockPath: string, observed: LockSnapshot): boolean { + try { + const stat = fsSync.lstatSync(lockPath); + if (observed.stat && !sameFileIdentity(observed.stat, stat)) { + return false; + } + return observed.raw === undefined || fsSync.readFileSync(lockPath, "utf8") === observed.raw; + } catch { + return false; + } +} + async function resolveNormalizedTargetPath(targetPath: string): Promise { const resolved = path.resolve(targetPath); const dir = path.dirname(resolved); @@ -142,7 +209,9 @@ function releaseAllLocksSync(state: SidecarLockManagerState): void { for (const [normalizedTargetPath, held] of state.held) { void held.handle.close().catch(() => undefined); try { - fsSync.rmSync(held.lockPath, { force: true }); + if (snapshotMatchesSync(held.lockPath, held.snapshot)) { + fsSync.rmSync(held.lockPath, { force: true }); + } } catch { // Best-effort process-exit cleanup. } @@ -175,7 +244,7 @@ async function releaseHeldLock( state.held.delete(normalizedTargetPath); held.releasePromise = (async () => { await held.handle.close().catch(() => undefined); - await fs.rm(held.lockPath, { force: true }).catch(() => undefined); + await removeLockIfUnchanged(held.lockPath, held.snapshot); })(); try { await held.releasePromise; @@ -222,15 +291,19 @@ export function createSidecarLockManager(key: string) { let handle: FileHandle | null = null; try { handle = await fs.open(lockPath, "wx"); + const payload = await options.payload(); + const raw = `${JSON.stringify(payload, null, 2)}\n`; + await handle.writeFile(raw, "utf8"); + const snapshot = { raw, payload, stat: await handle.stat() }; const createdHeld: HeldLock = { count: 1, handle, lockPath, + snapshot, acquiredAt: Date.now(), metadata: options.metadata ?? {}, }; state.held.set(normalizedTargetPath, createdHeld); - await handle.writeFile(`${JSON.stringify(await options.payload(), null, 2)}\n`, "utf8"); const release = () => releaseHeldLock(state, normalizedTargetPath, createdHeld).then(() => undefined); return { @@ -241,31 +314,57 @@ export function createSidecarLockManager(key: string) { }; } catch (err) { if (handle) { + const failedSnapshot: LockSnapshot = { payload: null }; + try { + failedSnapshot.stat = await handle.stat(); + } catch { + // Best-effort cleanup of a failed exclusive create. + } const current = state.held.get(normalizedTargetPath); if (current?.handle === handle) { state.held.delete(normalizedTargetPath); } - await handle.close().catch(() => undefined); + // If payload serialization/write fails, the file may be empty or + // partial JSON, so remove while our exclusive handle is still open. await fs.rm(lockPath, { force: true }).catch(() => undefined); + await handle.close().catch(() => undefined); + // Windows can refuse removing an open file; retry after close but + // only if the path still points at the file identity we created. + await removeLockIfUnchanged(lockPath, failedSnapshot); } if ((err as { code?: unknown }).code !== "EEXIST") { throw err; } const nowMs = Date.now(); - const payload = await readJsonPayload(lockPath); + const snapshot = await readLockSnapshot(lockPath); + if (!snapshot) { + continue; + } const shouldReclaim = options.shouldReclaim ?? defaultShouldReclaim; if ( await shouldReclaim({ lockPath, normalizedTargetPath, - payload, + payload: snapshot?.payload ?? null, staleMs: options.staleMs, nowMs, heldByThisProcess: state.held.has(normalizedTargetPath), }) ) { - await fs.rm(lockPath, { force: true }).catch(() => undefined); - continue; + if (!(await lockSnapshotStillPresent(lockPath, snapshot))) { + continue; + } + // Node exposes only path-based unlink/rename here. A stale-lock + // reclaimer cannot bind the delete to the file it inspected, so a + // concurrent release+fresh-acquire could otherwise lose its lock. + // Fail closed and let callers choose a higher-level recovery path. + if ((options.staleRecovery ?? "fail-closed") === "fail-closed") { + throw Object.assign(new Error(`file lock stale for ${normalizedTargetPath}`), { + code: "file_lock_stale", + lockPath, + normalizedTargetPath, + }); + } } const elapsed = Date.now() - startedAt; if ( diff --git a/test/coverage-gaps.test.ts b/test/coverage-gaps.test.ts index 14a53c7..40de7fa 100644 --- a/test/coverage-gaps.test.ts +++ b/test/coverage-gaps.test.ts @@ -329,20 +329,24 @@ describe("sidecar lock manager", () => { manager.reset(); }); - it("times out and reclaims stale locks", async () => { + it("times out on stale locks without deleting them by path", async () => { const root = await tempRoot("fs-safe-sidecar-timeout-"); const targetPath = path.join(root, "state.json"); const lockPath = `${targetPath}.lock`; const manager = createSidecarLockManager(`coverage-timeout-${Date.now()}-${Math.random()}`); await fs.writeFile(lockPath, "{\"createdAt\":\"2000-01-01T00:00:00.000Z\"}\n", "utf8"); - const reclaimed = await manager.acquire({ - targetPath, - lockPath, - staleMs: 1, - payload: () => ({ owner: "coverage" }), - }); - await reclaimed.release(); + await expect( + manager.acquire({ + targetPath, + lockPath, + staleMs: 1, + timeoutMs: 1, + retry: { retries: 0, minTimeout: 1, maxTimeout: 1 }, + payload: () => ({ owner: "coverage" }), + }), + ).rejects.toMatchObject({ code: "file_lock_stale" }); + await expect(fs.readFile(lockPath, "utf8")).resolves.toContain("2000"); await fs.writeFile(lockPath, "{\"createdAt\":\"2999-01-01T00:00:00.000Z\"}\n", "utf8"); await expect( diff --git a/test/sidecar-lock-regression.test.ts b/test/sidecar-lock-regression.test.ts new file mode 100644 index 0000000..2d39ef5 --- /dev/null +++ b/test/sidecar-lock-regression.test.ts @@ -0,0 +1,181 @@ +import fsp from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { fileStore } from "../src/file-store.js"; +import { acquireFileLock } from "../src/file-lock.js"; +import { configureFsSafeLocks, getFsSafeLockConfig } from "../src/lock-config.js"; +import { createSidecarLockManager } from "../src/sidecar-lock.js"; + +const tempDirs: string[] = []; + +async function tempRoot(prefix: string): Promise { + const dir = await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); + tempDirs.push(dir); + return dir; +} + +afterEach(async () => { + configureFsSafeLocks({ + retry: undefined, + staleMs: undefined, + staleRecovery: "fail-closed", + timeoutMs: undefined, + }); + await Promise.all(tempDirs.splice(0).map((dir) => fsp.rm(dir, { recursive: true, force: true }))); +}); + +describe("sidecar lock regressions", () => { + it("does not delete a fresh sidecar lock during stale reclaim or old release", async () => { + const base = await tempRoot("fs-safe-sidecar-token-"); + const targetPath = path.join(base, "state.json"); + const lockPath = `${targetPath}.lock`; + const manager = createSidecarLockManager(`fs-safe-test-${Date.now()}`); + const held = await manager.acquire({ + targetPath, + lockPath, + staleMs: 1, + payload: async () => ({ createdAt: "2000-01-01T00:00:00.000Z" }), + }); + + await fsp.writeFile(lockPath, JSON.stringify({ createdAt: new Date().toISOString() })); + await held.release(); + await expect(fsp.readFile(lockPath, "utf8")).resolves.toContain("createdAt"); + + let replaced = false; + await expect( + manager.acquire({ + targetPath, + lockPath, + staleMs: 1, + timeoutMs: 1, + retry: { retries: 0 }, + payload: async () => ({ createdAt: new Date().toISOString() }), + shouldReclaim: async () => { + if (!replaced) { + replaced = true; + await fsp.writeFile(lockPath, JSON.stringify({ createdAt: "2999-01-01T00:00:00.000Z" })); + return true; + } + return false; + }, + }), + ).rejects.toMatchObject({ code: "file_lock_timeout" }); + await expect(fsp.readFile(lockPath, "utf8")).resolves.toContain("2999"); + }); + + it("keeps internal sidecar lock identity out of user payloads", async () => { + const base = await tempRoot("fs-safe-sidecar-payload-"); + const targetPath = path.join(base, "state.json"); + const lockPath = `${targetPath}.lock`; + const manager = createSidecarLockManager(`fs-safe-payload-test-${Date.now()}`); + const lock = await manager.acquire({ + targetPath, + lockPath, + staleMs: 1, + payload: async () => ({ createdAt: "2999-01-01T00:00:00.000Z", owner: "caller" }), + }); + const raw = await fsp.readFile(lockPath, "utf8"); + expect(JSON.parse(raw)).toEqual({ + createdAt: "2999-01-01T00:00:00.000Z", + owner: "caller", + }); + await lock.release(); + + const payloads: Array | null> = []; + await fsp.writeFile(lockPath, raw, "utf8"); + await expect( + manager.acquire({ + targetPath, + lockPath, + staleMs: 1, + timeoutMs: 1, + retry: { retries: 0 }, + payload: async () => ({ createdAt: new Date().toISOString() }), + shouldReclaim: async ({ payload }) => { + payloads.push(payload); + return false; + }, + }), + ).rejects.toMatchObject({ code: "file_lock_timeout" }); + expect(payloads).toEqual([{ createdAt: "2999-01-01T00:00:00.000Z", owner: "caller" }]); + }); + + it("retries when a contended sidecar disappears during stale detection", async () => { + const base = await tempRoot("fs-safe-sidecar-vanish-"); + const targetPath = path.join(base, "state.json"); + const lockPath = `${targetPath}.lock`; + const manager = createSidecarLockManager(`fs-safe-vanish-test-${Date.now()}`); + await fsp.writeFile(lockPath, JSON.stringify({ createdAt: "2000-01-01T00:00:00.000Z" })); + + const lock = await manager.acquire({ + targetPath, + lockPath, + staleMs: 1, + timeoutMs: 1_000, + retry: { retries: 3, minTimeout: 1, maxTimeout: 1 }, + payload: async () => ({ createdAt: new Date().toISOString(), owner: "next" }), + shouldReclaim: async () => { + await fsp.rm(lockPath, { force: true }); + return true; + }, + }); + try { + await expect(fsp.readFile(lockPath, "utf8")).resolves.toContain("next"); + } finally { + await lock.release(); + } + }); + + it("cleans failed sidecar locks and preserves stale corrupt locks", async () => { + const base = await tempRoot("fs-safe-sidecar-corrupt-"); + const targetPath = path.join(base, "state.json"); + const lockPath = `${targetPath}.lock`; + const manager = createSidecarLockManager(`fs-safe-corrupt-test-${Date.now()}`); + + await expect( + manager.acquire({ + targetPath, + lockPath, + staleMs: 1, + payload: async () => { + throw new Error("payload failed"); + }, + }), + ).rejects.toThrow("payload failed"); + await expect(fsp.stat(lockPath)).rejects.toMatchObject({ code: "ENOENT" }); + + await fsp.writeFile(lockPath, "{", "utf8"); + await fsp.utimes(lockPath, new Date(0), new Date(0)); + await expect( + manager.acquire({ + targetPath, + lockPath, + staleMs: 1, + timeoutMs: 1, + retry: { retries: 0 }, + payload: async () => ({ createdAt: new Date().toISOString() }), + }), + ).rejects.toMatchObject({ code: "file_lock_stale" }); + await expect(fsp.readFile(lockPath, "utf8")).resolves.toBe("{"); + }); + + it("keeps lock config as explicit defaults, not global auto-locking", async () => { + const base = await tempRoot("fs-safe-lock-config-"); + const statePath = path.join(base, "state.json"); + configureFsSafeLocks({ staleMs: 1, timeoutMs: 1, retry: { retries: 0 } }); + + const unlocked = fileStore({ rootDir: base }).json<{ count: number }>("state.json"); + await unlocked.write({ count: 1 }); + await expect(fsp.stat(`${statePath}.lock`)).rejects.toMatchObject({ code: "ENOENT" }); + + const config = getFsSafeLockConfig(); + expect(config.staleRecovery).toBe("fail-closed"); + expect(config.timeoutMs).toBe(1); + + const lock = await acquireFileLock(path.join(base, "direct.json"), { + payload: async () => ({ owner: "direct" }), + }); + await lock.release(); + }); +});