fix: fail closed on stale sidecar locks
This commit is contained in:
parent
02897e6879
commit
c382eafdb2
11
CHANGELOG.md
11
CHANGELOG.md
@ -1,5 +1,16 @@
|
||||
# Changelog
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Fixes
|
||||
|
||||
- Harden temp filename prefixes, local-root reads, private store imports, durable queue reads, and regular-file byte caps against Deepsec-reported path traversal, symlink, and oversized-read races.
|
||||
- Harden sidecar lock cleanup and stale-lock handling so stale third-party locks fail closed instead of being deleted by path.
|
||||
|
||||
### Tests
|
||||
|
||||
- Added Deepsec regression coverage for unsafe temp tokens, dangling symlinks, default read caps, private `copyIn()` races, symlinked queue entries, oversized queue entries, and fresh sidecar lock preservation.
|
||||
|
||||
## 0.1.2 - 2026-05-06
|
||||
|
||||
### Fixes
|
||||
|
||||
@ -1,22 +1,25 @@
|
||||
---
|
||||
title: Config
|
||||
description: "Process-global configuration for the optional Python helper used by fs-safe on POSIX."
|
||||
description: "Process-global defaults for optional fs-safe helpers."
|
||||
---
|
||||
|
||||
# `@openclaw/fs-safe/config`
|
||||
|
||||
Process-global configuration knobs for the optional persistent Python helper that backs POSIX fd-relative operations in `root()`. The whole helper policy is described in the [Python helper policy](python-helper.md); this page is the API reference.
|
||||
Process-global configuration knobs for optional fs-safe helpers. The Python helper policy is described in the [Python helper policy](python-helper.md); this page is the API reference.
|
||||
|
||||
```ts
|
||||
import {
|
||||
configureFsSafePython,
|
||||
configureFsSafeLocks,
|
||||
getFsSafePythonConfig,
|
||||
getFsSafeLockConfig,
|
||||
type FsSafeLockConfig,
|
||||
type FsSafePythonConfig,
|
||||
type FsSafePythonMode,
|
||||
} from "@openclaw/fs-safe/config";
|
||||
```
|
||||
|
||||
`configureFsSafePython` is also re-exported from the main entry point, so `import { configureFsSafePython } from "@openclaw/fs-safe"` works too. Prefer the subpath when you only need helper configuration and want the smallest import surface.
|
||||
These functions are also re-exported from the main entry point. Prefer the subpath when you only need helper configuration and want the smallest import surface.
|
||||
|
||||
## `configureFsSafePython(config)`
|
||||
|
||||
@ -47,6 +50,31 @@ function getFsSafePythonConfig(): FsSafePythonConfig;
|
||||
|
||||
Return the effective configuration: programmatic overrides win, then env vars, then the package default (`auto`).
|
||||
|
||||
## `configureFsSafeLocks(config)`
|
||||
|
||||
```ts
|
||||
function configureFsSafeLocks(config: Partial<FsSafeLockConfig>): void;
|
||||
|
||||
type FsSafeLockConfig = {
|
||||
staleRecovery: "fail-closed";
|
||||
staleMs?: number;
|
||||
timeoutMs?: number;
|
||||
retry?: FileLockRetryOptions;
|
||||
};
|
||||
```
|
||||
|
||||
Set process-wide defaults for sidecar lock options. This does **not** turn locking on globally; callers still need to pass `lock: true` or a lock options object for the specific JSON store/resource that needs cross-process coordination.
|
||||
|
||||
`staleRecovery` currently supports `"fail-closed"` only. Stale third-party sidecars are not deleted by path because Node cannot atomically bind that deletion to the file that was inspected.
|
||||
|
||||
## `getFsSafeLockConfig()`
|
||||
|
||||
```ts
|
||||
function getFsSafeLockConfig(): FsSafeLockConfig;
|
||||
```
|
||||
|
||||
Return the current sidecar lock defaults.
|
||||
|
||||
## Environment variables
|
||||
|
||||
The same policy can be set without code:
|
||||
@ -61,5 +89,6 @@ OpenClaw compatibility aliases are accepted: `OPENCLAW_FS_SAFE_PYTHON_MODE`, `OP
|
||||
## Related pages
|
||||
|
||||
- [Python helper policy](python-helper.md) — when to pick `auto`, `off`, or `require`, and what each mode protects.
|
||||
- [File lock](sidecar-lock.md) — the per-resource lock API that consumes lock defaults.
|
||||
- [Root API](root.md) — the API whose POSIX hardening the helper backs.
|
||||
- [Errors](errors.md) — `helper-unavailable` and `helper-failed`.
|
||||
|
||||
@ -49,7 +49,7 @@ await fs.remove("notes/archive/today.txt");
|
||||
| Surface | Use it for |
|
||||
|---|---|
|
||||
| [`root()`](root.md) | One boundary for read/write/move/remove inside a trusted directory. |
|
||||
| [`@openclaw/fs-safe/config`](config.md) | Process-global Python helper configuration (`configureFsSafePython`, `getFsSafePythonConfig`). |
|
||||
| [`@openclaw/fs-safe/config`](config.md) | Process-global Python helper and lock-option defaults. |
|
||||
| [Python helper policy](python-helper.md) | Choose `auto`, `off`, or `require` for POSIX fd-relative hardening. |
|
||||
| [`replaceFileAtomic`](atomic.md) | Sibling-temp + rename, fsync hooks, mode preservation, copy fallback. |
|
||||
| [`writeExternalFileWithinRoot`](output.md) | Stage external-library file output in private temp storage, then finalize under a root. |
|
||||
@ -64,7 +64,7 @@ await fs.remove("notes/archive/today.txt");
|
||||
| [`extractArchive`](archive.md) | ZIP/TAR extraction with size, count, link, and traversal limits. |
|
||||
| [Secret files](secret-file.md) | Mode-0600 credentials with size and TOCTOU defense. |
|
||||
| [Permissions](permissions.md) | POSIX mode and Windows ACL inspection/remediation helpers. |
|
||||
| [`acquireFileLock`](sidecar-lock.md) | Cross-process file lock with retry and stale-lock recovery. |
|
||||
| [`acquireFileLock`](sidecar-lock.md) | Cross-process file lock with retry and fail-closed stale-lock handling. |
|
||||
| [`FsSafeError`](errors.md) | Closed code union (with `policy` / `operational` category) you can branch on. |
|
||||
| [`pathScope()`](path-scope.md) | Lower-level absolute-path boundary helper; lives behind `@openclaw/fs-safe/advanced`. |
|
||||
| [`@openclaw/fs-safe/advanced`](advanced.md) | Directory of lower-level composition helpers (path scopes, regular-file I/O, install paths, sibling-temp writes, …). |
|
||||
|
||||
@ -53,6 +53,7 @@ type JsonStoreLockOptions = {
|
||||
staleMs?: number; // default 30_000
|
||||
timeoutMs?: number; // default 30_000
|
||||
retry?: FileLockRetryOptions;
|
||||
staleRecovery?: "fail-closed";
|
||||
managerKey?: string; // default `fs-safe.json-store:<filePath>`
|
||||
};
|
||||
|
||||
@ -130,6 +131,7 @@ const counter = jsonStore<{ count: number }>({
|
||||
lock: {
|
||||
staleMs: 60_000,
|
||||
timeoutMs: 10_000,
|
||||
staleRecovery: "fail-closed",
|
||||
retry: { retries: 30, minTimeout: 100, maxTimeout: 5_000, randomize: true },
|
||||
},
|
||||
});
|
||||
@ -137,6 +139,8 @@ const counter = jsonStore<{ count: number }>({
|
||||
|
||||
When `lock` is falsy, `read` / `write` / `update` are unlocked. The `update` shape is still useful — it gives you a single function for the read-modify-write pattern — but it offers no concurrency guarantees if other processes also write to the file.
|
||||
|
||||
Process-wide lock defaults from `configureFsSafeLocks()` apply only after locking is explicitly enabled. They do not make JSON stores lock by default.
|
||||
|
||||
The default `managerKey` namespaces the in-process `FileLockManager` per absolute file path, so two `jsonStore` calls on the same file share lock state automatically.
|
||||
|
||||
## Common patterns
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# File lock
|
||||
|
||||
`acquireFileLock()` and `withFileLock()` provide a cross-process file lock with retry, stale-lock reclaim, and process-exit cleanup. The lock is implemented as a sidecar file (e.g. `state.json` ↔ `state.json.lock`) — only one acquirer can create the sidecar with `O_CREAT | O_EXCL` at a time.
|
||||
`acquireFileLock()` and `withFileLock()` provide a cross-process file lock with retry and process-exit cleanup. The lock is implemented as a sidecar file (e.g. `state.json` ↔ `state.json.lock`) — only one acquirer can create the sidecar with `O_CREAT | O_EXCL` at a time.
|
||||
|
||||
```ts
|
||||
import { acquireFileLock } from "@openclaw/fs-safe/file-lock";
|
||||
@ -19,9 +19,9 @@ try {
|
||||
|
||||
## Why sidecar?
|
||||
|
||||
The lock file sits next to the protected resource. If a process crashes mid-lock, the next acquirer notices the held entry, inspects its payload (PID, host, acquired-at timestamp), and decides — via `shouldReclaim` (defaulting to "is the lock older than `staleMs`?") — whether to take it over.
|
||||
The lock file sits next to the protected resource. If a process crashes mid-lock, the next acquirer notices the held entry, inspects its payload (PID, host, acquired-at timestamp), and decides — via `shouldReclaim` (defaulting to "is the lock older than `staleMs`?") — whether it should keep waiting or fail.
|
||||
|
||||
The library installs a `process.on("exit")` handler that releases all currently-held locks synchronously, so well-behaved exits leave no stale sidecars. Crashes still need the reclaim path.
|
||||
The library installs a `process.on("exit")` handler that releases all currently-held locks synchronously, so well-behaved exits leave no stale sidecars. Crashed holders leave their sidecar behind; remove those through an application-owned recovery path after you have proved the holder cannot still be writing.
|
||||
|
||||
## API
|
||||
|
||||
@ -48,9 +48,10 @@ function createFileLockManager(key: string): FileLockManager;
|
||||
type FileLockAcquireOptions<TPayload extends Record<string, unknown>> = {
|
||||
managerKey?: string; // optional in-process manager namespace
|
||||
lockPath?: string; // override; defaults to `${targetPath}.lock`
|
||||
staleMs: number; // how long until a held lock is considered stale
|
||||
staleMs?: number; // default 30_000
|
||||
timeoutMs?: number; // overall acquire deadline; default unbounded
|
||||
retry?: FileLockRetryOptions;
|
||||
staleRecovery?: "fail-closed"; // default
|
||||
allowReentrant?: boolean; // if this process already holds it, increment a count instead of failing
|
||||
payload: () => TPayload | Promise<TPayload>;
|
||||
shouldReclaim?: (params: {
|
||||
@ -100,7 +101,7 @@ try {
|
||||
}
|
||||
```
|
||||
|
||||
If your process dies before `release()` runs and skips the exit handler, the next acquirer reclaims the lock once `staleMs` elapses (or your `shouldReclaim` returns true).
|
||||
If your process dies before `release()` runs and skips the exit handler, the sidecar remains. Once `staleMs` elapses (or your `shouldReclaim` returns true), acquisition fails closed instead of deleting by path, because Node cannot atomically bind that deletion to the file that was inspected.
|
||||
|
||||
## `withFileLock` — common shape made one-liner
|
||||
|
||||
@ -139,9 +140,9 @@ await handle.release();
|
||||
await locks.drain();
|
||||
```
|
||||
|
||||
## Reclaim policy: `shouldReclaim`
|
||||
## Stale policy: `shouldReclaim`
|
||||
|
||||
The default policy reclaims locks whose `acquiredAt` is older than `staleMs`. Pass a custom callback when you want a richer notion of "is the holder still alive":
|
||||
The default policy treats locks whose `createdAt` is older than `staleMs` as stale. Pass a custom callback when you want a richer notion of "is the holder still alive":
|
||||
|
||||
```ts
|
||||
import { kill } from "node:process";
|
||||
@ -155,26 +156,26 @@ const handle = await acquireFileLock(targetPath, {
|
||||
if (!Number.isFinite(pid)) return true;
|
||||
try {
|
||||
kill(pid, 0);
|
||||
return false; // process still alive — don't reclaim
|
||||
return false; // process still alive — keep waiting
|
||||
} catch {
|
||||
return true; // process gone — reclaim
|
||||
return true; // process gone — fail closed for recovery
|
||||
}
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
`heldByThisProcess` is true when this manager already holds the lock (relevant for the reentrant case).
|
||||
`heldByThisProcess` is true when this manager already holds the lock (relevant for the reentrant case). A `true` result does not delete the sidecar; it lets the acquire loop stop waiting once the retry/timeout policy says to give up.
|
||||
|
||||
## What sidecar locks defend against
|
||||
|
||||
- **Two processes writing the same file at once.** `acquire` serializes the critical section.
|
||||
- **A crashed holder leaving a stale lock.** `staleMs` plus optional `shouldReclaim` recovers it.
|
||||
- **Accidentally deleting a fresh lock during stale recovery.** Stale third-party locks fail closed because safe compare-and-unlink is not available through Node's path APIs.
|
||||
- **Race between simultaneous acquire attempts.** `O_CREAT | O_EXCL` ensures one wins.
|
||||
|
||||
## What they do **not** defend against
|
||||
|
||||
- **Misbehaving holders that ignore the lock.** Locks are advisory — only callers that go through `acquire` are bound.
|
||||
- **Holders that never call `release` and have no liveness check.** Without a real `shouldReclaim`, the lock relies on `staleMs` alone — pick a deadline that is comfortably longer than your real work but short enough to recover from crashes.
|
||||
- **Automatic stale lock deletion.** If a process crashes, use the payload and your own supervisor/process table to decide when to remove the sidecar.
|
||||
- **Multi-host coordination over network filesystems.** Behavior depends on the underlying filesystem's `O_EXCL` semantics; treat as best-effort.
|
||||
|
||||
## Common patterns
|
||||
|
||||
@ -34,6 +34,11 @@ export {
|
||||
trySafeFileURLToPath,
|
||||
} from "./local-file-access.js";
|
||||
export { formatPosixMode } from "./mode.js";
|
||||
export {
|
||||
configureFsSafeLocks,
|
||||
getFsSafeLockConfig,
|
||||
type FsSafeLockConfig,
|
||||
} from "./lock-config.js";
|
||||
export {
|
||||
assertNoHardlinkedFinalPath,
|
||||
assertNoPathAliasEscape,
|
||||
|
||||
@ -4,3 +4,8 @@ export {
|
||||
type FsSafePythonConfig,
|
||||
type FsSafePythonMode,
|
||||
} from "./pinned-python-config.js";
|
||||
export {
|
||||
configureFsSafeLocks,
|
||||
getFsSafeLockConfig,
|
||||
type FsSafeLockConfig,
|
||||
} from "./lock-config.js";
|
||||
|
||||
@ -4,15 +4,19 @@ import {
|
||||
type SidecarLockHandle,
|
||||
type SidecarLockHeldEntry,
|
||||
type SidecarLockRetryOptions,
|
||||
type SidecarLockStaleRecovery,
|
||||
} from "./sidecar-lock.js";
|
||||
import { getFsSafeLockConfig } from "./lock-config.js";
|
||||
|
||||
export type FileLockRetryOptions = SidecarLockRetryOptions;
|
||||
export type FileLockStaleRecovery = SidecarLockStaleRecovery;
|
||||
|
||||
export type FileLockAcquireOptions<TPayload extends Record<string, unknown>> = Omit<
|
||||
SidecarLockAcquireOptions<TPayload>,
|
||||
"targetPath"
|
||||
"targetPath" | "staleMs"
|
||||
> & {
|
||||
managerKey?: string;
|
||||
staleMs?: number;
|
||||
};
|
||||
|
||||
export type FileLockHandle = SidecarLockHandle;
|
||||
@ -37,6 +41,19 @@ function resolveFileLockManagerKey(targetPath: string, managerKey?: string): str
|
||||
return managerKey ?? `fs-safe.file-lock:${targetPath}`;
|
||||
}
|
||||
|
||||
function withLockDefaults<TPayload extends Record<string, unknown>>(
|
||||
options: FileLockAcquireOptions<TPayload>,
|
||||
): Omit<SidecarLockAcquireOptions<TPayload>, "targetPath"> {
|
||||
const defaults = getFsSafeLockConfig();
|
||||
return {
|
||||
...options,
|
||||
retry: options.retry ?? defaults.retry,
|
||||
staleMs: options.staleMs ?? defaults.staleMs ?? 30_000,
|
||||
staleRecovery: options.staleRecovery ?? defaults.staleRecovery,
|
||||
timeoutMs: options.timeoutMs ?? defaults.timeoutMs,
|
||||
};
|
||||
}
|
||||
|
||||
export async function acquireFileLock<TPayload extends Record<string, unknown>>(
|
||||
targetPath: string,
|
||||
options: FileLockAcquireOptions<TPayload>,
|
||||
@ -59,11 +76,11 @@ export function createFileLockManager(key: string): FileLockManager {
|
||||
return {
|
||||
acquire: async (targetPath, options) => {
|
||||
const { managerKey: _managerKey, ...acquireOptions } = options;
|
||||
return await manager.acquire({ ...acquireOptions, targetPath });
|
||||
return await manager.acquire({ ...withLockDefaults(acquireOptions), targetPath });
|
||||
},
|
||||
withLock: async (targetPath, options, fn) => {
|
||||
const { managerKey: _managerKey, ...acquireOptions } = options;
|
||||
return await manager.withLock({ ...acquireOptions, targetPath }, fn);
|
||||
return await manager.withLock({ ...withLockDefaults(acquireOptions), targetPath }, fn);
|
||||
},
|
||||
drain: manager.drain,
|
||||
reset: manager.reset,
|
||||
|
||||
@ -37,3 +37,8 @@ export {
|
||||
type ExternalFileWriteOptions,
|
||||
type ExternalFileWriteResult,
|
||||
} from "./output.js";
|
||||
export {
|
||||
configureFsSafeLocks,
|
||||
getFsSafeLockConfig,
|
||||
type FsSafeLockConfig,
|
||||
} from "./lock-config.js";
|
||||
|
||||
@ -1,10 +1,13 @@
|
||||
import type { FileLockRetryOptions } from "./file-lock.js";
|
||||
import { getFsSafeLockConfig } from "./lock-config.js";
|
||||
import { createSidecarLockManager } from "./sidecar-lock.js";
|
||||
import type { SidecarLockStaleRecovery } from "./sidecar-lock.js";
|
||||
|
||||
export type JsonStoreLockOptions = {
|
||||
staleMs?: number;
|
||||
timeoutMs?: number;
|
||||
retry?: FileLockRetryOptions;
|
||||
staleRecovery?: SidecarLockStaleRecovery;
|
||||
managerKey?: string;
|
||||
};
|
||||
|
||||
@ -45,11 +48,13 @@ function resolveLockOptions(
|
||||
return null;
|
||||
}
|
||||
const lockOptions = options.lock === true ? {} : options.lock;
|
||||
const defaults = getFsSafeLockConfig();
|
||||
return {
|
||||
managerKey: lockOptions.managerKey ?? `fs-safe.json-store:${filePath}`,
|
||||
retry: lockOptions.retry ?? {},
|
||||
staleMs: lockOptions.staleMs ?? 30_000,
|
||||
timeoutMs: lockOptions.timeoutMs ?? 30_000,
|
||||
retry: lockOptions.retry ?? defaults.retry ?? {},
|
||||
staleMs: lockOptions.staleMs ?? defaults.staleMs ?? 30_000,
|
||||
staleRecovery: lockOptions.staleRecovery ?? defaults.staleRecovery,
|
||||
timeoutMs: lockOptions.timeoutMs ?? defaults.timeoutMs ?? 30_000,
|
||||
};
|
||||
}
|
||||
|
||||
@ -84,6 +89,7 @@ export function createJsonStore<T>(
|
||||
staleMs: lockOptions.staleMs,
|
||||
timeoutMs: lockOptions.timeoutMs,
|
||||
retry: lockOptions.retry,
|
||||
staleRecovery: lockOptions.staleRecovery,
|
||||
allowReentrant: true,
|
||||
payload: () => ({ pid: process.pid, createdAt: new Date().toISOString() }),
|
||||
},
|
||||
|
||||
25
src/lock-config.ts
Normal file
25
src/lock-config.ts
Normal file
@ -0,0 +1,25 @@
|
||||
import type { SidecarLockRetryOptions, SidecarLockStaleRecovery } from "./sidecar-lock.js";
|
||||
|
||||
export type FsSafeLockConfig = {
|
||||
staleRecovery: SidecarLockStaleRecovery;
|
||||
staleMs?: number;
|
||||
timeoutMs?: number;
|
||||
retry?: SidecarLockRetryOptions;
|
||||
};
|
||||
|
||||
const DEFAULT_LOCK_CONFIG: FsSafeLockConfig = {
|
||||
staleRecovery: "fail-closed",
|
||||
};
|
||||
|
||||
let lockConfig: FsSafeLockConfig = { ...DEFAULT_LOCK_CONFIG };
|
||||
|
||||
export function configureFsSafeLocks(config: Partial<FsSafeLockConfig>): void {
|
||||
// Process defaults only fill lock options after a caller explicitly enables
|
||||
// locking for a resource; this must never turn sidecar locks on globally.
|
||||
lockConfig = { ...lockConfig, ...config };
|
||||
}
|
||||
|
||||
export function getFsSafeLockConfig(): FsSafeLockConfig {
|
||||
return { ...lockConfig, retry: lockConfig.retry ? { ...lockConfig.retry } : undefined };
|
||||
}
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
import fsSync from "node:fs";
|
||||
import type { Stats } from "node:fs";
|
||||
import type { FileHandle } from "node:fs/promises";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { sameFileIdentity } from "./file-identity.js";
|
||||
|
||||
export type SidecarLockRetryOptions = {
|
||||
retries?: number;
|
||||
@ -11,12 +13,15 @@ export type SidecarLockRetryOptions = {
|
||||
randomize?: boolean;
|
||||
};
|
||||
|
||||
export type SidecarLockStaleRecovery = "fail-closed";
|
||||
|
||||
export type SidecarLockAcquireOptions<TPayload extends Record<string, unknown>> = {
|
||||
targetPath: string;
|
||||
lockPath?: string;
|
||||
staleMs: number;
|
||||
timeoutMs?: number;
|
||||
retry?: SidecarLockRetryOptions;
|
||||
staleRecovery?: SidecarLockStaleRecovery;
|
||||
allowReentrant?: boolean;
|
||||
payload: () => TPayload | Promise<TPayload>;
|
||||
shouldReclaim?: (params: {
|
||||
@ -56,6 +61,7 @@ type HeldLock = {
|
||||
count: number;
|
||||
handle: FileHandle;
|
||||
lockPath: string;
|
||||
snapshot: LockSnapshot;
|
||||
acquiredAt: number;
|
||||
metadata: Record<string, unknown>;
|
||||
releasePromise?: Promise<void>;
|
||||
@ -88,17 +94,78 @@ function resolveManagerState(key: string): SidecarLockManagerState {
|
||||
return state;
|
||||
}
|
||||
|
||||
async function readJsonPayload(lockPath: string): Promise<Record<string, unknown> | null> {
|
||||
type LockSnapshot = {
|
||||
raw?: string;
|
||||
payload: Record<string, unknown> | null;
|
||||
stat?: Stats;
|
||||
};
|
||||
|
||||
async function readLockSnapshot(lockPath: string): Promise<LockSnapshot | null> {
|
||||
try {
|
||||
const parsed = JSON.parse(await fs.readFile(lockPath, "utf8")) as unknown;
|
||||
return parsed && typeof parsed === "object" && !Array.isArray(parsed)
|
||||
? (parsed as Record<string, unknown>)
|
||||
: null;
|
||||
const stat = await fs.lstat(lockPath);
|
||||
const raw = await fs.readFile(lockPath, "utf8");
|
||||
try {
|
||||
const parsed = JSON.parse(raw) as unknown;
|
||||
const payload =
|
||||
parsed && typeof parsed === "object" && !Array.isArray(parsed)
|
||||
? (parsed as Record<string, unknown>)
|
||||
: null;
|
||||
return { raw, payload, stat };
|
||||
} catch {
|
||||
return { raw, payload: null, stat };
|
||||
}
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function snapshotMatches(current: LockSnapshot, observed: LockSnapshot): boolean {
|
||||
if (observed.stat && current.stat && !sameFileIdentity(observed.stat, current.stat)) {
|
||||
return false;
|
||||
}
|
||||
if (observed.raw !== undefined) {
|
||||
return current.raw === observed.raw;
|
||||
}
|
||||
return observed.stat !== undefined && current.stat !== undefined;
|
||||
}
|
||||
|
||||
async function removeLockIfUnchanged(
|
||||
lockPath: string,
|
||||
observed: LockSnapshot | null,
|
||||
): Promise<boolean> {
|
||||
const current = await readLockSnapshot(lockPath);
|
||||
if (!current || !observed) {
|
||||
return false;
|
||||
}
|
||||
if (!snapshotMatches(current, observed)) {
|
||||
// The lock changed after we decided it was stale. Leave the fresh holder's
|
||||
// file alone; deleting by path here would break mutual exclusion.
|
||||
return false;
|
||||
}
|
||||
await fs.rm(lockPath, { force: true }).catch(() => undefined);
|
||||
return true;
|
||||
}
|
||||
|
||||
async function lockSnapshotStillPresent(
|
||||
lockPath: string,
|
||||
observed: LockSnapshot | null,
|
||||
): Promise<boolean> {
|
||||
const current = await readLockSnapshot(lockPath);
|
||||
return !!current && !!observed && snapshotMatches(current, observed);
|
||||
}
|
||||
|
||||
function snapshotMatchesSync(lockPath: string, observed: LockSnapshot): boolean {
|
||||
try {
|
||||
const stat = fsSync.lstatSync(lockPath);
|
||||
if (observed.stat && !sameFileIdentity(observed.stat, stat)) {
|
||||
return false;
|
||||
}
|
||||
return observed.raw === undefined || fsSync.readFileSync(lockPath, "utf8") === observed.raw;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async function resolveNormalizedTargetPath(targetPath: string): Promise<string> {
|
||||
const resolved = path.resolve(targetPath);
|
||||
const dir = path.dirname(resolved);
|
||||
@ -142,7 +209,9 @@ function releaseAllLocksSync(state: SidecarLockManagerState): void {
|
||||
for (const [normalizedTargetPath, held] of state.held) {
|
||||
void held.handle.close().catch(() => undefined);
|
||||
try {
|
||||
fsSync.rmSync(held.lockPath, { force: true });
|
||||
if (snapshotMatchesSync(held.lockPath, held.snapshot)) {
|
||||
fsSync.rmSync(held.lockPath, { force: true });
|
||||
}
|
||||
} catch {
|
||||
// Best-effort process-exit cleanup.
|
||||
}
|
||||
@ -175,7 +244,7 @@ async function releaseHeldLock(
|
||||
state.held.delete(normalizedTargetPath);
|
||||
held.releasePromise = (async () => {
|
||||
await held.handle.close().catch(() => undefined);
|
||||
await fs.rm(held.lockPath, { force: true }).catch(() => undefined);
|
||||
await removeLockIfUnchanged(held.lockPath, held.snapshot);
|
||||
})();
|
||||
try {
|
||||
await held.releasePromise;
|
||||
@ -222,15 +291,19 @@ export function createSidecarLockManager(key: string) {
|
||||
let handle: FileHandle | null = null;
|
||||
try {
|
||||
handle = await fs.open(lockPath, "wx");
|
||||
const payload = await options.payload();
|
||||
const raw = `${JSON.stringify(payload, null, 2)}\n`;
|
||||
await handle.writeFile(raw, "utf8");
|
||||
const snapshot = { raw, payload, stat: await handle.stat() };
|
||||
const createdHeld: HeldLock = {
|
||||
count: 1,
|
||||
handle,
|
||||
lockPath,
|
||||
snapshot,
|
||||
acquiredAt: Date.now(),
|
||||
metadata: options.metadata ?? {},
|
||||
};
|
||||
state.held.set(normalizedTargetPath, createdHeld);
|
||||
await handle.writeFile(`${JSON.stringify(await options.payload(), null, 2)}\n`, "utf8");
|
||||
const release = () =>
|
||||
releaseHeldLock(state, normalizedTargetPath, createdHeld).then(() => undefined);
|
||||
return {
|
||||
@ -241,31 +314,57 @@ export function createSidecarLockManager(key: string) {
|
||||
};
|
||||
} catch (err) {
|
||||
if (handle) {
|
||||
const failedSnapshot: LockSnapshot = { payload: null };
|
||||
try {
|
||||
failedSnapshot.stat = await handle.stat();
|
||||
} catch {
|
||||
// Best-effort cleanup of a failed exclusive create.
|
||||
}
|
||||
const current = state.held.get(normalizedTargetPath);
|
||||
if (current?.handle === handle) {
|
||||
state.held.delete(normalizedTargetPath);
|
||||
}
|
||||
await handle.close().catch(() => undefined);
|
||||
// If payload serialization/write fails, the file may be empty or
|
||||
// partial JSON, so remove while our exclusive handle is still open.
|
||||
await fs.rm(lockPath, { force: true }).catch(() => undefined);
|
||||
await handle.close().catch(() => undefined);
|
||||
// Windows can refuse removing an open file; retry after close but
|
||||
// only if the path still points at the file identity we created.
|
||||
await removeLockIfUnchanged(lockPath, failedSnapshot);
|
||||
}
|
||||
if ((err as { code?: unknown }).code !== "EEXIST") {
|
||||
throw err;
|
||||
}
|
||||
const nowMs = Date.now();
|
||||
const payload = await readJsonPayload(lockPath);
|
||||
const snapshot = await readLockSnapshot(lockPath);
|
||||
if (!snapshot) {
|
||||
continue;
|
||||
}
|
||||
const shouldReclaim = options.shouldReclaim ?? defaultShouldReclaim;
|
||||
if (
|
||||
await shouldReclaim({
|
||||
lockPath,
|
||||
normalizedTargetPath,
|
||||
payload,
|
||||
payload: snapshot?.payload ?? null,
|
||||
staleMs: options.staleMs,
|
||||
nowMs,
|
||||
heldByThisProcess: state.held.has(normalizedTargetPath),
|
||||
})
|
||||
) {
|
||||
await fs.rm(lockPath, { force: true }).catch(() => undefined);
|
||||
continue;
|
||||
if (!(await lockSnapshotStillPresent(lockPath, snapshot))) {
|
||||
continue;
|
||||
}
|
||||
// Node exposes only path-based unlink/rename here. A stale-lock
|
||||
// reclaimer cannot bind the delete to the file it inspected, so a
|
||||
// concurrent release+fresh-acquire could otherwise lose its lock.
|
||||
// Fail closed and let callers choose a higher-level recovery path.
|
||||
if ((options.staleRecovery ?? "fail-closed") === "fail-closed") {
|
||||
throw Object.assign(new Error(`file lock stale for ${normalizedTargetPath}`), {
|
||||
code: "file_lock_stale",
|
||||
lockPath,
|
||||
normalizedTargetPath,
|
||||
});
|
||||
}
|
||||
}
|
||||
const elapsed = Date.now() - startedAt;
|
||||
if (
|
||||
|
||||
@ -329,20 +329,24 @@ describe("sidecar lock manager", () => {
|
||||
manager.reset();
|
||||
});
|
||||
|
||||
it("times out and reclaims stale locks", async () => {
|
||||
it("times out on stale locks without deleting them by path", async () => {
|
||||
const root = await tempRoot("fs-safe-sidecar-timeout-");
|
||||
const targetPath = path.join(root, "state.json");
|
||||
const lockPath = `${targetPath}.lock`;
|
||||
const manager = createSidecarLockManager(`coverage-timeout-${Date.now()}-${Math.random()}`);
|
||||
await fs.writeFile(lockPath, "{\"createdAt\":\"2000-01-01T00:00:00.000Z\"}\n", "utf8");
|
||||
|
||||
const reclaimed = await manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
payload: () => ({ owner: "coverage" }),
|
||||
});
|
||||
await reclaimed.release();
|
||||
await expect(
|
||||
manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
timeoutMs: 1,
|
||||
retry: { retries: 0, minTimeout: 1, maxTimeout: 1 },
|
||||
payload: () => ({ owner: "coverage" }),
|
||||
}),
|
||||
).rejects.toMatchObject({ code: "file_lock_stale" });
|
||||
await expect(fs.readFile(lockPath, "utf8")).resolves.toContain("2000");
|
||||
|
||||
await fs.writeFile(lockPath, "{\"createdAt\":\"2999-01-01T00:00:00.000Z\"}\n", "utf8");
|
||||
await expect(
|
||||
|
||||
181
test/sidecar-lock-regression.test.ts
Normal file
181
test/sidecar-lock-regression.test.ts
Normal file
@ -0,0 +1,181 @@
|
||||
import fsp from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { fileStore } from "../src/file-store.js";
|
||||
import { acquireFileLock } from "../src/file-lock.js";
|
||||
import { configureFsSafeLocks, getFsSafeLockConfig } from "../src/lock-config.js";
|
||||
import { createSidecarLockManager } from "../src/sidecar-lock.js";
|
||||
|
||||
const tempDirs: string[] = [];
|
||||
|
||||
async function tempRoot(prefix: string): Promise<string> {
|
||||
const dir = await fsp.mkdtemp(path.join(os.tmpdir(), prefix));
|
||||
tempDirs.push(dir);
|
||||
return dir;
|
||||
}
|
||||
|
||||
afterEach(async () => {
|
||||
configureFsSafeLocks({
|
||||
retry: undefined,
|
||||
staleMs: undefined,
|
||||
staleRecovery: "fail-closed",
|
||||
timeoutMs: undefined,
|
||||
});
|
||||
await Promise.all(tempDirs.splice(0).map((dir) => fsp.rm(dir, { recursive: true, force: true })));
|
||||
});
|
||||
|
||||
describe("sidecar lock regressions", () => {
|
||||
it("does not delete a fresh sidecar lock during stale reclaim or old release", async () => {
|
||||
const base = await tempRoot("fs-safe-sidecar-token-");
|
||||
const targetPath = path.join(base, "state.json");
|
||||
const lockPath = `${targetPath}.lock`;
|
||||
const manager = createSidecarLockManager(`fs-safe-test-${Date.now()}`);
|
||||
const held = await manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
payload: async () => ({ createdAt: "2000-01-01T00:00:00.000Z" }),
|
||||
});
|
||||
|
||||
await fsp.writeFile(lockPath, JSON.stringify({ createdAt: new Date().toISOString() }));
|
||||
await held.release();
|
||||
await expect(fsp.readFile(lockPath, "utf8")).resolves.toContain("createdAt");
|
||||
|
||||
let replaced = false;
|
||||
await expect(
|
||||
manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
timeoutMs: 1,
|
||||
retry: { retries: 0 },
|
||||
payload: async () => ({ createdAt: new Date().toISOString() }),
|
||||
shouldReclaim: async () => {
|
||||
if (!replaced) {
|
||||
replaced = true;
|
||||
await fsp.writeFile(lockPath, JSON.stringify({ createdAt: "2999-01-01T00:00:00.000Z" }));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
},
|
||||
}),
|
||||
).rejects.toMatchObject({ code: "file_lock_timeout" });
|
||||
await expect(fsp.readFile(lockPath, "utf8")).resolves.toContain("2999");
|
||||
});
|
||||
|
||||
it("keeps internal sidecar lock identity out of user payloads", async () => {
|
||||
const base = await tempRoot("fs-safe-sidecar-payload-");
|
||||
const targetPath = path.join(base, "state.json");
|
||||
const lockPath = `${targetPath}.lock`;
|
||||
const manager = createSidecarLockManager(`fs-safe-payload-test-${Date.now()}`);
|
||||
const lock = await manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
payload: async () => ({ createdAt: "2999-01-01T00:00:00.000Z", owner: "caller" }),
|
||||
});
|
||||
const raw = await fsp.readFile(lockPath, "utf8");
|
||||
expect(JSON.parse(raw)).toEqual({
|
||||
createdAt: "2999-01-01T00:00:00.000Z",
|
||||
owner: "caller",
|
||||
});
|
||||
await lock.release();
|
||||
|
||||
const payloads: Array<Record<string, unknown> | null> = [];
|
||||
await fsp.writeFile(lockPath, raw, "utf8");
|
||||
await expect(
|
||||
manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
timeoutMs: 1,
|
||||
retry: { retries: 0 },
|
||||
payload: async () => ({ createdAt: new Date().toISOString() }),
|
||||
shouldReclaim: async ({ payload }) => {
|
||||
payloads.push(payload);
|
||||
return false;
|
||||
},
|
||||
}),
|
||||
).rejects.toMatchObject({ code: "file_lock_timeout" });
|
||||
expect(payloads).toEqual([{ createdAt: "2999-01-01T00:00:00.000Z", owner: "caller" }]);
|
||||
});
|
||||
|
||||
it("retries when a contended sidecar disappears during stale detection", async () => {
|
||||
const base = await tempRoot("fs-safe-sidecar-vanish-");
|
||||
const targetPath = path.join(base, "state.json");
|
||||
const lockPath = `${targetPath}.lock`;
|
||||
const manager = createSidecarLockManager(`fs-safe-vanish-test-${Date.now()}`);
|
||||
await fsp.writeFile(lockPath, JSON.stringify({ createdAt: "2000-01-01T00:00:00.000Z" }));
|
||||
|
||||
const lock = await manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
timeoutMs: 1_000,
|
||||
retry: { retries: 3, minTimeout: 1, maxTimeout: 1 },
|
||||
payload: async () => ({ createdAt: new Date().toISOString(), owner: "next" }),
|
||||
shouldReclaim: async () => {
|
||||
await fsp.rm(lockPath, { force: true });
|
||||
return true;
|
||||
},
|
||||
});
|
||||
try {
|
||||
await expect(fsp.readFile(lockPath, "utf8")).resolves.toContain("next");
|
||||
} finally {
|
||||
await lock.release();
|
||||
}
|
||||
});
|
||||
|
||||
it("cleans failed sidecar locks and preserves stale corrupt locks", async () => {
|
||||
const base = await tempRoot("fs-safe-sidecar-corrupt-");
|
||||
const targetPath = path.join(base, "state.json");
|
||||
const lockPath = `${targetPath}.lock`;
|
||||
const manager = createSidecarLockManager(`fs-safe-corrupt-test-${Date.now()}`);
|
||||
|
||||
await expect(
|
||||
manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
payload: async () => {
|
||||
throw new Error("payload failed");
|
||||
},
|
||||
}),
|
||||
).rejects.toThrow("payload failed");
|
||||
await expect(fsp.stat(lockPath)).rejects.toMatchObject({ code: "ENOENT" });
|
||||
|
||||
await fsp.writeFile(lockPath, "{", "utf8");
|
||||
await fsp.utimes(lockPath, new Date(0), new Date(0));
|
||||
await expect(
|
||||
manager.acquire({
|
||||
targetPath,
|
||||
lockPath,
|
||||
staleMs: 1,
|
||||
timeoutMs: 1,
|
||||
retry: { retries: 0 },
|
||||
payload: async () => ({ createdAt: new Date().toISOString() }),
|
||||
}),
|
||||
).rejects.toMatchObject({ code: "file_lock_stale" });
|
||||
await expect(fsp.readFile(lockPath, "utf8")).resolves.toBe("{");
|
||||
});
|
||||
|
||||
it("keeps lock config as explicit defaults, not global auto-locking", async () => {
|
||||
const base = await tempRoot("fs-safe-lock-config-");
|
||||
const statePath = path.join(base, "state.json");
|
||||
configureFsSafeLocks({ staleMs: 1, timeoutMs: 1, retry: { retries: 0 } });
|
||||
|
||||
const unlocked = fileStore({ rootDir: base }).json<{ count: number }>("state.json");
|
||||
await unlocked.write({ count: 1 });
|
||||
await expect(fsp.stat(`${statePath}.lock`)).rejects.toMatchObject({ code: "ENOENT" });
|
||||
|
||||
const config = getFsSafeLockConfig();
|
||||
expect(config.staleRecovery).toBe("fail-closed");
|
||||
expect(config.timeoutMs).toBe(1);
|
||||
|
||||
const lock = await acquireFileLock(path.join(base, "direct.json"), {
|
||||
payload: async () => ({ owner: "direct" }),
|
||||
});
|
||||
await lock.release();
|
||||
});
|
||||
});
|
||||
Loading…
Reference in New Issue
Block a user