fix: repair screen recording capture and encoding pipeline
- Fix InvalidCastException in CreateForMonitor: pass IID_IInspectable instead of typeof(GraphicsCaptureItem).GUID, which returns a C#/WinRT-generated GUID unrecognized by the native COM method (E_NOINTERFACE). - Replace PrepareStreamTranscodeAsync with PrepareMediaStreamSourceTranscodeAsync + MediaStreamSource feeding NV12 samples on demand, fixing "Transcode failed: Unknown" on all three screen recording commands. - Add 500 MB frame-buffer cap (MaxFrameBufferBytes) with early stop and warning log to prevent OOM on long or high-fps recordings. - Save encoded MP4 to %TEMP%\openclaw\ and return filePath in the response. - Change ScreenRecordResult.Fps from float to int. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
21b0d315be
commit
f4dbc521df
@ -147,6 +147,7 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
{
|
||||
format = result.Format,
|
||||
base64 = result.Base64,
|
||||
filePath = result.FilePath,
|
||||
durationMs = result.DurationMs,
|
||||
fps = result.Fps,
|
||||
screenIndex = result.ScreenIndex,
|
||||
@ -158,7 +159,7 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("screen.record failed", ex);
|
||||
return Error($"Record failed: {ex.Message}");
|
||||
return Error($"Record failed: {ex.GetType().Name}: {ex.Message} | {ex.StackTrace?.Split('\n').FirstOrDefault()?.Trim()}");
|
||||
}
|
||||
}
|
||||
|
||||
@ -207,6 +208,7 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
{
|
||||
format = result.Format,
|
||||
base64 = result.Base64,
|
||||
filePath = result.FilePath,
|
||||
durationMs = result.DurationMs,
|
||||
fps = result.Fps,
|
||||
screenIndex = result.ScreenIndex,
|
||||
@ -223,6 +225,12 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Parameters for a fixed-duration screen recording.
|
||||
/// Memory usage: width × height × 4 bytes × (durationMs/1000 × fps) frames.
|
||||
/// Recommended limits: durationMs ≤ 6 000 at fps ≤ 10 for 1080p (≈ 60 frames at ≈ 8 MB each) to stay under 500 MB.
|
||||
/// The service enforces a hard 500 MB frame-buffer cap and stops capture early if exceeded.
|
||||
/// </summary>
|
||||
public class ScreenRecordArgs
|
||||
{
|
||||
public int DurationMs { get; set; } = 5000;
|
||||
@ -230,6 +238,10 @@ public class ScreenRecordArgs
|
||||
public int ScreenIndex { get; set; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Parameters for an open-ended screen recording session (screen.record.start / screen.record.stop).
|
||||
/// The same 500 MB frame-buffer cap applies; capture stops automatically if the limit is hit.
|
||||
/// </summary>
|
||||
public class ScreenRecordStartArgs
|
||||
{
|
||||
public int Fps { get; set; } = 10;
|
||||
@ -240,8 +252,9 @@ public class ScreenRecordResult
|
||||
{
|
||||
public string Base64 { get; set; } = "";
|
||||
public string Format { get; set; } = "mp4";
|
||||
public string? FilePath { get; set; }
|
||||
public int DurationMs { get; set; }
|
||||
public float Fps { get; set; }
|
||||
public int Fps { get; set; }
|
||||
public int ScreenIndex { get; set; }
|
||||
public int Width { get; set; }
|
||||
public int Height { get; set; }
|
||||
|
||||
@ -8,6 +8,7 @@ using Windows.Graphics.Capture;
|
||||
using Windows.Graphics.DirectX;
|
||||
using Windows.Graphics.DirectX.Direct3D11;
|
||||
using Windows.Graphics.Imaging;
|
||||
using Windows.Media.Core;
|
||||
using Windows.Media.MediaProperties;
|
||||
using Windows.Media.Transcoding;
|
||||
using Windows.Storage.Streams;
|
||||
@ -31,6 +32,11 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
private const int MaxDurationMs = 60_000;
|
||||
private const int PoolBuffers = 2;
|
||||
|
||||
// BGRA frame buffer safety cap: ~500 MB across all queued frames.
|
||||
// At 1080p (8 MB/frame) this allows ~62 frames; at 720p (~4 MB) ~125 frames.
|
||||
// Capture stops early once this limit is reached, to prevent OOM on long/high-fps recordings.
|
||||
private const long MaxFrameBufferBytes = 500L * 1024 * 1024;
|
||||
|
||||
public ScreenRecordingService(IOpenClawLogger logger)
|
||||
{
|
||||
_logger = logger;
|
||||
@ -56,6 +62,7 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
var latestFrame = (Direct3D11CaptureFrame?)null;
|
||||
using var ready = new SemaphoreSlim(0, 1);
|
||||
var frames = new List<byte[]>();
|
||||
var frameBytes = (long)width * height * 4; // BGRA bytes per frame
|
||||
|
||||
try
|
||||
{
|
||||
@ -96,6 +103,12 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
|
||||
using (frame)
|
||||
{
|
||||
if (frames.Count * frameBytes >= MaxFrameBufferBytes)
|
||||
{
|
||||
_logger.Warn($"[ScreenRecording] Frame buffer cap reached ({MaxFrameBufferBytes / 1024 / 1024} MB), stopping early.");
|
||||
break;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface);
|
||||
@ -120,11 +133,13 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
_logger.Info($"[ScreenRecording] Captured {frames.Count} frames, encoding...");
|
||||
|
||||
var base64 = await EncodeToMp4Async(frames, width, height, fps);
|
||||
var filePath = SaveToTempFile(base64);
|
||||
|
||||
return new ScreenRecordResult
|
||||
{
|
||||
Format = "mp4",
|
||||
Base64 = base64,
|
||||
FilePath = filePath,
|
||||
DurationMs = durationMs,
|
||||
Fps = fps,
|
||||
ScreenIndex = screenIndex,
|
||||
@ -182,11 +197,13 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
|
||||
_logger.Info($"[ScreenRecording] session {recordingId}: {frames.Count} frames, encoding...");
|
||||
var base64 = await EncodeToMp4Async(frames, width, height, fps);
|
||||
var filePath = SaveToTempFile(base64);
|
||||
|
||||
return new ScreenRecordResult
|
||||
{
|
||||
Format = "mp4",
|
||||
Base64 = base64,
|
||||
FilePath = filePath,
|
||||
DurationMs = durationMs,
|
||||
Fps = fps,
|
||||
ScreenIndex = screenIndex,
|
||||
@ -205,82 +222,116 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
// ── Temp file ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Decodes a base64 payload and writes it to a uniquely named <c>.mp4</c> file
/// inside the <c>openclaw</c> folder under the system temp path.
/// </summary>
/// <param name="base64">Base64 text whose decoded bytes become the file contents.</param>
/// <returns>The full path of the file that was written.</returns>
private string SaveToTempFile(string base64)
{
    // Make sure the destination folder exists before building the file name.
    string folder = Path.Combine(Path.GetTempPath(), "openclaw");
    Directory.CreateDirectory(folder);

    // A fresh GUID per call keeps successive recordings from overwriting each other.
    string fileName = $"openclaw-screen-record-{Guid.NewGuid()}.mp4";
    string target = Path.Combine(folder, fileName);

    byte[] payload = Convert.FromBase64String(base64);
    File.WriteAllBytes(target, payload);

    _logger.Info($"[ScreenRecording] Saved to {target}");
    return target;
}
|
||||
|
||||
// ── Encoding ──────────────────────────────────────────────────────────────
|
||||
|
||||
private static async Task<string> EncodeToMp4Async(
|
||||
List<byte[]> frames, int width, int height, int fps)
|
||||
{
|
||||
var output = new InMemoryRandomAccessStream();
|
||||
if (frames.Count == 0)
|
||||
throw new InvalidOperationException("No frames to encode");
|
||||
|
||||
var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.HD720p);
|
||||
profile.Video.Width = (uint)width;
|
||||
profile.Video.Height = (uint)height;
|
||||
profile.Video.FrameRate.Numerator = (uint)fps;
|
||||
profile.Video.FrameRate.Denominator = 1;
|
||||
profile.Audio = null;
|
||||
var encWidth = (uint)(width & ~1);
|
||||
var encHeight = (uint)(height & ~1);
|
||||
var fi = new[] { 0 };
|
||||
|
||||
var input = BuildRawVideoStream(frames, width, height);
|
||||
|
||||
PrepareTranscodeResult? xcode = null;
|
||||
try
|
||||
MediaStreamSource MakeMss()
|
||||
{
|
||||
xcode = await new MediaTranscoder { HardwareAccelerationEnabled = true }
|
||||
.PrepareStreamTranscodeAsync(input, output, profile);
|
||||
}
|
||||
catch
|
||||
{
|
||||
xcode = await new MediaTranscoder { HardwareAccelerationEnabled = false }
|
||||
.PrepareStreamTranscodeAsync(input, output, profile);
|
||||
}
|
||||
|
||||
if (!xcode.CanTranscode)
|
||||
throw new InvalidOperationException($"Transcode failed: {xcode.FailureReason}");
|
||||
|
||||
await xcode.TranscodeAsync();
|
||||
|
||||
output.Seek(0);
|
||||
var reader = new DataReader(output);
|
||||
await reader.LoadAsync((uint)output.Size);
|
||||
var bytes = new byte[output.Size];
|
||||
reader.ReadBytes(bytes);
|
||||
return Convert.ToBase64String(bytes);
|
||||
}
|
||||
|
||||
private static InMemoryRandomAccessStream BuildRawVideoStream(
|
||||
List<byte[]> frames, int width, int height)
|
||||
{
|
||||
var stream = new InMemoryRandomAccessStream();
|
||||
var writer = new DataWriter(stream);
|
||||
foreach (var frame in frames)
|
||||
writer.WriteBytes(BgraToNv12(frame, width, height));
|
||||
writer.StoreAsync().AsTask().Wait();
|
||||
stream.Seek(0);
|
||||
return stream;
|
||||
}
|
||||
|
||||
/// <summary>BT.601 limited-range BGRA→NV12 conversion.</summary>
|
||||
private static byte[] BgraToNv12(byte[] bgra, int width, int height)
|
||||
{
|
||||
var nv12 = new byte[width * height * 3 / 2];
|
||||
int yBase = 0;
|
||||
int uvBase = width * height;
|
||||
|
||||
for (int y = 0; y < height; y++)
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
int i = (y * width + x) * 4;
|
||||
byte b = bgra[i], g = bgra[i + 1], r = bgra[i + 2];
|
||||
|
||||
nv12[yBase++] = (byte)(16 + (66 * r + 129 * g + 25 * b) / 256);
|
||||
|
||||
if ((y & 1) == 0 && (x & 1) == 0)
|
||||
fi[0] = 0;
|
||||
var inputProps = VideoEncodingProperties.CreateUncompressed(
|
||||
MediaEncodingSubtypes.Nv12, encWidth, encHeight);
|
||||
inputProps.FrameRate.Numerator = (uint)fps;
|
||||
inputProps.FrameRate.Denominator = 1;
|
||||
var mss = new MediaStreamSource(new VideoStreamDescriptor(inputProps));
|
||||
mss.BufferTime = TimeSpan.Zero;
|
||||
mss.SampleRequested += (_, e) =>
|
||||
{
|
||||
int uv = uvBase + (y / 2 * width) + (x & ~1);
|
||||
nv12[uv] = (byte)(128 + (-38 * r - 74 * g + 112 * b) / 256);
|
||||
nv12[uv + 1] = (byte)(128 + (112 * r - 94 * g - 18 * b) / 256);
|
||||
}
|
||||
if (fi[0] >= frames.Count) { e.Request.Sample = null; return; }
|
||||
var nv12 = BgraToNv12(frames[fi[0]], width, height, (int)encWidth, (int)encHeight);
|
||||
var ts = TimeSpan.FromTicks((long)(fi[0] * 10_000_000.0 / fps));
|
||||
var dur = TimeSpan.FromTicks((long)(10_000_000.0 / fps));
|
||||
var dw = new DataWriter();
|
||||
dw.WriteBytes(nv12);
|
||||
var sample = MediaStreamSample.CreateFromBuffer(dw.DetachBuffer(), ts);
|
||||
sample.Duration = dur;
|
||||
e.Request.Sample = sample;
|
||||
fi[0]++;
|
||||
};
|
||||
return mss;
|
||||
}
|
||||
|
||||
MediaEncodingProfile MakeProfile()
|
||||
{
|
||||
var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.Auto);
|
||||
profile.Video.Width = encWidth;
|
||||
profile.Video.Height = encHeight;
|
||||
profile.Video.Bitrate = 4_000_000;
|
||||
profile.Video.FrameRate.Numerator = (uint)fps;
|
||||
profile.Video.FrameRate.Denominator = 1;
|
||||
profile.Audio = null;
|
||||
return profile;
|
||||
}
|
||||
|
||||
foreach (var hwEnabled in new[] { true, false })
|
||||
{
|
||||
using var output = new InMemoryRandomAccessStream();
|
||||
var transcoder = new MediaTranscoder { HardwareAccelerationEnabled = hwEnabled };
|
||||
PrepareTranscodeResult result;
|
||||
try
|
||||
{
|
||||
result = await transcoder
|
||||
.PrepareMediaStreamSourceTranscodeAsync(MakeMss(), output, MakeProfile());
|
||||
}
|
||||
catch (System.Runtime.InteropServices.COMException) when (hwEnabled)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (!result.CanTranscode) continue;
|
||||
await result.TranscodeAsync();
|
||||
var size = (uint)output.Size;
|
||||
if (size == 0) continue;
|
||||
var dr = new DataReader(output.GetInputStreamAt(0));
|
||||
await dr.LoadAsync(size);
|
||||
var bytes = new byte[size];
|
||||
dr.ReadBytes(bytes);
|
||||
return Convert.ToBase64String(bytes);
|
||||
}
|
||||
|
||||
throw new InvalidOperationException("No encoder available (hardware or software)");
|
||||
}
|
||||
|
||||
/// <summary>
/// Converts the top-left <paramref name="encWidth"/> × <paramref name="encHeight"/> region of a
/// BGRA buffer into an NV12 buffer: a full-resolution Y plane followed by an interleaved U/V plane
/// at half resolution in both directions.
/// </summary>
/// <param name="bgra">Source pixels, 4 bytes per pixel in B, G, R, A order, indexed with a row pitch of <paramref name="srcWidth"/> pixels.</param>
/// <param name="srcWidth">Width of the source image in pixels; used as the source row pitch.</param>
/// <param name="srcHeight">Height of the source image in pixels (not read by the conversion).</param>
/// <param name="encWidth">Width of the output image in pixels.</param>
/// <param name="encHeight">Height of the output image in pixels.</param>
/// <returns>A new array of <c>encWidth * encHeight * 3 / 2</c> bytes holding the NV12 data.</returns>
private static byte[] BgraToNv12(byte[] bgra, int srcWidth, int srcHeight,
    int encWidth, int encHeight)
{
    // Fixed-point colour transforms; the +128 rounds before the shift by 8.
    static byte ToLuma(int r, int g, int b) => (byte)(((66 * r + 129 * g + 25 * b + 128) >> 8) + 16);
    static byte ToChromaU(int r, int g, int b) => (byte)(((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128);
    static byte ToChromaV(int r, int g, int b) => (byte)(((112 * r - 94 * g - 18 * b + 128) >> 8) + 128);

    int lumaSize = encWidth * encHeight;
    var planes = new byte[lumaSize * 3 / 2];

    // Pass 1: one Y byte for every output pixel.
    for (int row = 0; row < encHeight; row++)
    {
        int srcRowStart = row * srcWidth;
        int dstRowStart = row * encWidth;
        for (int col = 0; col < encWidth; col++)
        {
            int px = (srcRowStart + col) * 4;
            int blue = bgra[px];
            int green = bgra[px + 1];
            int red = bgra[px + 2];
            planes[dstRowStart + col] = ToLuma(red, green, blue);
        }
    }

    // Pass 2: one U/V pair per 2×2 block, sampled from the block's top-left pixel.
    for (int row = 0; row < encHeight; row += 2)
    {
        int srcRowStart = row * srcWidth;
        int chromaRowStart = lumaSize + (row / 2) * encWidth;
        for (int col = 0; col < encWidth; col += 2)
        {
            int px = (srcRowStart + col) * 4;
            int blue = bgra[px];
            int green = bgra[px + 1];
            int red = bgra[px + 2];
            int slot = chromaRowStart + col;
            planes[slot] = ToChromaU(red, green, blue);
            planes[slot + 1] = ToChromaV(red, green, blue);
        }
    }

    return planes;
}
|
||||
|
||||
@ -324,10 +375,10 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
var factory = (IGraphicsCaptureItemInterop)Marshal.GetObjectForIUnknown(factoryPtr);
|
||||
Marshal.Release(factoryPtr);
|
||||
|
||||
var itemIid = typeof(GraphicsCaptureItem).GUID;
|
||||
var itemIid = new Guid("AF86E2E0-B12D-4C6A-9C5A-D7AA65101E90"); // IInspectable
|
||||
factory.CreateForMonitor(monitors[screenIndex], in itemIid, out var itemPtr);
|
||||
|
||||
var item = MarshalInterface<GraphicsCaptureItem>.FromAbi(itemPtr);
|
||||
var item = MarshalInspectable<GraphicsCaptureItem>.FromAbi(itemPtr);
|
||||
Marshal.Release(itemPtr);
|
||||
return item;
|
||||
}
|
||||
@ -444,6 +495,7 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
{
|
||||
var intervalMs = 1000 / Fps;
|
||||
var nextCapture = DateTime.UtcNow;
|
||||
var frameBytes = (long)Width * Height * 4;
|
||||
|
||||
while (!ct.IsCancellationRequested)
|
||||
{
|
||||
@ -461,6 +513,15 @@ internal sealed class ScreenRecordingService : IDisposable
|
||||
|
||||
using (frame)
|
||||
{
|
||||
int frameCount;
|
||||
lock (_framesLock) frameCount = _frames.Count;
|
||||
if (frameCount * frameBytes >= MaxFrameBufferBytes)
|
||||
{
|
||||
_logger.Warn($"[ScreenRecording] Session {Id}: frame buffer cap reached ({MaxFrameBufferBytes / 1024 / 1024} MB), stopping capture.");
|
||||
_cts.Cancel();
|
||||
break;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user