]> git.proxmox.com Git - mirror_novnc.git/commitdiff
Add H.264 decoder
author Andri Yngvason <andri@yngvason.is>
Sat, 29 Jun 2024 13:41:50 +0000 (13:41 +0000)
committer Andri Yngvason <andri@yngvason.is>
Sun, 18 Aug 2024 14:06:25 +0000 (14:06 +0000)
This adds an H.264 decoder based on WebCodecs.

core/decoders/h264.js [new file with mode: 0644]
core/display.js
core/encodings.js
core/rfb.js
core/util/browser.js

diff --git a/core/decoders/h264.js b/core/decoders/h264.js
new file mode 100644 (file)
index 0000000..db144fc
--- /dev/null
@@ -0,0 +1,321 @@
+/*
+ * noVNC: HTML5 VNC client
+ * Copyright (C) 2024 The noVNC Authors
+ * Licensed under MPL 2.0 (see LICENSE.txt)
+ *
+ * See README.md for usage and integration instructions.
+ *
+ */
+
+import * as Log from '../util/logging.js';
+
+class H264Parser {
+    constructor(data) {
+        this._data = data;
+        this._index = 0;
+        this.profileIdc = null;
+        this.constraintSet = null;
+        this.levelIdc = null;
+    }
+
+    _getStartSequenceLen(index) {
+        let data = this._data;
+        if (data[index + 0] == 0 && data[index + 1] == 0 && data[index + 2] == 0 && data[index + 3] == 1) {
+            return 4;
+        }
+        if (data[index + 0] == 0 && data[index + 1] == 0 && data[index + 2] == 1) {
+            return 3;
+        }
+        return 0;
+    }
+
+    _indexOfNextNalUnit(index) {
+        let data = this._data;
+        for (let i = index; i < data.length; ++i) {
+            if (this._getStartSequenceLen(i) != 0) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    _parseSps(index) {
+        this.profileIdc = this._data[index];
+        this.constraintSet = this._data[index + 1];
+        this.levelIdc = this._data[index + 2];
+    }
+
+    _parseNalUnit(index) {
+        const firstByte = this._data[index];
+        if (firstByte & 0x80) {
+            throw new Error('H264 parsing sanity check failed, forbidden zero bit is set');
+        }
+        const unitType = firstByte & 0x1f;
+
+        switch (unitType) {
+            case 1: // coded slice, non-idr
+                return { slice: true };
+            case 5: // coded slice, idr
+                return { slice: true, key: true };
+            case 6: // sei
+                return {};
+            case 7: // sps
+                this._parseSps(index + 1);
+                return {};
+            case 8: // pps
+                return {};
+            default:
+                Log.Warn("Unhandled unit type: ", unitType);
+                break;
+        }
+        return {};
+    }
+
+    parse() {
+        const startIndex = this._index;
+        let isKey = false;
+
+        while (this._index < this._data.length) {
+            const startSequenceLen = this._getStartSequenceLen(this._index);
+            if (startSequenceLen == 0) {
+                throw new Error('Invalid start sequence in bit stream');
+            }
+
+            const { slice, key } = this._parseNalUnit(this._index + startSequenceLen);
+
+            let nextIndex = this._indexOfNextNalUnit(this._index + startSequenceLen);
+            if (nextIndex == -1) {
+                this._index = this._data.length;
+            } else {
+                this._index = nextIndex;
+            }
+
+            if (key) {
+                isKey = true;
+            }
+            if (slice) {
+                break;
+            }
+        }
+
+        if (startIndex === this._index) {
+            return null;
+        }
+
+        return {
+            frame: this._data.subarray(startIndex, this._index),
+            key: isKey,
+        };
+    }
+}
+
+class H264Context {
+    constructor(width, height) {
+        this.lastUsed = 0;
+        this._width = width;
+        this._height = height;
+        this._profileIdc = null;
+        this._constraintSet = null;
+        this._levelIdc = null;
+        this._decoder = null;
+        this._pendingFrames = [];
+    }
+
+    _handleFrame(frame) {
+        let pending = this._pendingFrames.shift();
+        if (pending === undefined) {
+            throw new Error("Pending frame queue empty when receiving frame from decoder");
+        }
+
+        if (pending.timestamp != frame.timestamp) {
+            throw new Error("Video frame timestamp mismatch. Expected " +
+                frame.timestamp + " but but got " + pending.timestamp);
+        }
+
+        pending.frame = frame;
+        pending.ready = true;
+        pending.resolve();
+
+        if (!pending.keep) {
+            frame.close();
+        }
+    }
+
+    _handleError(e) {
+        throw new Error("Failed to decode frame: " + e.message);
+    }
+
+    _configureDecoder(profileIdc, constraintSet, levelIdc) {
+        if (this._decoder === null || this._decoder.state === 'closed') {
+            this._decoder = new VideoDecoder({
+                output: frame => this._handleFrame(frame),
+                error: e => this._handleError(e),
+            });
+        }
+        const codec = 'avc1.' +
+            profileIdc.toString(16).padStart(2, '0') +
+            constraintSet.toString(16).padStart(2, '0') +
+            levelIdc.toString(16).padStart(2, '0');
+        this._decoder.configure({
+            codec: codec,
+            codedWidth: this._width,
+            codedHeight: this._height,
+            optimizeForLatency: true,
+        });
+    }
+
+    _preparePendingFrame(timestamp) {
+        let pending = {
+            timestamp: timestamp,
+            promise: null,
+            resolve: null,
+            frame: null,
+            ready: false,
+            keep: false,
+        };
+        pending.promise = new Promise((resolve) => {
+            pending.resolve = resolve;
+        });
+        this._pendingFrames.push(pending);
+
+        return pending;
+    }
+
+    decode(payload) {
+        let parser = new H264Parser(payload);
+        let result = null;
+
+        // Ideally, this timestamp should come from the server, but we'll just
+        // approximate it instead.
+        let timestamp = Math.round(window.performance.now() * 1e3);
+
+        while (true) {
+            let encodedFrame = parser.parse();
+            if (encodedFrame === null) {
+                break;
+            }
+
+            if (parser.profileIdc !== null) {
+                self._profileIdc = parser.profileIdc;
+                self._constraintSet = parser.constraintSet;
+                self._levelIdc = parser.levelIdc;
+            }
+
+            if (this._decoder === null || this._decoder.state !== 'configured') {
+                if (!encodedFrame.key) {
+                    Log.Warn("Missing key frame. Can't decode until one arrives");
+                    continue;
+                }
+                if (self._profileIdc === null) {
+                    Log.Warn('Cannot config decoder. Have not received SPS and PPS yet.');
+                    continue;
+                }
+                this._configureDecoder(self._profileIdc, self._constraintSet,
+                                       self._levelIdc);
+            }
+
+            result = this._preparePendingFrame(timestamp);
+
+            const chunk = new EncodedVideoChunk({
+                timestamp: timestamp,
+                type: encodedFrame.key ? 'key' : 'delta',
+                data: encodedFrame.frame,
+            });
+
+            try {
+                this._decoder.decode(chunk);
+            } catch (e) {
+                Log.Warn("Failed to decode:", e);
+            }
+        }
+
+        // We only keep last frame of each payload
+        if (result !== null) {
+            result.keep = true;
+        }
+
+        return result;
+    }
+}
+
+export default class H264Decoder {
+    constructor() {
+        this._tick = 0;
+        this._contexts = {};
+    }
+
+    _contextId(x, y, width, height) {
+        return [x, y, width, height].join(',');
+    }
+
+    _findOldestContextId() {
+        let oldestTick = Number.MAX_VALUE;
+        let oldestKey = undefined;
+        for (const [key, value] of Object.entries(this._contexts)) {
+            if (value.lastUsed < oldestTick) {
+                oldestTick = value.lastUsed;
+                oldestKey = key;
+            }
+        }
+        return oldestKey;
+    }
+
+    _createContext(x, y, width, height) {
+        const maxContexts = 64;
+        if (Object.keys(this._contexts).length >= maxContexts) {
+            let oldestContextId = this._findOldestContextId();
+            delete this._contexts[oldestContextId];
+        }
+        let context = new H264Context(width, height);
+        this._contexts[this._contextId(x, y, width, height)] = context;
+        return context;
+    }
+
+    _getContext(x, y, width, height) {
+        let context = this._contexts[this._contextId(x, y, width, height)];
+        return context !== undefined ? context : this._createContext(x, y, width, height);
+    }
+
+    _resetContext(x, y, width, height) {
+        delete this._contexts[this._contextId(x, y, width, height)];
+    }
+
+    _resetAllContexts() {
+        this._contexts = {};
+    }
+
+    decodeRect(x, y, width, height, sock, display, depth) {
+        const resetContextFlag = 1;
+        const resetAllContextsFlag = 2;
+
+        if (sock.rQwait("h264 header", 8)) {
+            return false;
+        }
+
+        const length = sock.rQshift32();
+        const flags = sock.rQshift32();
+
+        if (sock.rQwait("h264 payload", length, 8)) {
+            return false;
+        }
+
+        if (flags & resetAllContextsFlag) {
+            this._resetAllContexts();
+        } else if (flags & resetContextFlag) {
+            this._resetContext(x, y, width, height);
+        }
+
+        let context = this._getContext(x, y, width, height);
+        context.lastUsed = this._tick++;
+
+        if (length !== 0) {
+            let payload = sock.rQshiftBytes(length, false);
+            let frame = context.decode(payload);
+            if (frame !== null) {
+                display.videoFrame(x, y, width, height, frame);
+            }
+        }
+
+        return true;
+    }
+}
index fcd626999bbd5f118b78e30d565f75e8891efa51..bc0bf2190c8a4c243d17e63331aeb360eab2e234 100644 (file)
@@ -380,6 +380,17 @@ export default class Display {
         });
     }
 
+    videoFrame(x, y, width, height, frame) {
+        this._renderQPush({
+            'type': 'frame',
+            'frame': frame,
+            'x': x,
+            'y': y,
+            'width': width,
+            'height': height
+        });
+    }
+
     blitImage(x, y, width, height, arr, offset, fromQueue) {
         if (this._renderQ.length !== 0 && !fromQueue) {
             // NB(directxman12): it's technically more performant here to use preallocated arrays,
@@ -406,9 +417,16 @@ export default class Display {
         }
     }
 
-    drawImage(img, x, y) {
-        this._drawCtx.drawImage(img, x, y);
-        this._damage(x, y, img.width, img.height);
+    drawImage(img, ...args) {
+        this._drawCtx.drawImage(img, ...args);
+
+        if (args.length <= 4) {
+            const [x, y] = args;
+            this._damage(x, y, img.width, img.height);
+        } else {
+            const [,, sw, sh, dx, dy] = args;
+            this._damage(dx, dy, sw, sh);
+        }
     }
 
     autoscale(containerWidth, containerHeight) {
@@ -511,6 +529,35 @@ export default class Display {
                         ready = false;
                     }
                     break;
+                case 'frame':
+                    if (a.frame.ready) {
+                        // The encoded frame may be larger than the rect due to
+                        // limitations of the encoder, so we need to crop the
+                        // frame.
+                        let frame = a.frame.frame;
+                        if (frame.codedWidth < a.width || frame.codedHeight < a.height) {
+                            Log.Warn("Decoded video frame does not cover its full rectangle area. Expecting at least " +
+                                      a.width + "x" + a.height + " but got " +
+                                      frame.codedWidth + "x" + frame.codedHeight);
+                        }
+                        const sx = 0;
+                        const sy = 0;
+                        const sw = a.width;
+                        const sh = a.height;
+                        const dx = a.x;
+                        const dy = a.y;
+                        const dw = sw;
+                        const dh = sh;
+                        this.drawImage(frame, sx, sy, sw, sh, dx, dy, dw, dh);
+                        frame.close();
+                    } else {
+                        let display = this;
+                        a.frame.promise.then(() => {
+                            display._scanRenderQ();
+                        });
+                        ready = false;
+                    }
+                    break;
             }
 
             if (ready) {
index 1a79989d1ac3d20083e233d48e98a599b8d05f4b..aa1fd4bbc2c9006efd8782502705db02260dad89 100644 (file)
@@ -15,6 +15,7 @@ export const encodings = {
     encodingZRLE: 16,
     encodingTightPNG: -260,
     encodingJPEG: 21,
+    encodingH264: 50,
 
     pseudoEncodingQualityLevel9: -23,
     pseudoEncodingQualityLevel0: -32,
@@ -44,6 +45,7 @@ export function encodingName(num) {
         case encodings.encodingZRLE:     return "ZRLE";
         case encodings.encodingTightPNG: return "TightPNG";
         case encodings.encodingJPEG:     return "JPEG";
+        case encodings.encodingH264:     return "H.264";
         default:                         return "[unknown encoding " + num + "]";
     }
 }
index f2deb0e7b4dcbeca61f3e48d6cc6cb1c042eac9f..9225cb464aca531526ec91ac46c12c0444e7a08c 100644 (file)
@@ -10,7 +10,7 @@
 import { toUnsigned32bit, toSigned32bit } from './util/int.js';
 import * as Log from './util/logging.js';
 import { encodeUTF8, decodeUTF8 } from './util/strings.js';
-import { dragThreshold } from './util/browser.js';
+import { dragThreshold, supportsWebCodecsH264Decode } from './util/browser.js';
 import { clientToElement } from './util/element.js';
 import { setCapture } from './util/events.js';
 import EventTargetMixin from './util/eventtarget.js';
@@ -35,6 +35,7 @@ import TightDecoder from "./decoders/tight.js";
 import TightPNGDecoder from "./decoders/tightpng.js";
 import ZRLEDecoder from "./decoders/zrle.js";
 import JPEGDecoder from "./decoders/jpeg.js";
+import H264Decoder from "./decoders/h264.js";
 
 // How many seconds to wait for a disconnect to finish
 const DISCONNECT_TIMEOUT = 3;
@@ -248,6 +249,7 @@ export default class RFB extends EventTargetMixin {
         this._decoders[encodings.encodingTightPNG] = new TightPNGDecoder();
         this._decoders[encodings.encodingZRLE] = new ZRLEDecoder();
         this._decoders[encodings.encodingJPEG] = new JPEGDecoder();
+        this._decoders[encodings.encodingH264] = new H264Decoder();
 
         // NB: nothing that needs explicit teardown should be done
         // before this point, since this can throw an exception
@@ -2115,6 +2117,9 @@ export default class RFB extends EventTargetMixin {
         encs.push(encodings.encodingCopyRect);
         // Only supported with full depth support
         if (this._fbDepth == 24) {
+            if (supportsWebCodecsH264Decode) {
+                encs.push(encodings.encodingH264);
+            }
             encs.push(encodings.encodingTight);
             encs.push(encodings.encodingTightPNG);
             encs.push(encodings.encodingZRLE);
index bbc9f5c1ebe2641148c299d1bd8afe857175bfcd..1ecded662232ed279ee1b63fb8142b7fa953a42e 100644 (file)
@@ -70,6 +70,26 @@ try {
 }
 export const hasScrollbarGutter = _hasScrollbarGutter;
 
+export let supportsWebCodecsH264Decode = false;
+
+async function _checkWebCodecsH264DecodeSupport() {
+    if (!('VideoDecoder' in window)) {
+        return;
+    }
+
+    // We'll need to make do with some placeholders here
+    const config = {
+        codec: 'avc1.42401f',
+        codedWidth: 1920,
+        codedHeight: 1080,
+        optimizeForLatency: true,
+    };
+
+    const result = await VideoDecoder.isConfigSupported(config);
+    supportsWebCodecsH264Decode = result.supported;
+}
+_checkWebCodecsH264DecodeSupport();
+
 /*
  * The functions for detection of platforms and browsers below are exported
  * but the use of these should be minimized as much as possible.