Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove video tracks on video mute without renegotiating #3091

Merged
merged 8 commits into from
Jan 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion spec/unit/webrtc/call.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ import {
MockMediaStreamTrack,
installWebRTCMocks,
MockRTCPeerConnection,
MockRTCRtpTransceiver,
SCREENSHARE_STREAM_ID,
MockRTCRtpSender,
} from "../../test-utils/webrtc";
import { CallFeed } from "../../../src/webrtc/callFeed";
import { EventType, IContent, ISendEventResponse, MatrixEvent, Room } from "../../../src";
Expand Down Expand Up @@ -536,8 +538,15 @@ describe("Call", function () {
it("if local video", async () => {
call.getOpponentMember = jest.fn().mockReturnValue({ userId: "@bob:bar.uk" });

// since this is testing for the presence of a local sender, we need to add a transceiver
// rather than just a source track
const mockTrack = new MockMediaStreamTrack("track_id", "video");
const mockTransceiver = new MockRTCRtpTransceiver(call.peerConn as unknown as MockRTCPeerConnection);
mockTransceiver.sender = new MockRTCRtpSender(mockTrack) as unknown as RTCRtpSender;
(call as any).transceivers.set("m.usermedia:video", mockTransceiver);

(call as any).pushNewLocalFeed(
new MockMediaStream("remote_stream1", [new MockMediaStreamTrack("track_id", "video")]),
new MockMediaStream("remote_stream1", [mockTrack]),
SDPStreamMetadataPurpose.Usermedia,
false,
);
Expand Down Expand Up @@ -829,6 +838,55 @@ describe("Call", function () {
await startVideoCall(client, call);
});

afterEach(() => {
jest.useRealTimers();
});

it("should not remove video sender on video mute", async () => {
await call.setLocalVideoMuted(true);
expect((call as any).hasUserMediaVideoSender).toBe(true);
});

it("should release camera after short delay on video mute", async () => {
jest.useFakeTimers();

await call.setLocalVideoMuted(true);

jest.advanceTimersByTime(500);

expect(call.hasLocalUserMediaVideoTrack).toBe(false);
});

it("should re-request video feed on video unmute if it doesn't have one", async () => {
jest.useFakeTimers();

const mockGetUserMediaStream = jest
.fn()
.mockReturnValue(client.client.getMediaHandler().getUserMediaStream(true, true));

client.client.getMediaHandler().getUserMediaStream = mockGetUserMediaStream;

await call.setLocalVideoMuted(true);

jest.advanceTimersByTime(500);

await call.setLocalVideoMuted(false);

expect(mockGetUserMediaStream).toHaveBeenCalled();
});

it("should not release camera on fast mute and unmute", async () => {
const mockGetUserMediaStream = jest.fn();

client.client.getMediaHandler().getUserMediaStream = mockGetUserMediaStream;

await call.setLocalVideoMuted(true);
await call.setLocalVideoMuted(false);

expect(mockGetUserMediaStream).not.toHaveBeenCalled();
expect(call.hasLocalUserMediaVideoTrack).toBe(true);
});

describe("sending sdp_stream_metadata_changed events", () => {
it("should send sdp_stream_metadata_changed when muting audio", async () => {
await call.setMicrophoneMuted(true);
Expand Down
75 changes: 56 additions & 19 deletions src/webrtc/call.ts
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
private opponentSessionId?: string;
public groupCallId?: string;

// Used to keep the timer for the delay before actually stopping our
// video track after muting (see setLocalVideoMuted)
private stopVideoTrackTimer?: ReturnType<typeof setTimeout>;

/**
* Construct a new Matrix Call.
* @param opts - Config options.
Expand Down Expand Up @@ -480,7 +484,9 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
}

public get type(): CallType {
return this.hasLocalUserMediaVideoTrack || this.hasRemoteUserMediaVideoTrack ? CallType.Video : CallType.Voice;
// we may want to look for a video receiver here rather than a track to match the
// sender behaviour, although in practice they should be the same thing
return this.hasUserMediaVideoSender || this.hasRemoteUserMediaVideoTrack ? CallType.Video : CallType.Voice;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sender will be there even if the track isn't, so this could return CallType.Video even if we have no video in the call, afaict

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd argue this is correct - a video call is still a video call if everyone has their video muted, although really the lines of what is a voice and video call are very blurry.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd argue that the call type can change during the call by you muting/unmuting but yeah the lines are blurry and I am in favour of getting rid of 1:1 call types as soon as we can

}

public get hasLocalUserMediaVideoTrack(): boolean {
Expand All @@ -503,6 +509,14 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
});
}

private get hasUserMediaAudioSender(): boolean {
return Boolean(this.transceivers.get(getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, "audio"))?.sender);
}

private get hasUserMediaVideoSender(): boolean {
return Boolean(this.transceivers.get(getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, "video"))?.sender);
}

public get localUsermediaFeed(): CallFeed | undefined {
return this.getLocalFeeds().find((feed) => feed.purpose === SDPStreamMetadataPurpose.Usermedia);
}
Expand Down Expand Up @@ -1292,18 +1306,7 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
this.localUsermediaStream!.addTrack(track);
}

// Secondly, we remove tracks that we no longer need from the peer
// connection, if any. This only happens when we mute the video atm.
// This will change the transceiver direction to "inactive" and
// therefore cause re-negotiation.
for (const kind of ["audio", "video"]) {
const sender = this.transceivers.get(getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, kind))?.sender;
// Only remove the track if we aren't going to immediately replace it
if (sender && !stream.getTracks().find((t) => t.kind === kind)) {
this.peerConn?.removeTrack(sender);
}
}
// Thirdly, we replace the old tracks, if possible.
// Then replace the old tracks, if possible.
for (const track of stream.getTracks()) {
const tKey = getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, track.kind);

Expand Down Expand Up @@ -1361,22 +1364,51 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
*/
public async setLocalVideoMuted(muted: boolean): Promise<boolean> {
logger.log(`call ${this.callId} setLocalVideoMuted ${muted}`);

// if we were still thinking about stopping and removing the video
// track: don't, because we want it back.
if (!muted && this.stopVideoTrackTimer !== undefined) {
clearTimeout(this.stopVideoTrackTimer);
this.stopVideoTrackTimer = undefined;
}

if (!(await this.client.getMediaHandler().hasVideoDevice())) {
return this.isLocalVideoMuted();
}

if (!this.hasLocalUserMediaVideoTrack && !muted) {
if (!this.hasUserMediaVideoSender && !muted) {
await this.upgradeCall(false, true);
return this.isLocalVideoMuted();
}
if (this.opponentSupportsSDPStreamMetadata()) {
const stream = await this.client.getMediaHandler().getUserMediaStream(true, !muted);

// we may not have a video track - if not, re-request usermedia
if (!muted && this.localUsermediaStream!.getVideoTracks().length === 0) {
const stream = await this.client.getMediaHandler().getUserMediaStream(true, true);
await this.updateLocalUsermediaStream(stream);
} else {
this.localUsermediaFeed?.setAudioVideoMuted(null, muted);
}

this.localUsermediaFeed?.setAudioVideoMuted(null, muted);

this.updateMuteStatus();
await this.sendMetadataUpdate();

// if we're muting video, set a timeout to stop & remove the video track so we release
// the camera. We wait a short time to do this because when we disable a track, WebRTC
// will send black video for it. If we just stop and remove it straight away, the video
// will just freeze which means that when we unmute video, the other side will briefly
// get a static frame of us from before we muted. This way, the still frame is just black.
// A very small delay is not always enough so the theory here is that it needs to be long
// enough for WebRTC to encode a frame: 120ms should be long enough even if we're only
// doing 10fps.
if (muted) {
this.stopVideoTrackTimer = setTimeout(() => {
dbkr marked this conversation as resolved.
Show resolved Hide resolved
for (const t of this.localUsermediaStream!.getVideoTracks()) {
t.stop();
this.localUsermediaStream!.removeTrack(t);
}
}, 120);
}

return this.isLocalVideoMuted();
}

Expand Down Expand Up @@ -1404,7 +1436,7 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
return this.isMicrophoneMuted();
}

if (!this.hasLocalUserMediaAudioTrack && !muted) {
if (!muted && (!this.hasUserMediaAudioSender || !this.hasLocalUserMediaAudioTrack)) {
await this.upgradeCall(true, false);
return this.isMicrophoneMuted();
}
Expand Down Expand Up @@ -1496,6 +1528,7 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
this.localUsermediaStream!.id
} micShouldBeMuted ${micShouldBeMuted} vidShouldBeMuted ${vidShouldBeMuted}`,
);

setTracksEnabled(this.localUsermediaStream!.getAudioTracks(), !micShouldBeMuted);
setTracksEnabled(this.localUsermediaStream!.getVideoTracks(), !vidShouldBeMuted);
}
Expand Down Expand Up @@ -2477,6 +2510,10 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
clearInterval(this.callLengthInterval);
this.callLengthInterval = undefined;
}
if (this.stopVideoTrackTimer !== undefined) {
clearTimeout(this.stopVideoTrackTimer);
this.stopVideoTrackTimer = undefined;
}

for (const [stream, listener] of this.removeTrackListeners) {
stream.removeEventListener("removetrack", listener);
Expand Down