Remove video tracks on video mute without renegotiating #3091

Merged: 8 commits, Jan 25, 2023
Changes from 1 commit
src/webrtc/call.ts (71 changes: 52 additions & 19 deletions)
@@ -387,6 +387,10 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
private opponentSessionId?: string;
public groupCallId?: string;

// Used to keep the timer for the delay before actually stopping our
// video track after muting (see setLocalVideoMuted)
private stopVideoTrackTimer?: ReturnType<typeof setTimeout>;

/**
* Construct a new Matrix Call.
* @param opts - Config options.
@@ -480,7 +484,9 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
}

public get type(): CallType {
return this.hasLocalUserMediaVideoTrack || this.hasRemoteUserMediaVideoTrack ? CallType.Video : CallType.Voice;
// we may want to look for a video receiver here rather than a track to match the
// sender behaviour, although in practice they should be the same thing
return this.hasUserMediaVideoSender || this.hasRemoteUserMediaVideoTrack ? CallType.Video : CallType.Voice;
Contributor:
The sender will be there even if the track isn't, so this could return CallType.Video even if we have no video in the call, afaict.

Member Author:
I'd argue this is correct - a video call is still a video call if everyone has their video muted, although really the lines between what is a voice call and what is a video call are very blurry.

Contributor:
I'd argue that the call type can change during the call by muting/unmuting, but yeah, the lines are blurry and I'm in favour of getting rid of 1:1 call types as soon as we can.

}
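
As an aside, a minimal standalone sketch (plain browser WebRTC, not part of this PR) of the sender-vs-track distinction discussed above: once the track is detached and stopped, the sender still exists on the peer connection, which is why a sender-based check keeps reporting a video call while a track-based check would not.

// Illustrative only: an RTCRtpSender outlives its track.
async function demoSenderOutlivesTrack(): Promise<void> {
    const pc = new RTCPeerConnection();
    const stream = await navigator.mediaDevices.getUserMedia({ video: true });
    const [track] = stream.getVideoTracks();
    const sender = pc.addTrack(track, stream);

    // "Mute" by detaching and stopping the track, leaving the sender alone.
    await sender.replaceTrack(null);
    track.stop();

    console.log(pc.getSenders().includes(sender)); // true: the sender is still there
    console.log(sender.track); // null: but it no longer has a live track
}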

public get hasLocalUserMediaVideoTrack(): boolean {
@@ -503,6 +509,14 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
});
}

private get hasUserMediaAudioSender(): boolean {
return this.transceivers.has(getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, "audio"));
}

private get hasUserMediaVideoSender(): boolean {
return this.transceivers.has(getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, "video"));
}

public get localUsermediaFeed(): CallFeed | undefined {
return this.getLocalFeeds().find((feed) => feed.purpose === SDPStreamMetadataPurpose.Usermedia);
}
@@ -1292,18 +1306,7 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
this.localUsermediaStream!.addTrack(track);
}

// Secondly, we remove tracks that we no longer need from the peer
// connection, if any. This only happens when we mute the video atm.
// This will change the transceiver direction to "inactive" and
// therefore cause re-negotiation.
for (const kind of ["audio", "video"]) {
const sender = this.transceivers.get(getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, kind))?.sender;
// Only remove the track if we aren't going to immediately replace it
if (sender && !stream.getTracks().find((t) => t.kind === kind)) {
this.peerConn?.removeTrack(sender);
}
}
// Thirdly, we replace the old tracks, if possible.
// Then replace the old tracks, if possible.
for (const track of stream.getTracks()) {
const tKey = getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, track.kind);

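
For context on the block removed above, a minimal sketch (plain browser WebRTC, not the SDK's API) of why removing the sender forces renegotiation while keeping the sender and stopping the track does not:

// Illustrative only: contrasts the old remove-the-sender approach with the
// keep-the-sender approach this PR switches to.
function muteVideoSender(pc: RTCPeerConnection, sender: RTCRtpSender, removeSender: boolean): void {
    if (removeSender) {
        // Old behaviour: this flips the transceiver direction and fires
        // negotiationneeded, so a fresh offer/answer round is required.
        pc.removeTrack(sender);
    } else {
        // Behaviour this PR moves to: keep the sender and just stop the local
        // track (the real code does this after a short delay); the transceiver
        // keeps its direction, so no renegotiation happens.
        sender.track?.stop();
    }
}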
@@ -1361,22 +1364,51 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
*/
public async setLocalVideoMuted(muted: boolean): Promise<boolean> {
logger.log(`call ${this.callId} setLocalVideoMuted ${muted}`);

// if we were still thinking about stopping and removing the video
// track: don't, because we want it back.
if (!muted && this.stopVideoTrackTimer !== undefined) {
clearTimeout(this.stopVideoTrackTimer);
this.stopVideoTrackTimer = undefined;
}

if (!(await this.client.getMediaHandler().hasVideoDevice())) {
return this.isLocalVideoMuted();
}

if (!this.hasLocalUserMediaVideoTrack && !muted) {
if (!this.hasUserMediaVideoSender && !muted) {
await this.upgradeCall(false, true);
return this.isLocalVideoMuted();
}
if (this.opponentSupportsSDPStreamMetadata()) {
const stream = await this.client.getMediaHandler().getUserMediaStream(true, !muted);

// we may not have a video track - if not, re-request usermedia
if (!muted && this.localUsermediaStream!.getVideoTracks().length === 0) {
const stream = await this.client.getMediaHandler().getUserMediaStream(true, true);
await this.updateLocalUsermediaStream(stream);
} else {
this.localUsermediaFeed?.setAudioVideoMuted(null, muted);
}

this.localUsermediaFeed?.setAudioVideoMuted(null, muted);

this.updateMuteStatus();
await this.sendMetadataUpdate();

// if we're muting video, set a timeout to stop & remove the video track so we release
// the camera. We wait a short time to do this because when we disable a track, WebRTC
// will send black video for it. If we just stop and remove it straight away, the video
// will just freeze which means that when we unmute video, the other side will briefly
// get a static frame of us from before we muted. This way, the still frame is just black.
// A very small delay is not always enough so the theory here is that it needs to be long
// enough for WebRTC to encode a frame: 120ms should be long enough even if we're only
// doing 10fps.
if (muted) {
this.stopVideoTrackTimer = setTimeout(() => {
for (const t of this.localUsermediaStream!.getVideoTracks()) {
t.stop();
this.localUsermediaStream!.removeTrack(t);
}
}, 120);
}

return this.isLocalVideoMuted();
}

@@ -1404,7 +1436,7 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
return this.isMicrophoneMuted();
}

if (!this.hasLocalUserMediaAudioTrack && !muted) {
if (!this.hasUserMediaAudioSender && !muted) {
await this.upgradeCall(true, false);
return this.isMicrophoneMuted();
}
@@ -1496,6 +1528,7 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
this.localUsermediaStream!.id
} micShouldBeMuted ${micShouldBeMuted} vidShouldBeMuted ${vidShouldBeMuted}`,
);

setTracksEnabled(this.localUsermediaStream!.getAudioTracks(), !micShouldBeMuted);
setTracksEnabled(this.localUsermediaStream!.getVideoTracks(), !vidShouldBeMuted);
}
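
Taken together, a rough usage sketch of the behaviour after this change (the wrapper function and import path here are illustrative assumptions, not part of the PR):

import { MatrixCall } from "matrix-js-sdk/src/webrtc/call"; // the file changed in this diff

// Hypothetical helper; assumes `call` is an established 1:1 call.
async function toggleVideo(call: MatrixCall, mute: boolean): Promise<void> {
    // Muting disables the track immediately (WebRTC then sends black frames),
    // and ~120ms later the track is stopped and removed so the camera is
    // released, all without touching the peer connection or renegotiating.
    // Unmuting cancels that pending stop if it hasn't fired yet, or re-requests
    // a camera stream if the track has already been stopped.
    const nowMuted = await call.setLocalVideoMuted(mute);
    console.log(`local video muted: ${nowMuted}`);
}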