From 4926e78301e40a9c93394b48dd18e4f74365146c Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Fri, 2 Apr 2021 16:32:50 -0700
Subject: [PATCH 1/9] Make AudioFrame immutable and add reference counting
 semantics.

---
 index.src.html | 136 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 99 insertions(+), 37 deletions(-)
diff --git a/index.src.html b/index.src.html
index b4b9c785..64fccf0f 100644
--- a/index.src.html
+++ b/index.src.html
@@ -68,6 +68,11 @@
 spec: media-capabilities; urlPrefix: https://w3c.github.io/media-capabilities/#
     type: method; text: decodingInfo(); url: dom-mediacapabilities-decodinginfo
     type: attribute; text: powerEfficient; url: dom-mediacapabilitiesinfo-powerefficient
+
+spec: webaudio; urlPrefix: https://www.w3.org/TR/webaudio/
+    type: dfn; text: acquire the content; url: #acquire-the-content
+    for: AudioBuffer
+        type: method;  text: copyToChannel(); url: #dom-audiobuffer-copytochannel
 </pre>
 
 <pre class='biblio'>
@@ -375,10 +380,11 @@
   <dd>
     Run these steps:
     1. For each |output| in |outputs|:
-        1. Let |buffer| be an {{AudioBuffer}} containing the decoded audio data in
-            |output|.
-        2. Let |frame| be an {{AudioFrame}} containing |buffer| and a timestamp for
-            the output.
+        1. Let |frameInit| be an {{AudioFrameInit}} with the following keys:
+            1. Let {{AudioFrameInit/buffer}} be an {{AudioBuffer}} containing
+                the decoded audio data from |output|.
+            2. Let {{AudioFrameInit/timestamp}} be the timestamp from |output|.
+        2. Let |frame| be an {{AudioFrame}} constructed with |frameInit|.
         3. Invoke {{AudioDecoder/[[output callback]]}} with frame.
   </dd>
   <dt><dfn>Reset AudioDecoder</dfn></dt>
@@ -761,7 +767,7 @@
         `true`, throw a {{TypeError}}.
     2. If {{AudioEncoder/state}} is not `"configured"`, throw an
         {{InvalidStateError}}.
-    3. Let |frameClone| hold the result of running the [=Clone Frame=]
+    3. Let |frameClone| hold the result of running the [=Clone AudioFrame=]
         algorithm with |frame|.
     4. Destroy the original |frame| by invoking `frame.destroy()`.
     5. Increment {{AudioEncoder/encodeQueueSize}}.
@@ -769,7 +775,7 @@
 
     Running a control message to encode the frame means performing these steps.
     1. Attempt to use {{AudioEncoder/[[codec implementation]]}} to encode
-        |frameClone|.
+        the [=frame resource=] described by |frameClone|.
     2. If encoding results in an error, queue a task on the [=control thread=]
         event loop to run the [=Close AudioEncoder=] algorithm with
         {{EncodingError}}.
@@ -777,9 +783,8 @@
         {{AudioEncoder/encodeQueueSize}}.
     4. Let |encoded outputs| be a [=list=] of encoded audio data outputs
         emitted by {{AudioEncoder/[[codec implementation]]}}.
-    5. If |encoded outputs| is not empty, queue a task on the [=control thread=]
-        event loop to run the [=Output EncodedAudioChunks=] algorithm with
-        |encoded outputs|.
+    5. If |encoded outputs| is not empty, queue a task on the
+        [=control thread=] event loop to run the [=Output EncodedAudioChunks=] algorithm with |encoded outputs|.
   </dd>
 
   <dt><dfn method for=AudioEncoder>flush()</dfn></dt>
@@ -1866,6 +1871,39 @@
 ====================================================
 These interfaces represent unencoded (raw) media.
 
+Memory Model {#raw-media-memory-model}
+--------------------------------------
+
+### Background ### {#raw-media-memory-model-background}
+
+This section is non-normative.
+
+Decoded media data (frames) may occupy a large amount of system memory. To
+minimize the need for expensive copies, this specification defines a scheme
+for reference counting (`frame.clone()` and `frame.close()`).
+
+### Reference Counting ### {#raw-media-memory-model-reference-counting}
+
+A <dfn>frame resource</dfn> is storage for the actual pixel data or the audio
+sample data described by a {{VideoFrame}} or {{AudioFrame}}.
+
+The {{AudioFrame}} {{AudioFrame/[[resource reference]]}} and {{VideoFrame}}
+{{VideoFrame/[[resource reference]]}} internal slots hold a reference to a
+[=frame resource=].
+
+{{VideoFrame}}.{{VideoFrame/clone()}} and
+{{AudioFrame}}.{{AudioFrame/clone()}} return new frame objects who's
+`[[resource reference]]` points to the same [=frame resource=] as the original
+frame.
+
+{{VideoFrame}}.{{VideoFrame/close()}} and {{AudioFrame}}.{{AudioFrame/close()}}
+will clear their [[resource reference]] slot, releasing the reference their
+[=frame resource=]
+
+A [=frame resource=] must outlive the frames that reference it. When no frames
+reference a given [=frame resource=], the resource must be destroyed
+(freeing associated [=system resources=]).
+
 
 AudioFrame Interface {#audioframe-interface}
 ---------------------------------------------
@@ -1875,8 +1913,11 @@
 [Exposed=(Window,DedicatedWorker)]
 interface AudioFrame {
   constructor(AudioFrameInit init);
+
   readonly attribute unsigned long long timestamp;
   readonly attribute AudioBuffer? buffer;
+
+  AudioFrame clone();
   undefined close();
 };
 
@@ -1888,14 +1929,13 @@
 </pre>
 
 ### Internal Slots ###{#audioframe-internal-slots}
-<dl>
-  <dt><dfn attribute for=AudioFrame>\[[detached]]</dfn></dt>
-  <dd>
-    Boolean indicating whether close() was invoked and underlying resources
-        have been released.
-  </dd>
-</dl>
+: <dfn attribute for=AudioFrame>\[[detached]]</dfn>
+:: Boolean indicating whether close() was invoked and underlying resources
+    have been released.
 
+: <dfn attribute for=AudioFrame>[[resource reference]]</dfn>
+:: A reference to a [=frame resource=] that stores the audio sample data for
+    this frame.
 
 ### Constructors ###{#audioframe-constructors}
 <dfn constructor for=AudioFrame title="AudioFrame(init)">
@@ -1904,33 +1944,55 @@
 1. Let |frame| be a new {{AudioFrame}} object.
 2. Assign `init.timestamp` to `frame.timestamp`.
 3. Assign `init.buffer` to `frame.buffer`.
-4. Assign `false` to the {{AudioFrame/[[detached]]}} internal slot.
-5. Return |frame|.
+4. Let |resource| be a new [=frame resource=].
+5. [=Acquire the content=] [[WEBAUDIO]] of `frame.buffer` and assign the
+    acquired content to |resource|.
 
+    NOTE: The acquired content stored in |resource| is an immutable snapshot
+        {{AudioFrameInit/buffer}}'s content. Mutating {{AudioFrame/buffer}}
+        after construction (e.g. via {{AudioBuffer/copyToChannel()}}) will
+        have no effect on the |resource|.
 
-### Attributes ###{#audioframe-attributes}
-<dl>
-  <dt><dfn attribute for=AudioFrame>timestamp</dfn></dt>
-  <dd>The presentation timestamp, given in microseconds.</dd>
+6. Initialize [[resource reference]] with a reference to |resource|.
+7. Assign `false` to the {{AudioFrame/[[detached]]}} internal slot.
+8. Return |frame|.
 
-  <dt><dfn attribute for=AudioFrame>buffer</dfn></dt>
-  <dd>The buffer containing decoded audio data.</dd>
-</dl>
+### Attributes ###{#audioframe-attributes}
+: <dfn attribute for=AudioFrame>timestamp</dfn>
+:: The presentation timestamp, given in microseconds.
 
+: <dfn attribute for=AudioFrame>buffer</dfn>
+:: The buffer containing decoded audio data.
 
 ### Methods ###{#audioframe-methods}
-<dl>
-  <dt><dfn method for=AudioFrame>close()</dfn></dt>
-  <dd>
-    Immediately frees [=system resources=]. When invoked, run these steps:
-    1. Release [=system resources=] for buffer and set its value to null.
-    2. Assign `true` to the {{AudioFrame/[[detached]]}} internal slot.
+: <dfn method for=AudioFrame>clone()</dfn>
+:: Creates a new AudioFrame with a reference to the same [=frame resource=].
 
-    NOTE: This section needs work. We should use the name and semantics of
-        VideoFrame destroy(). Similarly, we should add clone() to make a deep
-        copy.
-  </dd>
-</dl>
+    When invoked, run these steps:
+    1. If the value of |frame|'s {{AudioFrame/[[detached]]}} internal slot is
+        `true`, throw an {{InvalidStateError}} {{DOMException}}.
+    2. Return the result of running the [=Clone AudioFrame=] algorithm with
+        [=this=].
+
+: <dfn method for=AudioFrame>close()</dfn>
+:: Clears all state and releases the reference to the [=frame resource=].
+    Close is final.
+
+    When invoked, run these steps:
+    1. Assign `null` to {{AudioFrame/[[resource reference]]}}.
+    2. Assign `null` to {{AudioFrame/buffer}}.
+    3. Assign `true` to the {{AudioFrame/[[detached]]}} internal slot.
+
+### Algorithms ### {#audioframe-algorithms}
+
+: <dfn>Clone AudioFrame</dfn> (with |frame|)
+:: Run these steps:
+    1. Let |cloneFrame| be a new {{AudioFrame}} initialized as follows:
+        1. Assign |frame|.{{AudioFrame/[[resource reference]]}} to
+            |cloneFrame|.{{AudioFrame/[[resource reference]]}}.
+        2. Assign |frame|.timestamp to |cloneFrame|.timestamp.
+        3. Assign |frame|.buffer to |cloneFrame|.buffer.
+    2. Return |cloneFrame|.
 
 VideoFrame Interface {#videoframe-interface}
 --------------------------------------------
@@ -2333,7 +2395,7 @@
 operations.
 
 Platform codecs are historically an internal detail of APIs like
-{{HTMLMediaElement}}, [[WebAudio]], and [[WebRTC]]. In this way, it has always
+{{HTMLMediaElement}}, [[WEBAUDIO]], and [[WebRTC]]. In this way, it has always
 been possible to attack the underlying codecs by using malformed media
 files/streams and invoking the various API control methods.
 

From c574352551a1781b9b0b6486af1a2b4a65ea254e Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Fri, 2 Apr 2021 16:48:17 -0700
Subject: [PATCH 2/9] Temporarily allow build warnings so I can split the
 change between PRs

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 46c82d50..b4fa733b 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ SHELL=/bin/bash
 local: local-index local-codec-registry local-avc-codec-registration
 
 local-index: index.src.html
-	bikeshed --die-on=warning spec index.src.html index.html
+	bikeshed -f spec index.src.html index.html
 
 local-codec-registry: codec_registry.src.html
 	bikeshed --die-on=warning spec codec_registry.src.html codec_registry.html

From 09ef789c612046c78873c9b9a3d6bc17722cd328 Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Fri, 2 Apr 2021 17:00:45 -0700
Subject: [PATCH 3/9] Relax warning for make 'ci' rule

---
 Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index b4fa733b..b5f3d7f7 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ SHELL=/bin/bash
 local: local-index local-codec-registry local-avc-codec-registration
 
 local-index: index.src.html
-	bikeshed -f spec index.src.html index.html
+	bikeshed --die-on=warning spec index.src.html index.html
 
 local-codec-registry: codec_registry.src.html
 	bikeshed --die-on=warning spec codec_registry.src.html codec_registry.html
@@ -16,7 +16,6 @@ remote-index: index.src.html
 	                       --output index.html \
 	                       --write-out "%{http_code}" \
 	                       --header "Accept: text/plain, text/html" \
-	                       -F die-on=warning \
 	                       -F file=@index.src.html) && \
 	[[ "$$HTTP_STATUS" -eq "200" ]]) || ( \
 		echo ""; cat index.html; echo ""; \

From 15003514622e7623b00c0468d42db701743279ba Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Mon, 5 Apr 2021 00:08:06 -0700
Subject: [PATCH 4/9] Fix typos, small style edits

---
 index.src.html | 65 ++++++++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/index.src.html b/index.src.html
index 64fccf0f..0b91b436 100644
--- a/index.src.html
+++ b/index.src.html
@@ -283,21 +283,21 @@
     [=Enqueues a control message=] to decode the given |chunk|.
 
     When invoked, run these steps:
-    1. If {{VideoDecoder/state}} is not `"configured"`, throw an
+    1. If {{AudioDecoder/state}} is not `"configured"`, throw an
         {{InvalidStateError}}.
-    2. Increment {{VideoDecoder/decodeQueueSize}}.
+    2. Increment {{AudioDecoder/decodeQueueSize}}.
     3. [=Queue a control message=] to decode the |chunk|.
 
     Running a control message to decode the chunk means performing these steps:
-    1. Attempt to use {{VideoDecoder/[[codec implementation]]}} to decode the
+    1. Attempt to use {{AudioDecoder/[[codec implementation]]}} to decode the
         chunk.
     2. If decoding results in an error, queue a task on the [=control thread=]
-        event loop to run the [=Close VideoDecoder=] algorithm with
+        event loop to run the [=Close AudioDecoder=] algorithm with
         {{EncodingError}}.
     3. Queue a task on the [=control thread=] event loop to decrement
-        {{VideoDecoder/decodeQueueSize}}
+        {{AudioDecoder/decodeQueueSize}}
     4. Let |decoded outputs| be a [=list=] of decoded video data outputs emitted
-        by {{VideoDecoder/[[codec implementation]]}}.
+        by {{AudioDecoder/[[codec implementation]]}}.
     5. If |decoded outputs| is not empty, queue a task on the [=control thread=]
         event loop to run the [=Output VideoFrames=] algorithm with
         |decoded outputs|.
@@ -1930,8 +1930,7 @@
 
 ### Internal Slots ###{#audioframe-internal-slots}
 : <dfn attribute for=AudioFrame>\[[detached]]</dfn>
-:: Boolean indicating whether close() was invoked and underlying resources
-    have been released.
+:: Boolean indicating whether {{AudioFrame/close()}} was invoked.
 
 : <dfn attribute for=AudioFrame>[[resource reference]]</dfn>
 :: A reference to a [=frame resource=] that stores the audio sample data for
@@ -1941,28 +1940,36 @@
 <dfn constructor for=AudioFrame title="AudioFrame(init)">
   AudioFrame(init)
 </dfn>
-1. Let |frame| be a new {{AudioFrame}} object.
-2. Assign `init.timestamp` to `frame.timestamp`.
-3. Assign `init.buffer` to `frame.buffer`.
-4. Let |resource| be a new [=frame resource=].
-5. [=Acquire the content=] [[WEBAUDIO]] of `frame.buffer` and assign the
-    acquired content to |resource|.
-
-    NOTE: The acquired content stored in |resource| is an immutable snapshot
-        {{AudioFrameInit/buffer}}'s content. Mutating {{AudioFrame/buffer}}
-        after construction (e.g. via {{AudioBuffer/copyToChannel()}}) will
-        have no effect on the |resource|.
-
-6. Initialize [[resource reference]] with a reference to |resource|.
-7. Assign `false` to the {{AudioFrame/[[detached]]}} internal slot.
-8. Return |frame|.
+1. Let |frame| be a new {{AudioFrame}} object, initialized as follows:
+    1. Assign |init|.{{AudioFrameInit/timestamp}} to {{AudioFrame/timestamp}}.
+    2. Assign |init|.{{AudioFrameInit/buffer}} to {{AudioFrame/buffer}}.
+    3. Let |resource| be a new [=frame resource=].
+    4. [=Acquire the content=] [[WEBAUDIO]] of {{AudioFrame/buffer}} and
+        assign the acquired content to |resource|.
+
+        NOTE: The acquired content stored in |resource| is an immutable
+            snapshot {{AudioFrameInit/buffer}}'s content. Mutating
+            {{AudioFrame/buffer}} after construction (e.g. via
+            {{AudioBuffer/copyToChannel()}}) will have no effect on the
+            |resource|.
+
+    5. Let |resourceReference| be a reference to |resource|.
+    6. Assign |resourceReference| to {{AudioFrame/[[resource reference]]}}.
+    7. Assign `false` to the {{AudioFrame/[[detached]]}} internal slot.
+2. Return |frame|.
 
 ### Attributes ###{#audioframe-attributes}
 : <dfn attribute for=AudioFrame>timestamp</dfn>
 :: The presentation timestamp, given in microseconds.
 
 : <dfn attribute for=AudioFrame>buffer</dfn>
-:: The buffer containing decoded audio data.
+:: The buffer describing decoded audio data.
+
+    NOTE: The backing [=frame resource=] (referenced by
+        {{AudioFrame/[[resource reference]]}}) is immutable. Mutating
+        {{AudioFrame/buffer}} after construction (e.g. via
+        {{AudioBuffer/copyToChannel()}}) will have no effect on the
+        [=frame resource=].
 
 ### Methods ###{#audioframe-methods}
 : <dfn method for=AudioFrame>clone()</dfn>
@@ -1987,12 +1994,12 @@
 
 : <dfn>Clone AudioFrame</dfn> (with |frame|)
 :: Run these steps:
-    1. Let |cloneFrame| be a new {{AudioFrame}} initialized as follows:
+    1. Let |clone| be a new {{AudioFrame}} initialized as follows:
         1. Assign |frame|.{{AudioFrame/[[resource reference]]}} to
-            |cloneFrame|.{{AudioFrame/[[resource reference]]}}.
-        2. Assign |frame|.timestamp to |cloneFrame|.timestamp.
-        3. Assign |frame|.buffer to |cloneFrame|.buffer.
-    2. Return |cloneFrame|.
+            {{AudioFrame/[[resource reference]]}}.
+        2. Assign |frame|.timestamp to {{AudioFrame/timestamp}}.
+        3. Assign |frame|.buffer to {{AudioFrame/buffer}}.
+    2. Return |clone|.
 
 VideoFrame Interface {#videoframe-interface}
 --------------------------------------------

From 245c0a26a34fd62ec81d83b95fd1875456a1db18 Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Thu, 8 Apr 2021 21:56:32 -0700
Subject: [PATCH 5/9] Re-word frame-resource lifetime text

---
 index.src.html | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/index.src.html b/index.src.html
index 0b91b436..2548f9dc 100644
--- a/index.src.html
+++ b/index.src.html
@@ -1900,10 +1900,13 @@
 will clear their [[resource reference]] slot, releasing the reference their
 [=frame resource=]
 
-A [=frame resource=] must outlive the frames that reference it. When no frames
-reference a given [=frame resource=], the resource must be destroyed
-(freeing associated [=system resources=]).
+A [=frame resource=] must remain alive as long as it continues to be referenced
+by a `[[resource reference]]`.
 
+NOTE: When a [=frame resource=] is no longer referenced by a
+    `[[resource reference]]`, the resource may be destroyed. User agents are
+    encouraged to destroy such resources quickly to reduce memory pressure and
+    facilitate resouce reuse.
 
 AudioFrame Interface {#audioframe-interface}
 ---------------------------------------------

From c2586e12b5dafc00557ad1d638d9a8b7e2f598da Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Thu, 29 Apr 2021 21:58:08 -0700
Subject: [PATCH 6/9] Rename AudioFrame->AudioData. Drop dependency on
 AudioBuffer.

The mutability of AudioBuffer was undesirable. Also, we like having more
sample formats. See discussion in #179.
---
 index.src.html | 334 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 232 insertions(+), 102 deletions(-)

diff --git a/index.src.html b/index.src.html
index 07b0b925..727cb21c 100644
--- a/index.src.html
+++ b/index.src.html
@@ -74,11 +74,6 @@
 spec: media-capabilities; urlPrefix: https://w3c.github.io/media-capabilities/#
     type: method; text: decodingInfo(); url: dom-mediacapabilities-decodinginfo
     type: attribute; text: powerEfficient; url: dom-mediacapabilitiesinfo-powerefficient
-
-spec: webaudio; urlPrefix: https://www.w3.org/TR/webaudio/
-    type: dfn; text: acquire the content; url: #acquire-the-content
-    for: AudioBuffer
-        type: method;  text: copyToChannel(); url: #dom-audiobuffer-copytochannel
 </pre>
 
 
@@ -102,7 +97,7 @@
 : <dfn lt="system resources">Codec System Resources</dfn>
 :: Resources including CPU memory, GPU memory, and exclusive handles to specific
     decoding/encoding hardware that may be allocated by the User Agent as part
-    of codec configuration or generation of {{AudioFrame}} and {{VideoFrame}}
+    of codec configuration or generation of {{AudioData}} and {{VideoFrame}}
     objects. Such resources may be quickly exhuasted and should be released
     immediately when no longer in use.
 
@@ -196,11 +191,11 @@
 };
 
 dictionary AudioDecoderInit {
-  required AudioFrameOutputCallback output;
+  required AudioDataOutputCallback output;
   required WebCodecsErrorCallback error;
 };
 
-callback AudioFrameOutputCallback = undefined(AudioFrame output);
+callback AudioDataOutputCallback = undefined(AudioData output);
 </xmp>
 
 Internal Slots {#audiodecoder-internal-slots}
@@ -293,7 +288,7 @@
     4. Let |decoded outputs| be a [=list=] of decoded video data outputs emitted
         by {{AudioDecoder/[[codec implementation]]}}.
     5. If |decoded outputs| is not empty, queue a task on the [=control thread=]
-        event loop to run the [=Output VideoFrames=] algorithm with
+        event loop to run the [=Output AudioDatas=] algorithm with
         |decoded outputs|.
   </dd>
 
@@ -316,7 +311,7 @@
     2. Let |decoded outputs| be a [=list=] of decoded audio data outputs emitted
         by {{AudioDecoder/[[codec implementation]]}}.
     3. If |decoded outputs| is not empty, queue a task on the [=control thread=]
-        event loop to run the [=Output AudioFrames=] algorithm with
+        event loop to run the [=Output AudioDatas=] algorithm with
         |decoded outputs|.
     4. Queue a task on the [=control thread=] event loop to resolve |promise|.
   </dd>
@@ -370,16 +365,24 @@
 Algorithms {#audiodecoder-algorithms}
 -------------------------------------
 <dl>
-  <dt><dfn>Output AudioFrames</dfn> (with |outputs|)</dt>
+  <dt><dfn>Output AudioDatas</dfn> (with |outputs|)</dt>
   <dd>
     Run these steps:
     1. For each |output| in |outputs|:
-        1. Let |frameInit| be an {{AudioFrameInit}} with the following keys:
-            1. Let {{AudioFrameInit/buffer}} be an {{AudioBuffer}} containing
-                the decoded audio data from |output|.
-            2. Let {{AudioFrameInit/timestamp}} be the timestamp from |output|.
-        2. Let |frame| be an {{AudioFrame}} constructed with |frameInit|.
-        3. Invoke {{AudioDecoder/[[output callback]]}} with frame.
+        1. Let |data| be an {{AudioData}}, initialized as follows:
+            1. Assign `false` to {{AudioData/[[detached]]}}.
+            2. Let |resource| be the [=media resource=] described by |output|.
+            3. Let |resourceReference| be a reference to |resource|.
+            4. Assign |resourceReference| to
+                {{AudioData/[[resource reference]]}}.
+            5. Let |timestamp| be the {{EncodedAudioChunk/timestamp}} of the
+                {{EncodedAudioChunk}} associated with |output|.
+            6. Assign |timestamp| to {{AudioData/[[timestamp]]}}.
+            7. Assign values to {{AudioData/[[sample format]]}},
+                {{AudioData/[[sample rate]]}},
+                {{AudioData/[[number of frames]]}}, and
+                {{AudioData/[[number of channels]]}} as determined by |output|.
+        3. Invoke {{AudioDecoder/[[output callback]]}} with |data|.
   </dd>
   <dt><dfn>Reset AudioDecoder</dfn></dt>
   <dd>
@@ -660,7 +663,7 @@
   readonly attribute long encodeQueueSize;
 
   undefined configure(AudioEncoderConfig config);
-  undefined encode(AudioFrame frame);
+  undefined encode(AudioData data);
   Promise<undefined> flush();
   undefined reset();
   undefined close();
@@ -745,23 +748,23 @@
         {{NotSupportedError}}.
   </dd>
 
-  <dt><dfn method for=AudioEncoder>encode(frame)</dfn></dt>
+  <dt><dfn method for=AudioEncoder>encode(data)</dfn></dt>
   <dd>
-    [=Enqueues a control message=] to encode the given |frame|.
+    [=Enqueues a control message=] to encode the given |data|.
 
     When invoked, run these steps:
-    1. If the value of |frame|'s {{AudioFrame/[[detached]]}} internal slot is
+    1. If the value of |data|'s {{AudioData/[[detached]]}} internal slot is
         `true`, throw a {{TypeError}}.
     2. If {{AudioEncoder/state}} is not `"configured"`, throw an
         {{InvalidStateError}}.
-    3. Let |frameClone| hold the result of running the [=Clone AudioFrame=]
-        algorithm with |frame|.
+    3. Let |dataClone| hold the result of running the [=Clone AudioData=]
+        algorithm with |data|.
     4. Increment {{AudioEncoder/encodeQueueSize}}.
-    5. [=Queue a control message=] to encode |frameClone|.
+    5. [=Queue a control message=] to encode |dataClone|.
 
-    Running a control message to encode the frame means performing these steps.
+    Running a control message to encode the data means performing these steps.
     1. Attempt to use {{AudioEncoder/[[codec implementation]]}} to encode
-        the [=frame resource=] described by |frameClone|.
+        the [=media resource=] described by |dataClone|.
     2. If encoding results in an error, queue a task on the [=control thread=]
         event loop to run the [=Close AudioEncoder=] algorithm with
         {{EncodingError}}.
@@ -857,7 +860,7 @@
             2. Let {{EncodedAudioChunkInit/type}} be the
                 {{EncodedAudioChunkType}} of |output|.
             3. Let {{EncodedAudioChunkInit/timestamp}} be the
-                {{AudioFrame/timestamp}} from the AudioFrame associated with
+                {{AudioData/timestamp}} from the AudioData associated with
                 |output|.
         2. Let |chunk| be a new {{EncodedAudioChunk}} constructed with
             |chunkInit|.
@@ -1815,7 +1818,7 @@
 </dl>
 
 
-Raw Media Interfaces (Frames){#raw-media-interfaces}
+Raw Media Interfaces {#raw-media-interfaces}
 ====================================================
 These interfaces represent unencoded (raw) media.
 
@@ -1826,130 +1829,257 @@
 
 This section is non-normative.
 
-Decoded media data (frames) may occupy a large amount of system memory. To
-minimize the need for expensive copies, this specification defines a scheme
-for reference counting (`frame.clone()` and `frame.close()`).
+Decoded media data may occupy a large amount of system memory. To minimize the
+need for expensive copies, this specification defines a scheme for reference
+counting (`clone()` and `close()`).
 
 ### Reference Counting ### {#raw-media-memory-model-reference-counting}
 
-A <dfn>frame resource</dfn> is storage for the actual pixel data or the audio
-sample data described by a {{VideoFrame}} or {{AudioFrame}}.
+A <dfn>media resource</dfn> is storage for the actual pixel data or the audio
+sample data described by a {{VideoFrame}} or {{AudioData}}.
 
-The {{AudioFrame}} {{AudioFrame/[[resource reference]]}} and {{VideoFrame}}
+The {{AudioData}} {{AudioData/[[resource reference]]}} and {{VideoFrame}}
 {{VideoFrame/[[resource reference]]}} internal slots hold a reference to a
-[=frame resource=].
+[=media resource=].
 
 {{VideoFrame}}.{{VideoFrame/clone()}} and
-{{AudioFrame}}.{{AudioFrame/clone()}} return new frame objects who's
-`[[resource reference]]` points to the same [=frame resource=] as the original
+{{AudioData}}.{{AudioData/clone()}} return new frame objects whose
+`[[resource reference]]` points to the same [=media resource=] as the original
 frame.
 
-{{VideoFrame}}.{{VideoFrame/close()}} and {{AudioFrame}}.{{AudioFrame/close()}}
+{{VideoFrame}}.{{VideoFrame/close()}} and {{AudioData}}.{{AudioData/close()}}
 will clear their [[resource reference]] slot, releasing the reference their
-[=frame resource=]
+[=media resource=]
 
-A [=frame resource=] must remain alive as long as it continues to be referenced
+A [=media resource=] must remain alive as long as it continues to be referenced
 by a `[[resource reference]]`.
 
-NOTE: When a [=frame resource=] is no longer referenced by a
+NOTE: When a [=media resource=] is no longer referenced by a
     `[[resource reference]]`, the resource may be destroyed. User agents are
     encouraged to destroy such resources quickly to reduce memory pressure and
     facilitate resouce reuse.
 
-AudioFrame Interface {#audioframe-interface}
+AudioData Interface {#audiodata-interface}
 ---------------------------------------------
 
 <xmp class='idl'>
 [Exposed=(Window,DedicatedWorker)]
-interface AudioFrame {
-  constructor(AudioFrameInit init);
-
+interface AudioData {
+  constructor(AudioDataInit init);
+
+  readonly attribute AudioSampleFormat sampleFormat;
+  readonly attribute float sampleRate;
+  readonly attribute unsigned long numberOfFrames;
+  readonly attribute unsigned long numberOfChannels;
+  readonly attribute unsigned long allocationSize;
+  readonly attribute unsigned long long duration;
   readonly attribute unsigned long long timestamp;
-  readonly attribute AudioBuffer? buffer;
 
-  AudioFrame clone();
+  undefined copyFromChannel(BufferSource destination, unsigned long channelNumber);
+  AudioData clone();
   undefined close();
 };
 
-dictionary AudioFrameInit {
+dictionary AudioDataInit {
+  required AudioSampleFormat sampleFormat;
+  required float sampleRate;
+  required unsigned long numberOfFrames;
+  required unsigned long numberOfChannels;
   required unsigned long long timestamp;
-  required AudioBuffer buffer;
+  required BufferSource data;
 };
 </xmp>
 
-### Internal Slots ###{#audioframe-internal-slots}
-: <dfn attribute for=AudioFrame>\[[detached]]</dfn>
-:: Boolean indicating whether {{AudioFrame/close()}} was invoked.
+### Internal Slots ###{#audiodata-internal-slots}
+: <dfn attribute for=AudioData>\[[detached]]</dfn>
+:: Boolean indicating whether {{AudioData/close()}} was invoked on this
+    {{AudioData}}.
+
+: <dfn attribute for=AudioData>[[resource reference]]</dfn>
+:: A reference to a [=media resource=] that stores the audio sample data for
+    this {{AudioData}}.
+
+: <dfn attribute for=AudioData>[[sample format]]</dfn>
+:: The {{AudioSampleFormat}} used by this {{AudioData}}.
 
-: <dfn attribute for=AudioFrame>[[resource reference]]</dfn>
-:: A reference to a [=frame resource=] that stores the audio sample data for
-    this frame.
+: <dfn attribute for=AudioData>[[sample rate]]</dfn>
+:: The sample-rate, in Hz, for this {{AudioData}}.
 
-### Constructors ###{#audioframe-constructors}
-<dfn constructor for=AudioFrame title="AudioFrame(init)">
-  AudioFrame(init)
+: <dfn attribute for=AudioData>[[number of frames]]</dfn>
+:: The number of frames (samples per channel) for this {{AudioData}}.
+
+: <dfn attribute for=AudioData>[[number of channels]]</dfn>
+:: The number of audio channels for this {{AudioData}}.
+
+: <dfn attribute for=AudioData>\[[timestamp]]</dfn>
+:: The presentation timestamp, in microseconds, for this {{AudioData}}.
+
+### Constructors ###{#audiodata-constructors}
+<dfn constructor for=AudioData title="AudioData(init)">
+  AudioData(init)
 </dfn>
-1. Let |frame| be a new {{AudioFrame}} object, initialized as follows:
-    1. Assign |init|.{{AudioFrameInit/timestamp}} to {{AudioFrame/timestamp}}.
-    2. Assign |init|.{{AudioFrameInit/buffer}} to {{AudioFrame/buffer}}.
-    3. Let |resource| be a new [=frame resource=].
-    4. [=Acquire the content=] [[WEBAUDIO]] of {{AudioFrame/buffer}} and
-        assign the acquired content to |resource|.
-
-        NOTE: The acquired content stored in |resource| is an immutable
-            snapshot {{AudioFrameInit/buffer}}'s content. Mutating
-            {{AudioFrame/buffer}} after construction (e.g. via
-            {{AudioBuffer/copyToChannel()}}) will have no effect on the
-            |resource|.
-
-    5. Let |resourceReference| be a reference to |resource|.
-    6. Assign |resourceReference| to {{AudioFrame/[[resource reference]]}}.
-    7. Assign `false` to the {{AudioFrame/[[detached]]}} internal slot.
+1. Let |frame| be a new {{AudioData}} object, initialized as follows:
+    1. Assign `false` to {{AudioData/[[detached]]}}.
+    2. Assign |init|.{{AudioDataInit/sampleFormat}} to
+        {{AudioData/[[sample format]]}}.
+    3. Assign |init|.{{AudioDataInit/sampleRate}} to
+        {{AudioData/[[sample rate]]}}.
+    4. Assign |init|.{{AudioDataInit/numberOfFrames}} to
+        {{AudioData/[[number of frames]]}}.
+    5. Assign |init|.{{AudioDataInit/numberOfChannels}} to
+        {{AudioData/[[number of channels]]}}.
+    6. Assign |init|.{{AudioDataInit/timestamp}} to
+        {{AudioData/[[timestamp]]}}.
+    7. Let |resource| be a [=media resource=] containing a copy of
+        |init|.{{AudioDataInit/data}}.
+    8. Let |resourceReference| be a reference to |resource|.
+    9. Assign |resourceReference| to {{AudioData/[[resource reference]]}}.
 2. Return |frame|.
 
-### Attributes ###{#audioframe-attributes}
-: <dfn attribute for=AudioFrame>timestamp</dfn>
-:: The presentation timestamp, given in microseconds.
+### Attributes ###{#audiodata-attributes}
+
+: <dfn attribute for=AudioData>sampleFormat</dfn>
+:: The {{AudioSampleFormat}} used by this {{AudioData}}.
+
+    The {{AudioData/sampleFormat}} getter steps are to return
+    {{AudioData/[[sample format]]}}.
+
+: <dfn attribute for=AudioData>sampleRate</dfn>
+:: The sample-rate, in Hz, for this {{AudioData}}.
+
+    The {{AudioData/sampleRate}} getter steps are to return
+    {{AudioData/[[sample rate]]}}.
+
+: <dfn attribute for=AudioData>numberOfFrames</dfn>
+:: The number of frames (samples per channel) for this {{AudioData}}.
+
+    The {{AudioData/numberOfFrames}} getter steps are to return
+    {{AudioData/[[number of frames]]}}.
+
+: <dfn attribute for=AudioData>numberOfChannels</dfn>
+:: The number of audio channels for this {{AudioData}}.
+
+    The {{AudioData/numberOfChannels}} getter steps are to return
+    {{AudioData/[[number of channels]]}}.
 
-: <dfn attribute for=AudioFrame>buffer</dfn>
-:: The buffer describing decoded audio data.
+: <dfn attribute for=AudioData>allocationSize</dfn>
+:: The number of bytes allocated to hold all of the samples in this
+    {{AudioData}}.
 
-    NOTE: The backing [=frame resource=] (referenced by
-        {{AudioFrame/[[resource reference]]}}) is immutable. Mutating
-        {{AudioFrame/buffer}} after construction (e.g. via
-        {{AudioBuffer/copyToChannel()}}) will have no effect on the
-        [=frame resource=].
+    The {{AudioData/allocationSize}} getter steps are to:
+    1. Let |sampleSize| be the number of bytes per sample, as defined by the
+        {{AudioData/[[sample format]]}}.
+    2. Return the product of multiplying |sampleSize| by
+        {{AudioData/[[number of channels]]}} and
+        {{AudioData/[[number of frames]]}}.
 
-### Methods ###{#audioframe-methods}
-: <dfn method for=AudioFrame>clone()</dfn>
-:: Creates a new AudioFrame with a reference to the same [=frame resource=].
+: <dfn attribute for=AudioData>timestamp</dfn>
+:: The presentation timestamp, in microseconds, for this {{AudioData}}.
+
+    The {{AudioData/timestamp}} getter steps are to return
+    {{AudioData/[[timestamp]]}}.
+
+: <dfn attribute for=AudioData>duration</dfn>
+:: The duration, in microseconds, for this {{AudioData}}.
+
+    The {{AudioData/duration}} getter steps are to:
+    1. Let |microsecondsPerSecond| be `1,000,000`.
+    2. Let |durationInSeconds| be the result of dividing
+        {{AudioData/[[number of frames]]}} by {{AudioData/[[sample rate]]}}.
+    3. Return the product of |durationInSeconds| and |microsecondsPerSecond|.
+
+### Methods ###{#audiodata-methods}
+: <dfn method for=AudioData>
+      copyFromChannel(destination, channelNumber)
+    </dfn>
+:: Copies the samples from the specified channel of the {{AudioData}} to the
+    destination buffer.
 
     When invoked, run these steps:
-    1. If the value of |frame|'s {{AudioFrame/[[detached]]}} internal slot is
+    1. If the value of [=this=]'s {{AudioData/[[detached]]}} internal slot is
         `true`, throw an {{InvalidStateError}} {{DOMException}}.
-    2. Return the result of running the [=Clone AudioFrame=] algorithm with
+    2. Let |allocationSize| be the number of bytes allocated to hold this
+        {{AudioData}}'s [=media resource=], as described by the getter steps of
+        {{AudioData/allocationSize}}.
+    3. If |allocationSize| is greater than `destination.byteLength`, throw a
+        {{TypeError}}.
+    4. Let |resource| be the [=media resource=] referenced by
+        {{AudioData/[[resource reference]]}}.
+    5. Copy the bytes of |resource| into <var ignore=''>destination</var>.
+
+: <dfn method for=AudioData>clone()</dfn>
+:: Creates a new AudioData with a reference to the same [=media resource=].
+
+    When invoked, run these steps:
+    1. If the value of [=this=]'s {{AudioData/[[detached]]}} internal slot is
+        `true`, throw an {{InvalidStateError}} {{DOMException}}.
+    2. Return the result of running the [=Clone AudioData=] algorithm with
         [=this=].
 
-: <dfn method for=AudioFrame>close()</dfn>
-:: Clears all state and releases the reference to the [=frame resource=].
+: <dfn method for=AudioData>close()</dfn>
+:: Clears all state and releases the reference to the [=media resource=].
     Close is final.
 
     When invoked, run these steps:
-    1. Assign `null` to {{AudioFrame/[[resource reference]]}}.
-    2. Assign `null` to {{AudioFrame/buffer}}.
-    3. Assign `true` to the {{AudioFrame/[[detached]]}} internal slot.
+    1. Assign `true` to the {{AudioData/[[detached]]}} internal slot.
+    2. Assign `null` to {{AudioData/[[resource reference]]}}.
 
-### Algorithms ### {#audioframe-algorithms}
+### Algorithms ### {#audiodata-algorithms}
 
-: <dfn>Clone AudioFrame</dfn> (with |frame|)
+: <dfn>Clone AudioData</dfn> (with |data|)
 :: Run these steps:
-    1. Let |clone| be a new {{AudioFrame}} initialized as follows:
-        1. Assign |frame|.{{AudioFrame/[[resource reference]]}} to
-            {{AudioFrame/[[resource reference]]}}.
-        2. Assign |frame|.timestamp to {{AudioFrame/timestamp}}.
-        3. Assign |frame|.buffer to {{AudioFrame/buffer}}.
+    1. Let |clone| be a new {{AudioData}} initialized as follows:
+        1. Let |resource| be the [=media resource=] referenced by |data|'s
+            {{AudioData/[[resource reference]]}}.
+        2. Let |reference| be a new reference to |resource|.
+        3. Assign |reference| to {{AudioData/[[resource reference]]}}.
+        4. Assign the values of |data|'s {{AudioData/[[detached]]}},
+            {{AudioData/[[sample format]]}}, {{AudioData/[[sample rate]]}},
+            {{AudioData/[[number of frames]]}},
+            {{AudioData/[[number of channels]]}}, and
+            {{AudioData/[[timestamp]]}} slots to the corresponding slots in
+            |clone|.
     2. Return |clone|.
 
+Audio Sample Format{#audio-sample-format}
+-----------------------------------------
+Audio sample formats describe the numeric type used to represent a single
+sample (e.g. 32-bit floating point) and the arrangement of samples from
+different channels as either interleaved or planar.
+
+<xmp class='idl'>
+enum AudioSampleFormat {
+  "U8",
+  "S16",
+  "S32",
+  "FLT",
+  "S16P",
+  "S32P",
+  "FLTP",
+};
+</xmp>
+
+: <dfn enum-value for=AudioSampleFormat>U8</dfn>
+:: 8-bit unsigned integer samples with interleaved channel arrangement.
+
+: <dfn enum-value for=AudioSampleFormat>S16</dfn>
+:: 16-bit signed integer samples with interleaved channel arrangement.
+
+: <dfn enum-value for=AudioSampleFormat>S32</dfn>
+:: 32-bit signed integer samples with interleaved channel arrangement.
+
+: <dfn enum-value for=AudioSampleFormat>FLT</dfn>
+:: 32-bit float samples with interleaved channel arrangement.
+
+: <dfn enum-value for=AudioSampleFormat>S16P</dfn>
+:: 16-bit signed integer samples with planar channel arrangement.
+
+: <dfn enum-value for=AudioSampleFormat>S32P</dfn>
+:: 32-bit signed integer samples with planar channel arrangement.
+
+: <dfn enum-value for=AudioSampleFormat>FLTP</dfn>
+:: 32-bit float samples with planar channel arrangement.
+
 VideoFrame Interface {#videoframe-interface}
 --------------------------------------------
 
@@ -2409,7 +2539,7 @@
   <dt><dfn>Clone Frame</dfn> (with |frame|)</dt>
   <dd>
     1. Let |cloneFrame| be a new object of the same type as frame (either
-        {{AudioFrame}} or {{VideoFrame}}).
+        {{AudioData}} or {{VideoFrame}}).
     2. Initialize each attribute and internal slot of clone with a copy of the
         value from the corresponding attribute of this frame.
 

From 5dd224a6fdb30e482ecf0fdc9c0a2a181d5514df Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Thu, 29 Apr 2021 22:09:31 -0700
Subject: [PATCH 7/9] Relax media resource lifetime to be 'at least as' long as
 its references

---
 index.src.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/index.src.html b/index.src.html
index 727cb21c..0ceb78db 100644
--- a/index.src.html
+++ b/index.src.html
@@ -1851,8 +1851,8 @@
 will clear their [[resource reference]] slot, releasing the reference their
 [=media resource=]
 
-A [=media resource=] must remain alive as long as it continues to be referenced
-by a `[[resource reference]]`.
+A [=media resource=] must remain alive at least as long as it continues to be
+referenced by a `[[resource reference]]`.
 
 NOTE: When a [=media resource=] is no longer referenced by a
     `[[resource reference]]`, the resource may be destroyed. User agents are

From 59464f34e88a82cad82269295a06c460866540c5 Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Thu, 29 Apr 2021 22:56:11 -0700
Subject: [PATCH 8/9] Copy specific channel bytes in copyFromChannel()

---
 index.src.html | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/index.src.html b/index.src.html
index 0ceb78db..36eb52ca 100644
--- a/index.src.html
+++ b/index.src.html
@@ -1990,7 +1990,7 @@
 
 ### Methods ###{#audiodata-methods}
 : <dfn method for=AudioData>
-      copyFromChannel(destination, channelNumber)
+      copyFromChannel(|destination|, |channelNumber|)
     </dfn>
 :: Copies the samples from the specified channel of the {{AudioData}} to the
     destination buffer.
@@ -2005,7 +2005,9 @@
         {{TypeError}}.
     4. Let |resource| be the [=media resource=] referenced by
         {{AudioData/[[resource reference]]}}.
-    5. Copy the bytes of |resource| into <var ignore=''>destination</var>.
+    5. Let |channelBytes| be the region of |resource| corresponding to
+        |channelNumber|.
+    6. Copy the |channelBytes| into |destination|.
 
 : <dfn method for=AudioData>clone()</dfn>
 :: Creates a new AudioData with a reference to the same [=media resource=].

From 5b33d7f72e8b1677013d404c495b33ee0f526422 Mon Sep 17 00:00:00 2001
From: Chris Cunningham <chcunningham@chromium.org>
Date: Mon, 3 May 2021 21:14:00 -0700
Subject: [PATCH 9/9] Fix typos, rename copyFromChannel -> copyTo.

---
 index.src.html | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/index.src.html b/index.src.html
index 8db5bacf..2cfcba41 100644
--- a/index.src.html
+++ b/index.src.html
@@ -311,7 +311,7 @@
     4. Let |decoded outputs| be a [=list=] of decoded video data outputs emitted
         by {{AudioDecoder/[[codec implementation]]}}.
     5. If |decoded outputs| is not empty, queue a task on the [=control thread=]
-        event loop to run the [=Output AudioDatas=] algorithm with
+        event loop to run the [=Output AudioData=] algorithm with
         |decoded outputs|.
   </dd>
 
@@ -334,7 +334,7 @@
     2. Let |decoded outputs| be a [=list=] of decoded audio data outputs emitted
         by {{AudioDecoder/[[codec implementation]]}}.
     3. If |decoded outputs| is not empty, queue a task on the [=control thread=]
-        event loop to run the [=Output AudioDatas=] algorithm with
+        event loop to run the [=Output AudioData=] algorithm with
         |decoded outputs|.
     4. Queue a task on the [=control thread=] event loop to resolve |promise|.
   </dd>
@@ -388,7 +388,7 @@
 Algorithms {#audiodecoder-algorithms}
 -------------------------------------
 <dl>
-  <dt><dfn>Output AudioDatas</dfn> (with |outputs|)</dt>
+  <dt><dfn>Output AudioData</dfn> (with |outputs|)</dt>
   <dd>
     Run these steps:
     1. For each |output| in |outputs|:
@@ -1906,7 +1906,7 @@
   readonly attribute unsigned long long duration;
   readonly attribute unsigned long long timestamp;
 
-  undefined copyFromChannel(BufferSource destination, unsigned long channelNumber);
+  undefined copyTo([AllowShared] BufferSource destination, unsigned long planeNumber);
   AudioData clone();
   undefined close();
 };
@@ -2021,9 +2021,9 @@
 
 ### Methods ###{#audiodata-methods}
 : <dfn method for=AudioData>
-      copyFromChannel(|destination|, |channelNumber|)
+      copyTo(|destination|, |planeNumber|)
     </dfn>
-:: Copies the samples from the specified channel of the {{AudioData}} to the
+:: Copies the samples from the specified plane of the {{AudioData}} to the
     destination buffer.
 
     When invoked, run these steps:
@@ -2036,9 +2036,9 @@
         {{TypeError}}.
     4. Let |resource| be the [=media resource=] referenced by
         {{AudioData/[[resource reference]]}}.
-    5. Let |channelBytes| be the region of |resource| corresponding to
-        |channelNumber|.
-    6. Copy the |channelBytes| into |destination|.
+    5. Let |planeBytes| be the region of |resource| corresponding to
+        |planeNumber|.
+    6. Copy the |planeBytes| into |destination|.
 
 : <dfn method for=AudioData>clone()</dfn>
 :: Creates a new AudioData with a reference to the same [=media resource=].