diff --git a/.gitignore b/.gitignore
index 10b09df..289adb5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ uniteai.egg-info/
 test.md
 test.txt
 *.log
+debug_transcription.wav
 
 # VSCode
 .vscode/
diff --git a/Makefile b/Makefile
index 59be611..72ad802 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ watch-tests:
 		pytest --capture=no; \
 	done
 
-upload:
+publish_pypi:
 	rm -r dist
 	python -m build
 	python -m twine upload dist/*
diff --git a/clients/vscode/package.json b/clients/vscode/package.json
index d5cecad..498c32c 100644
--- a/clients/vscode/package.json
+++ b/clients/vscode/package.json
@@ -3,7 +3,7 @@
   "description": "Use AI in your Editor.",
   "author": "uniteai",
   "license": "Apache-2.0",
-  "version": "0.1.11",
+  "version": "0.1.12",
   "icon": "icon.jpeg",
   "repository": {
     "type": "git",
diff --git a/clients/vscode/uniteai-0.1.11.vsix b/clients/vscode/uniteai-0.1.11.vsix
deleted file mode 100644
index 1bc1118..0000000
Binary files a/clients/vscode/uniteai-0.1.11.vsix and /dev/null differ
diff --git a/clients/vscode/uniteai-0.1.12.vsix b/clients/vscode/uniteai-0.1.12.vsix
new file mode 100644
index 0000000..e9f7591
Binary files /dev/null and b/clients/vscode/uniteai-0.1.12.vsix differ
diff --git a/pyproject.toml b/pyproject.toml
index 0ada283..260eda8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "uniteai"
-version = "0.1.9"
+version = "0.1.10"
 description = "AI, Inside your Editor."
 readme = "README.md"
 license = "Apache-2.0"
diff --git a/todo/021_efficient_realtime_transcription.md b/todo/021_efficient_realtime_transcription.md
new file mode 100644
index 0000000..819d8af
--- /dev/null
+++ b/todo/021_efficient_realtime_transcription.md
@@ -0,0 +1,12 @@
+# 021: Efficient Realtime Transcription
+
+As of recent commits, during a transcription window, the entire audio is saved in memory, and the whole thing is repeatedly transcribed. Inefficient.
+
+
+## Options:
+
+* Freeze transcription of earlier portions, and only re-recognize the latest portions. Perhaps a sliding window would work, but then each window must overlap with previous windows so that, e.g., words aren't cut in half, and some effort will be needed to properly align the transcribed text with the audio. This seems like a huge ergonomic improvement, but perhaps technically tough.
+
+* Check the RMS energy level of audio chunks to find the start/stop of phrases, and cut out silence.
+
+* Cut out noise? Or perhaps `whisper` was trained on enough noisy data that it already deals well with it, in which case denoising would just add a significant inefficiency.
diff --git a/todo/CANCELLED 009_add_emacs_marker_for_transcription.md b/todo/CANCELLED 009_add_emacs_marker_for_transcription.md
deleted file mode 100644
index 84b7c09..0000000
--- a/todo/CANCELLED 009_add_emacs_marker_for_transcription.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# 009: Add Emacs Marker for Transcription :CANCELLED:
-
-- NOTES: This is cancelled because of how `002_newline_management` was solved, namely, giving the LSP a tagged block that it gets to control.
-
-The emacs marker can keep track of what point should be written to.
-
-## An example of dealing with a marker
-
-```elisp
-;;;;;;;;;;
-;; Marker Test
-
-(defvar-local my-global-marker nil)
-
-(defun my-initialize-marker ()
-  "Set the global marker to the beginning of the buffer."
-  (setq my-global-marker (point-min-marker)))
-
-(add-hook 'find-file-hook 'my-initialize-marker)
-
-(defvar-local my-marker-overlay nil
-  "Overlay for the marker set by `marker-set-command'.")
-
-(defun get-marker-column (marker)
-  "Get column number of a marker"
-  (save-excursion
-    (goto-char marker)
-    (current-column)))
-
-(defun marker-update-command()
-  (interactive)
-  ;; Report marker
-  (let* ((doc (eglot--TextDocumentIdentifier))
-         (line (line-number-at-pos my-global-marker))
-         (character (get-marker-column my-global-marker))
-         (params `(:emacsMarker (:line ,line :character ,character))))
-    (eglot-execute-command (eglot--current-server-or-lose) 'command.markerSet (vector doc params)))
-
-  ;; Remove the old overlay, if any
-  (when (overlayp my-marker-overlay)
-    (delete-overlay my-marker-overlay))
-
-  ;; Create a new overlay at the marker's position
-  (let ((marker-pos (marker-position my-global-marker)))
-    (setq my-marker-overlay (make-overlay marker-pos (1+ marker-pos)))
-    (overlay-put my-marker-overlay 'face 'highlight))
-  )
-
-(defun marker-set-command ()
-  "Send an Emacs marker to the LSP server."
-  (interactive)
-  (setq my-global-marker (point-marker))
-  (marker-update-command))
-
-(defun marker-get-command ()
-  "Get the Emacs marker from the LSP server."
-  (interactive)
-  (let* ((doc (eglot--TextDocumentIdentifier))
-         (marker my-global-marker)
-         (line (line-number-at-pos marker))
-         (character (current-column))
-         (params `(:emacsMarker (:line ,line :character ,character))))
-    (eglot-execute-command (eglot--current-server-or-lose) 'command.markerGet (vector doc params))))
-
-(defun my-after-change-function (begin end length)
-  "Call `marker-set-command' if the current buffer is managed by Eglot."
-  (when (bound-and-true-p eglot--managed-mode)
-    (marker-update-command)))
-
-(add-hook 'after-change-functions #'my-after-change-function)
-```
diff --git a/todo/CANCELLED 010_add_emacs_marker_for_llm.md b/todo/CANCELLED 010_add_emacs_marker_for_llm.md
deleted file mode 100644
index 925d1fc..0000000
--- a/todo/CANCELLED 010_add_emacs_marker_for_llm.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# 010: Add Emacs Marker for LLM :CANCELLED:
-
-
-NOTES: This is cancelled because of how `002_newline_management` was solved, namely, giving the LSP a tagged block that it gets to control.
-
-
-The emacs marker can keep track of what point should be written to.
diff --git a/todo/019_realtime_transcription.md b/todo/DONE_019_realtime_transcription.md
similarity index 69%
rename from todo/019_realtime_transcription.md
rename to todo/DONE_019_realtime_transcription.md
index c3fd3bc..750b292 100644
--- a/todo/019_realtime_transcription.md
+++ b/todo/DONE_019_realtime_transcription.md
@@ -3,3 +3,10 @@
 * Is there a library?
 
 * If not, what if we fired off multiple threads to listen at different time-scales, and combine the results? For instance, a short timeout could catch every 1 second of audio and optimistically transcribe it; then, when the long-timescale listening thread returns, its transcription will likely yield a better result, so we can override previous misses. These audio chunks can be thrown in the same queue, tagged, and we can drain short-timescale chunks off the queue if there's a more recent long-timescale chunk.
+
+
+RESULT:
+
+I've opted for recording the entire audio stream, and not doing any processing before `recognize`.
+
+There are definitely still efficiency gains to be had, so I'll make a new ticket, but this works well enough for short transcription runs for now.
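Ticket 021's second bullet (and the TODO left in `transcription_` below) float an RMS-based silence gate as the follow-up optimization. A minimal sketch of that idea, not part of this commit: it uses the stdlib `audioop.rms` (deprecated since Python 3.11), and the threshold value is a made-up placeholder that would need tuning alongside `energy_threshold`.

```python
# Sketch of the RMS silence gate proposed in ticket 021 (not in this diff).
# `audioop` is stdlib but deprecated since Python 3.11; SILENCE_RMS is a
# hypothetical placeholder threshold, to be tuned per microphone.
import audioop

SILENCE_RMS = 500


def is_speech(buffer: bytes, sample_width: int) -> bool:
    '''True if this chunk's RMS energy suggests speech rather than silence.'''
    return audioop.rms(buffer, sample_width) >= SILENCE_RMS


# Inside `transcription_`, silent chunks could then be dropped before joining:
#   audios = [b for b in audios if is_speech(b, self.sample_width)]
```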
diff --git a/todo/020_fix_vscode_client.md b/todo/DONE_020_fix_vscode_client.md
similarity index 100%
rename from todo/020_fix_vscode_client.md
rename to todo/DONE_020_fix_vscode_client.md
diff --git a/uniteai/common.py b/uniteai/common.py
index 7e0f799..a76e579 100644
--- a/uniteai/common.py
+++ b/uniteai/common.py
@@ -43,27 +43,6 @@ def mk_logger(name, level):
     return logger
 
 
-
-##################################################
-
-class ThreadSafeCounter:
-    '''
-    A threadsafe incrementable integer.
-    '''
-
-    def __init__(self):
-        self.value = 0
-        self._lock = Lock()
-
-    def increment(self):
-        with self._lock:
-            self.value += 1
-            return self.value
-
-    def get(self):
-        return self.value
-
-
 ##################################################
 # Dict helpers
 
diff --git a/uniteai/transcription.py b/uniteai/transcription.py
index 97e8156..dbc24a2 100644
--- a/uniteai/transcription.py
+++ b/uniteai/transcription.py
@@ -6,54 +6,37 @@
 '''
 
 from thespian.actors import Actor
-from typing import List
-import pygls
 from pygls.server import LanguageServer
 from lsprotocol.types import (
-    ApplyWorkspaceEditParams,
     CodeAction,
     CodeActionKind,
     CodeActionParams,
     Command,
     Position,
-    Range,
     TextDocumentIdentifier,
-    VersionedTextDocumentIdentifier,
-    TextEdit,
-    WorkspaceEdit,
-    DidChangeTextDocumentParams,
 )
-import sys
 import logging
-from pygls.protocol import default_converter
-import requests
-import json
-from concurrent.futures import ThreadPoolExecutor
-import openai
-import yaml
-from threading import Thread, Lock, Event
+from threading import Thread, Event
 from queue import Queue, Empty
 import speech_recognition as sr
 import re
 import numpy as np
 import time
-from dataclasses import dataclass
-from typing import List, Tuple
-import re
-import itertools
 import argparse
+import threading
+from functools import partial
 
-from uniteai.common import ThreadSafeCounter, mk_logger, find_block, get_nested
+from uniteai.common import mk_logger, find_block, get_nested
 from uniteai.edit import BlockJob, cleanup_block, init_block
 
-
 START_TAG = ':START_TRANSCRIPTION:'
 END_TAG = ':END_TRANSCRIPTION:'
 NAME = 'transcription'
 
 # A custom logger for just this feature. You can tune the log level to turn
 # on/off just this feature's logs.
-log = mk_logger(NAME, logging.DEBUG)
+log_level = logging.DEBUG
+log = mk_logger(NAME, log_level)
 
 
 ##################################################
@@ -67,22 +50,17 @@ def __init__(self,
                  model_path,
                  model_size,
                  volume_threshold):
-        self.model_type = model_type
         self.model_path = model_path
         self.model_size = model_size
 
         # Recognizer
         self.r = sr.Recognizer()
+        self.mic = sr.Microphone()
+        self.sample_rate = None
+        self.sample_width = None
         self.r.energy_threshold = volume_threshold
         self.r.dynamic_energy_threshold = False
-        self.audio_queue = Queue()
-
-        # Keep track of the iteration when a thread was started. That way, if
-        # it had a blocking operation (like `r.listen`) that should have been
-        # terminated, but couldn't because the thread was blocked, well, now we
-        # can deprecate that thread.
-        self.transcription_counter = ThreadSafeCounter()
 
     def recognize(self, audio):
         log.debug(f'MODEL_TYPE: {self.model_type}')
@@ -106,106 +84,156 @@ def recognize(self, audio):
 
     def _warmup(self):
         '''
         Warm up, intended for a separate thread.
        '''
-        empty_audio = sr.AudioData(np.zeros(10), sample_rate=1, sample_width=1)
-        self.recognize(empty_audio)
-        logging.info('Warmed up transcription model')
-        # TODO: Transcription needs to be tuned better to deal with ambient
-        #       noise, and appropriate volume levels
-        #
-        logging.info('Adjusting thresholds for ambient noise')
-        with sr.Microphone() as source:
-            self.r.adjust_for_ambient_noise(source)
+        # Get some mic params
+        with self.mic as source:
+            self.sample_rate = source.SAMPLE_RATE
+            self.sample_width = source.SAMPLE_WIDTH
+        # Get model into memory
+        empty_audio = sr.AudioData(np.zeros(10), sample_rate=1, sample_width=1)
+        self.recognize(empty_audio)
+        log.info(f'Warmed up. sample_rate={self.sample_rate}, sample_width={self.sample_width}')
 
     def warmup(self):
         '''Load whisper model into memory.'''
-        logging.info('Warming up whisper in separate thread')
+        log.info('Warming up transcription model in separate thread')
        warmup_thread = Thread(target=self._warmup)
        warmup_thread.daemon = True
        warmup_thread.start()
 
-    def listen(self, should_stop):
-        def callback(r, audio):
-            log.debug('LISTENING CALLBACK called')
-            self.audio_queue.put(audio, block=False)
-        stop_listening_fn = self.r.listen_in_background(
-            sr.Microphone(),
-            callback
-        )
-        return stop_listening_fn
-
-    def transcription_worker(self, uri, edits, should_stop,
-                             transcription_worker_is_running):
-        transcription_worker_is_running.set()
-        running_transcription = ""
+    def listen_(self,
+                queue: Queue,
+                should_stop: Event):
+        with sr.Microphone() as s:
+            while not should_stop.is_set():
+                buf = s.stream.read(s.CHUNK)
+                queue.put(buf)
+
+    def transcription_(self,
+                       audio_queue,
+                       transcription_callback,
+                       finished_callback,
+                       should_stop):
+        audios = []
         while not should_stop.is_set():
             try:
                 # non-blocking, to more frequently allow the
                 # `stop_transcription` signal to end this thread.
-                audio = self.audio_queue.get(False)
+                buffer = audio_queue.get(False)
+
+                # TODO: can we more intelligently separate silence from speech?
+                # energy = audioop.rms(buffer, self.sample_width)
+                audios.append(buffer)
+                try:
+                    while True:
+                        buffer = audio_queue.get(False)
+                        audios.append(buffer)
+                except Empty:
+                    pass
+
+                log.debug(f'len audios: {len(audios)}')
+
             except Empty:
                 time.sleep(0.2)
                 continue
             try:
+                audio = sr.audio.AudioData(
+                    b''.join(audios),
+                    self.sample_rate,
+                    self.sample_width
+                )
+                # Debug audio: the audio gets sliced into regular chunks; how
+                # does it sound when stitched back together?
+                if log_level == logging.DEBUG:
+                    with open("debug_transcription.wav", "wb") as output_file:
+                        output_file.write(audio.get_wav_data())
+
+                # break out if needed
+                if should_stop.is_set():
+                    break
+
+                # Speech-to-text
                 x = self.recognize(audio)
+
+                # Nothing recognized
                 if not x:
                     continue
                 x = x.strip()
-                log.debug(f'TRANSCRIPTION: {x}')
                 if filter_out(x):
                     continue
-                # Add space to respect next loop of transcription
-                running_transcription += x + ' '
-                job = BlockJob(
-                    uri=uri,
-                    start_tag=START_TAG,
-                    end_tag=END_TAG,
-                    text=f'\n{running_transcription}\n',
-                    strict=False,
-                )
-                edits.add_job(NAME, job)
+
+                # break out if needed
+                if should_stop.is_set():
+                    break
+
+                transcription_callback(x)
+
             except sr.UnknownValueError:
                 log.debug("ERROR: could not understand audio")
-            self.audio_queue.task_done()
+            audio_queue.task_done()
 
-        cleanup_block(NAME, [START_TAG, END_TAG], uri, edits)
-        transcription_worker_is_running.clear()
+        finished_callback()
         log.debug('DONE TRANSCRIBING')
 
+    def go(self,
+           transcription_callback,
+           finished_callback):
+        audio_queue = Queue()
+        should_stop = Event()
+
+        # Listener Thread
+        l_thread = threading.Thread(
+            target=self.listen_,
+            args=(audio_queue, should_stop))
+        l_thread.daemon = True
+        l_thread.start()
+
+        # Transcription Thread
+        t_thread = threading.Thread(
+            target=self.transcription_,
+            args=(audio_queue,
+                  transcription_callback,
+                  finished_callback,
+                  should_stop))
+        t_thread.daemon = True
+        t_thread.start()
+
+        def stop_fn():
+            log.debug('stop_fn called')
+            should_stop.set()
+            l_thread.join()
+            t_thread.join()
+
+        return stop_fn
+
 
 ##################################################
 # Actor
 
 class TranscriptionActor(Actor):
     def __init__(self):
-        self.transcription_worker_is_running = Event()
-        self.should_stop = Event()
+        self.is_running = Event()
         self.tags = [START_TAG, END_TAG]
         self.speech_recognition = None  # set during initialization
-        self.executor = ThreadPoolExecutor(max_workers=5)
-        self.transcription_thread_future = None  # set during set_config/start
         self.model_path = None
         self.model_size = None
         self.volume_threshold = None
-        self.stop_listening_fn = lambda x,y: None
+        self.stop_fn = lambda: None
 
     def receiveMessage(self, msg, sender):
         command = msg.get('command')
         edits = msg.get('edits')
-        tw_set = self.transcription_worker_is_running.is_set()
+        tw_set = self.is_running.is_set()
         log.debug(f'''
 %%%%%%%%%%
 ACTOR RECV: {msg["command"]}
 
 ACTOR STATE:
 transcription_worker is running={tw_set}
-should_stop: {self.should_stop.is_set()}
-transcription_thread_future: {self.transcription_thread_future}
 
 EDITS STATE:
 job_thread alive: {edits.job_thread.is_alive() if edits and edits.job_thread else "NOT STARTED"}
@@ -264,43 +292,40 @@ def receiveMessage(self, msg, sender):
             # load the model into GPU
             self.speech_recognition.warmup()
 
-    def start(self, uri, cursor_pos, edits):
-        tw_set = self.transcription_worker_is_running.is_set()
-        if tw_set:
-            log.info(f'WARN: ON_START_BUT_RUNNING. '
-                     f'transcription_worker is running={tw_set}')
-            return
-        log.debug('ACTOR START')
-        self.should_stop.clear()
-
-        # Audio Listener
-        self.stop_listening_fn = self.speech_recognition.listen(self.should_stop)
-
-        # Transcriber
-        self.transcription_thread_future = self.executor.submit(
-            self.speech_recognition.transcription_worker,
-            uri, edits, self.should_stop, self.transcription_worker_is_running)
+    def transcription_callback(self, edits, uri, text):
+        # Overwrite the tagged block with the latest full transcription
+        log.debug(f'TRANSCRIBED: {text}')
+        job = BlockJob(
+            uri=uri,
+            start_tag=START_TAG,
+            end_tag=END_TAG,
+            text=f'\n{text}\n',
+            strict=False,
+        )
+        edits.add_job(NAME, job)
+
+    def finished_callback(self, edits, uri):
+        log.debug(f'FINISHED CALLBACK: {uri}')
+        cleanup_block(NAME, [START_TAG, END_TAG], uri, edits)
+
+    def start(self, uri, cursor_pos, edits):
+        if self.is_running.is_set():
+            log.info('WARN: ON_START_BUT_RUNNING.')
+            return False
+        self.stop_fn = self.speech_recognition.go(
+            partial(self.transcription_callback, edits, uri),
+            partial(self.finished_callback, edits, uri))
+        self.is_running.set()
         log.debug('START CAN RETURN')
 
     def stop(self):
         log.debug('ACTOR STOP')
-        tw_set = self.transcription_worker_is_running.is_set()
-        if not tw_set:
-            log.info('WARN: ON_STOP_BUT_STOPPED'
-                     f'transcription_worker is running={tw_set}')
+        if not self.is_running.is_set():
+            log.info('WARN: ON_STOP_BUT_STOPPED')
             return False
-
-        self.should_stop.set()
-        self.stop_listening_fn(wait_for_stop=False)
-
-        if self.transcription_thread_future:
-            log.debug('Waiting for audio `transcription_thread_future` to terminate')
-            self.transcription_thread_future.result()  # block, wait to finish
-            self.transcription_thread_future = None  # reset
-
-        self.should_stop.clear()
-        self.stop_listening_fn = lambda x,y: None
+        self.stop_fn()
+        self.is_running.clear()
+        self.stop_fn = lambda: None
         log.debug('FINALLY STOPPED')
@@ -327,8 +352,6 @@ def filter_alphanum(x: str) -> str:
 
 
 def filter_out(x: str) -> bool:
     x = filter_alphanum(x)
-    # if len(x) < 4:  # weed out short utterances
-    #     return True
     return x.strip().lower() in filter_list
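For reference, a minimal usage sketch of the `go`/`stop_fn` API this diff introduces. The recognizer's class name and constructor values aren't visible in these hunks, so `Transcription`, the model arguments, and the sleep duration are placeholders; the callbacks here just print instead of editing a buffer the way `TranscriptionActor` does.

```python
# Usage sketch only: class name and constructor args are assumptions, since
# this diff shows only __init__(model_path, model_size, volume_threshold).
import time
from uniteai.transcription import Transcription  # hypothetical import

t = Transcription(model_path='/path/to/model',  # placeholder
                  model_size='small',           # placeholder
                  volume_threshold=300)         # placeholder
t.warmup()  # loads the model and reads mic params in a daemon thread

# go() spawns the listener and transcription threads and returns a closure
# that sets should_stop and joins both threads.
stop_fn = t.go(
    transcription_callback=lambda text: print('so far:', text),
    finished_callback=lambda: print('done'),
)
time.sleep(10)  # speak into the mic for a bit
stop_fn()
```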