Skip to content

gh-116622: Android logging fixes #122698

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 6, 2024
128 changes: 100 additions & 28 deletions Lib/_android_support.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
import io
import sys

from threading import RLock
from time import sleep, time

# The maximum length of a log message in bytes, including the level marker and
# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD in
# platform/system/logging/liblog/include/log/log.h. As of API level 30, messages
# longer than this will be be truncated by logcat. This limit has already been
# reduced at least once in the history of Android (from 4076 to 4068 between API
# level 23 and 26), so leave some headroom.
# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at
# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71.
# Messages longer than this will be be truncated by logcat. This limit has already
# been reduced at least once in the history of Android (from 4076 to 4068 between
# API level 23 and 26), so leave some headroom.
MAX_BYTES_PER_WRITE = 4000

# UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this
# size ensures that TextIOWrapper can always avoid exceeding MAX_BYTES_PER_WRITE.
# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE.
# However, if the actual number of bytes per character is smaller than that,
# then TextIOWrapper may still join multiple consecutive text writes into binary
# then we may still join multiple consecutive text writes into binary
# writes containing a larger number of characters.
MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4

Expand All @@ -26,18 +27,22 @@ def init_streams(android_log_write, stdout_prio, stderr_prio):
if sys.executable:
return # Not embedded in an app.

global logcat
logcat = Logcat(android_log_write)

sys.stdout = TextLogStream(
android_log_write, stdout_prio, "python.stdout", errors=sys.stdout.errors)
stdout_prio, "python.stdout", errors=sys.stdout.errors)
sys.stderr = TextLogStream(
android_log_write, stderr_prio, "python.stderr", errors=sys.stderr.errors)
stderr_prio, "python.stderr", errors=sys.stderr.errors)


class TextLogStream(io.TextIOWrapper):
def __init__(self, android_log_write, prio, tag, **kwargs):
def __init__(self, prio, tag, **kwargs):
kwargs.setdefault("encoding", "UTF-8")
kwargs.setdefault("line_buffering", True)
super().__init__(BinaryLogStream(android_log_write, prio, tag), **kwargs)
self._CHUNK_SIZE = MAX_BYTES_PER_WRITE
super().__init__(BinaryLogStream(prio, tag), **kwargs)
self._lock = RLock()
self._pending_bytes = []
self._pending_bytes_count = 0

def __repr__(self):
return f"<TextLogStream {self.buffer.tag!r}>"
Expand All @@ -52,19 +57,48 @@ def write(self, s):
s = str.__str__(s)

# We want to emit one log message per line wherever possible, so split
# the string before sending it to the superclass. Note that
# "".splitlines() == [], so nothing will be logged for an empty string.
for line in s.splitlines(keepends=True):
while line:
super().write(line[:MAX_CHARS_PER_WRITE])
line = line[MAX_CHARS_PER_WRITE:]
# the string into lines first. Note that "".splitlines() == [], so
# nothing will be logged for an empty string.
with self._lock:
for line in s.splitlines(keepends=True):
while line:
chunk = line[:MAX_CHARS_PER_WRITE]
line = line[MAX_CHARS_PER_WRITE:]
self._write_chunk(chunk)

return len(s)

# The size and behavior of TextIOWrapper's buffer is not part of its public
# API, so we handle buffering ourselves to avoid truncation.
def _write_chunk(self, s):
b = s.encode(self.encoding, self.errors)
if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE:
self.flush()

self._pending_bytes.append(b)
self._pending_bytes_count += len(b)
if (
self.write_through
or b.endswith(b"\n")
or self._pending_bytes_count > MAX_BYTES_PER_WRITE
):
self.flush()

def flush(self):
with self._lock:
self.buffer.write(b"".join(self._pending_bytes))
self._pending_bytes.clear()
self._pending_bytes_count = 0

# Since this is a line-based logging system, line buffering cannot be turned
# off, i.e. a newline always causes a flush.
@property
def line_buffering(self):
return True


class BinaryLogStream(io.RawIOBase):
def __init__(self, android_log_write, prio, tag):
self.android_log_write = android_log_write
def __init__(self, prio, tag):
self.prio = prio
self.tag = tag

Expand All @@ -85,10 +119,48 @@ def write(self, b):

# Writing an empty string to the stream should have no effect.
if b:
# Encode null bytes using "modified UTF-8" to avoid truncating the
# message. This should not affect the return value, as the caller
# may be expecting it to match the length of the input.
self.android_log_write(self.prio, self.tag,
b.replace(b"\x00", b"\xc0\x80"))

logcat.write(self.prio, self.tag, b)
return len(b)


# When a large volume of data is written to logcat at once, e.g. when a test
# module fails in --verbose3 mode, there's a risk of overflowing logcat's own
# buffer and losing messages. We avoid this by imposing a rate limit using the
# token bucket algorithm, based on a conservative estimate of how fast `adb
# logcat` can consume data.
MAX_BYTES_PER_SECOND = 1024 * 1024

# The logcat buffer size of a device can be determined by running `logcat -g`.
# We set the token bucket size to half of the buffer size of our current minimum
# API level, because other things on the system will be producing messages as
# well.
BUCKET_SIZE = 128 * 1024

# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
PER_MESSAGE_OVERHEAD = 28


class Logcat:
def __init__(self, android_log_write):
self.android_log_write = android_log_write
self._lock = RLock()
self._bucket_level = 0
self._prev_write_time = time()

def write(self, prio, tag, message):
# Encode null bytes using "modified UTF-8" to avoid them truncating the
# message.
message = message.replace(b"\x00", b"\xc0\x80")

with self._lock:
now = time()
self._bucket_level += (
(now - self._prev_write_time) * MAX_BYTES_PER_SECOND)
self._bucket_level = min(self._bucket_level, BUCKET_SIZE)
self._prev_write_time = now

self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message)
if self._bucket_level < 0:
sleep(-self._bucket_level / MAX_BYTES_PER_SECOND)

self.android_log_write(prio, tag, message)
132 changes: 112 additions & 20 deletions Lib/test/test_android.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import io
import platform
import queue
import re
import subprocess
import sys
import unittest
from _android_support import TextLogStream
from array import array
from contextlib import contextmanager
from contextlib import ExitStack, contextmanager
from threading import Thread
from test.support import LOOPBACK_TIMEOUT
from time import time
from time import sleep, time
from unittest.mock import patch


if sys.platform != "android":
Expand Down Expand Up @@ -81,18 +84,39 @@ def unbuffered(self, stream):
finally:
stream.reconfigure(write_through=False)

# In --verbose3 mode, sys.stdout and sys.stderr are captured, so we can't
# test them directly. Detect this mode and use some temporary streams with
# the same properties.
def stream_context(self, stream_name, level):
# https://developer.android.com/ndk/reference/group/logging
prio = {"I": 4, "W": 5}[level]

stack = ExitStack()
stack.enter_context(self.subTest(stream_name))
stream = getattr(sys, stream_name)
if isinstance(stream, io.StringIO):
stack.enter_context(
patch(
f"sys.{stream_name}",
TextLogStream(
prio, f"python.{stream_name}", errors="backslashreplace"
),
)
)
return stack

def test_str(self):
for stream_name, level in [("stdout", "I"), ("stderr", "W")]:
with self.subTest(stream=stream_name):
with self.stream_context(stream_name, level):
stream = getattr(sys, stream_name)
tag = f"python.{stream_name}"
self.assertEqual(f"<TextLogStream '{tag}'>", repr(stream))

self.assertTrue(stream.writable())
self.assertFalse(stream.readable())
self.assertIs(stream.writable(), True)
self.assertIs(stream.readable(), False)
self.assertEqual("UTF-8", stream.encoding)
self.assertTrue(stream.line_buffering)
self.assertFalse(stream.write_through)
self.assertIs(stream.line_buffering, True)
self.assertIs(stream.write_through, False)

# stderr is backslashreplace by default; stdout is configured
# that way by libregrtest.main.
Expand Down Expand Up @@ -147,6 +171,13 @@ def write(s, lines=None, *, write_len=None):
write("f\n\ng", ["exxf", ""])
write("\n", ["g"])

# Since this is a line-based logging system, line buffering
# cannot be turned off, i.e. a newline always causes a flush.
stream.reconfigure(line_buffering=False)
self.assertIs(stream.line_buffering, True)

# However, buffering can be turned off completely if you want a
# flush after every write.
with self.unbuffered(stream):
write("\nx", ["", "x"])
write("\na\n", ["", "a"])
Expand Down Expand Up @@ -209,30 +240,30 @@ def __str__(self):
# (MAX_BYTES_PER_WRITE).
#
# ASCII (1 byte per character)
write(("foobar" * 700) + "\n",
[("foobar" * 666) + "foob", # 4000 bytes
"ar" + ("foobar" * 33)]) # 200 bytes
write(("foobar" * 700) + "\n", # 4200 bytes in
[("foobar" * 666) + "foob", # 4000 bytes out
"ar" + ("foobar" * 33)]) # 200 bytes out

# "Full-width" digits 0-9 (3 bytes per character)
s = "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19"
write((s * 150) + "\n",
[s * 100, # 3000 bytes
s * 50]) # 1500 bytes
write((s * 150) + "\n", # 4500 bytes in
[s * 100, # 3000 bytes out
s * 50]) # 1500 bytes out

s = "0123456789"
write(s * 200, [])
write(s * 150, [])
write(s * 51, [s * 350]) # 3500 bytes
write("\n", [s * 51]) # 510 bytes
write(s * 200, []) # 2000 bytes in
write(s * 150, []) # 1500 bytes in
write(s * 51, [s * 350]) # 510 bytes in, 3500 bytes out
write("\n", [s * 51]) # 0 bytes in, 510 bytes out

def test_bytes(self):
for stream_name, level in [("stdout", "I"), ("stderr", "W")]:
with self.subTest(stream=stream_name):
with self.stream_context(stream_name, level):
stream = getattr(sys, stream_name).buffer
tag = f"python.{stream_name}"
self.assertEqual(f"<BinaryLogStream '{tag}'>", repr(stream))
self.assertTrue(stream.writable())
self.assertFalse(stream.readable())
self.assertIs(stream.writable(), True)
self.assertIs(stream.readable(), False)

def write(b, lines=None, *, write_len=None):
if write_len is None:
Expand Down Expand Up @@ -330,3 +361,64 @@ def write(b, lines=None, *, write_len=None):
fr"{type(obj).__name__}"
):
stream.write(obj)

def test_rate_limit(self):
# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
PER_MESSAGE_OVERHEAD = 28

# https://developer.android.com/ndk/reference/group/logging
ANDROID_LOG_DEBUG = 3

# To avoid flooding the test script output, use a different tag rather
# than stdout or stderr.
tag = "python.rate_limit"
stream = TextLogStream(ANDROID_LOG_DEBUG, tag)

# Make a test message which consumes 1 KB of the logcat buffer.
message = "Line {:03d} "
message += "." * (
1024 - PER_MESSAGE_OVERHEAD - len(tag) - len(message.format(0))
) + "\n"

# See _android_support.py. The default values of these parameters work
# well across a wide range of devices, but we'll use smaller values to
# ensure a quick and reliable test that doesn't flood the log too much.
MAX_KB_PER_SECOND = 100
BUCKET_KB = 10
with (
patch("_android_support.MAX_BYTES_PER_SECOND", MAX_KB_PER_SECOND * 1024),
patch("_android_support.BUCKET_SIZE", BUCKET_KB * 1024),
):
# Make sure the token bucket is full.
sleep(BUCKET_KB / MAX_KB_PER_SECOND)
line_num = 0

# Write BUCKET_KB messages, and return the rate at which they were
# accepted in KB per second.
def write_bucketful():
nonlocal line_num
start = time()
max_line_num = line_num + BUCKET_KB
while line_num < max_line_num:
stream.write(message.format(line_num))
line_num += 1
return BUCKET_KB / (time() - start)

# The first bucketful should be written with minimal delay. The
# factor of 2 here is not arbitrary: it verifies that the system can
# write fast enough to empty the bucket within two bucketfuls, which
# the next part of the test depends on.
self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2)

# Write another bucketful to empty the token bucket completely.
write_bucketful()

# The next bucketful should be written at the rate limit.
self.assertAlmostEqual(
write_bucketful(), MAX_KB_PER_SECOND,
delta=MAX_KB_PER_SECOND * 0.1
)

# Once the token bucket refills, we should go back to full speed.
sleep(BUCKET_KB / MAX_KB_PER_SECOND)
self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix Android stdout and stderr messages being truncated or lost.
Loading