Skip to content

Commit

Permalink
More android tuning for PowerVR
Browse files Browse the repository at this point in the history
* Rearrange the GLSL in unmultiply() into a form that neither hangs on PowerVR nor fails to compile without an error message.

* Make sure to always either update or preserve the clip & coverage buffers, even if ENABLE_CLIPPING isn't defined for the current draw.

* Don't use GL_KHR_debug on PowerVR. It can cause shader compiles to take 30+ seconds.

* Support the 32-bit Android architecture (arm) in check_golds.sh & deploy_tests.py.

* Reverse histogram sorting order so bad matches come up first.

* Simple bugfixes.

Diffs=
3c322193bf More android tuning for PowerVR (#8747)

Co-authored-by: Chris Dalton <99840794+csmartdalton@users.noreply.github.com>
  • Loading branch information
csmartdalton and csmartdalton committed Dec 14, 2024
1 parent dcbab79 commit da575e1
Show file tree
Hide file tree
Showing 14 changed files with 68 additions and 47 deletions.
2 changes: 1 addition & 1 deletion .rive_head
Original file line number Diff line number Diff line change
@@ -1 +1 @@
32636f7ddee24a23414d6a288de72179c19e9500
3c322193bf69e92663ae4346cf16396ef1578239
4 changes: 0 additions & 4 deletions renderer/src/gl/pls_impl_ext_native.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ class PLSLoadStoreProgram
std::ostringstream glsl;
glsl << "#version 300 es\n";
glsl << "#define " GLSL_FRAGMENT "\n";
if (combinedShaderFeatures & gpu::ShaderFeatures::ENABLE_CLIPPING)
{
glsl << "#define " GLSL_ENABLE_CLIPPING "\n";
}
BuildLoadStoreEXTGLSL(glsl, actions);
GLuint fragmentShader =
glutils::CompileRawGLSL(GL_FRAGMENT_SHADER, glsl.str().c_str());
Expand Down
8 changes: 4 additions & 4 deletions renderer/src/shaders/common.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -148,13 +148,13 @@ INLINE half4 unmultiply(half4 color)
{
if (.0 < color.a && color.a < 1.)
{
float inverseAlpha = 1. / color.a;
color.rgb *= 1. / color.a;
// Since multiplying by the reciprocal isn't exact, and to handle
// invalid premultiplied data, take extra steps to ensure
// color * 1/alpha == 1 when color >= alpha.
color.rgb = mix(make_half3(1.),
color.rgb * inverseAlpha,
lessThan(color.rgb, make_half3(color.a)));
color.rgb = mix(color.rgb,
make_half3(1.),
greaterThan(color.rgb, make_half3(254.5 / 255.)));
}
return color;
}
Expand Down
3 changes: 1 addition & 2 deletions renderer/src/shaders/draw_image_mesh.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,8 @@ PLS_MAIN_WITH_IMAGE_UNIFORMS(@drawFragmentMain)
}

PLS_STORE4F(colorBuffer, color);
#ifdef @ENABLE_CLIPPING
PLS_PRESERVE_UI(clipBuffer);
#endif
PLS_PRESERVE_UI(coverageCountBuffer);

PLS_INTERLOCK_END;

Expand Down
3 changes: 2 additions & 1 deletion renderer/src/shaders/draw_path.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ PLS_MAIN(@drawFragmentMain)

#ifdef @DRAW_INTERIOR_TRIANGLES
coverageCount += v_windingWeight;
PLS_PRESERVE_UI(coverageCountBuffer);
#else
if (v_edgeDistance.y >= .0) // Stroke.
coverageCount =
Expand Down Expand Up @@ -456,7 +457,6 @@ PLS_MAIN(@drawFragmentMain)
? min(clipData.r, coverage)
: make_half(.0);
}
PLS_PRESERVE_UI(clipBuffer);
}
#endif
#ifdef @ENABLE_CLIP_RECT
Expand Down Expand Up @@ -509,6 +509,7 @@ PLS_MAIN(@drawFragmentMain)
}

PLS_STORE4F(colorBuffer, color);
PLS_PRESERVE_UI(clipBuffer);
}

#ifndef @DRAW_INTERIOR_TRIANGLES
Expand Down
4 changes: 2 additions & 2 deletions renderer/src/shaders/glsl.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,8 @@
#define PLS_STORE4F(PLANE, VALUE) PLANE = (VALUE)
#define PLS_STOREUI(PLANE, VALUE) PLANE = (VALUE)

#define PLS_PRESERVE_4F(PLANE)
#define PLS_PRESERVE_UI(PLANE)
#define PLS_PRESERVE_4F(PLANE) PLANE = PLANE
#define PLS_PRESERVE_UI(PLANE) PLANE = PLANE

#define PLS_INTERLOCK_BEGIN
#define PLS_INTERLOCK_END
Expand Down
1 change: 1 addition & 0 deletions renderer/src/shaders/hlsl.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ INLINE uint pls_atomic_add(PLS_TEX2D<uint> plane, int2 _plsCoord, uint x)
#define notEqual(A, B) ((A) != (B))
#define lessThanEqual(A, B) ((A) <= (B))
#define lessThan(A, B) ((A) < (B))
#define greaterThan(A, B) ((A) > (B))
#define greaterThanEqual(A, B) ((A) >= (B))

// HLSL matrices are stored in row-major order, and therefore transposed from
Expand Down
1 change: 1 addition & 0 deletions renderer/src/shaders/metal.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#define notEqual(A, B) ((A) != (B))
#define lessThanEqual(A, B) ((A) <= (B))
#define lessThan(A, B) ((A) < (B))
#define greaterThan(A, B) ((A) > (B))
#define greaterThanEqual(A, B) ((A) >= (B))
#define MUL(A, B) ((A) * (B))
#define atan $atan2
Expand Down
1 change: 1 addition & 0 deletions renderer/src/shaders/rhi.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ INLINE uint pls_atomic_add(PLS_TEX2D<uint> plane, int2 _plsCoord, uint x)
#define notEqual(A, B) ((A) != (B))
#define lessThanEqual(A, B) ((A) <= (B))
#define lessThan(A, B) ((A) < (B))
#define greaterThan(A, B) ((A) > (B))
#define greaterThanEqual(A, B) ((A) >= (B))

// HLSL matrices are stored in row-major order, and therefore transposed from
Expand Down
24 changes: 13 additions & 11 deletions tests/check_golds.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@ elif [[ "$OSTYPE" == "msys" ]]; then
else
DEFAULT_BACKEND=gl
fi
ARGS=

NUMBER_OF_PROCESSORS="${NUMBER_OF_PROCESSORS:-$(nproc 2>/dev/null || sysctl -n hw.physicalcpu)}"
if [[ $NUMBER_OF_PROCESSORS > 20 ]]; then
ARGS=-j6
else
ARGS=-j4
fi

while :; do
case $1 in
Expand All @@ -29,10 +35,13 @@ while :; do
UDID="$(xcrun simctl list devices | grep '(Booted)' | sed 's/^[^(]*(\([A-Z0-9\-]*\)) (Booted).*$/\1/')"
shift
;;
-a)
-a|-a32)
TARGET="android"
DEFAULT_BACKEND=gl
SERIAL="$(adb get-serialno)"
if [[ "$1" == "-a32" ]]; then
ARGS="--android-arch arm"
fi
shift
;;
-R)
Expand Down Expand Up @@ -85,23 +94,16 @@ do
ID="android_$SERIAL/$BACKEND"
fi

NUMBER_OF_PROCESSORS="${NUMBER_OF_PROCESSORS:-$(nproc 2>/dev/null || sysctl -n hw.physicalcpu)}"
if [[ $NUMBER_OF_PROCESSORS > 20 ]]; then
GOLDEN_JOBS=6
else
GOLDEN_JOBS=4
fi

if [ "$REBASELINE" == true ]; then
echo
echo "Rebaselining $ID..."
rm -fr .gold/$ID
python3 deploy_tests.py gms goldens -j$GOLDEN_JOBS $ARGS --target=$TARGET --outdir=.gold/$ID --backend=$BACKEND $NO_REBUILD
python3 deploy_tests.py gms goldens $ARGS --target=$TARGET --outdir=.gold/$ID --backend=$BACKEND $NO_REBUILD
else
echo
echo "Checking $ID..."
rm -fr .gold/candidates/$ID
python3 deploy_tests.py gms goldens -j$GOLDEN_JOBS $ARGS --target=$TARGET --outdir=.gold/candidates/$ID --backend=$BACKEND $NO_REBUILD
python3 deploy_tests.py gms goldens $ARGS --target=$TARGET --outdir=.gold/candidates/$ID --backend=$BACKEND $NO_REBUILD

echo
echo "Checking $ID..."
Expand Down
9 changes: 7 additions & 2 deletions tests/common/testing_window_egl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,11 @@ class TestingWindowEGL : public TestingWindow
}
#endif

auto rendererStr =
reinterpret_cast<const char*>(glGetString(GL_RENDERER));
printf("==== EGL GPU: OpenGL %s; %s; %s ====\n",
glGetString(GL_VENDOR),
glGetString(GL_RENDERER),
rendererStr,
glGetString(GL_VERSION));

int extensionCount;
Expand All @@ -431,7 +433,10 @@ class TestingWindowEGL : public TestingWindow
{
auto* ext =
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i));
if (strcmp(ext, "GL_KHR_debug") == 0)
if (strcmp(ext, "GL_KHR_debug") == 0 &&
// Our shader compiles can take over 30 SECONDS on PowerVR when
// debug output is enabled. Just don't use it.
!strstr(rendererStr, "PowerVR"))
{
glEnable(GL_DEBUG_OUTPUT_KHR);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_KHR);
Expand Down
47 changes: 30 additions & 17 deletions tests/deploy_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@
default="host",
choices=["host", "android", "ios", "iossim", "unreal"],
help="which platform to run on")
parser.add_argument("-a", "--android-arch",
default="arm64",
choices=["arm", "arm64"])
parser.add_argument("-u", "--ios_udid",
type=str,
default=None,
Expand Down Expand Up @@ -467,7 +470,7 @@ def main():
if args.target == "android":
args.jobs_per_tool = 1 # Android can only launch one process at a time.
if args.builddir == None:
args.builddir = "out/android_arm64_debug"
args.builddir = f"out/android_{args.android_arch}_debug"
if args.backend == None:
args.backend = "gl"
elif args.target == "ios":
Expand Down Expand Up @@ -545,25 +548,35 @@ def main():
if not args.no_install:
if args.target == "android":
# Copy the native libraries into the android_tests project.
jni_dir = os.path.join("android_tests", "app", "src", "main", "jniLibs")
android_arch = "arm64-v8a" # TODO: support more android architectures if needed.
os.makedirs(os.path.join(jni_dir, android_arch), exist_ok=True)
jnidir = os.path.join("android_tests", "app", "src", "main", "jniLibs")
shutil.rmtree(jnidir, ignore_errors=True)
if args.android_arch == "arm64":
arch_full_name = "arm64-v8a"
else:
assert(args.android_arch == "arm")
arch_full_name = "armeabi-v7a"
os.makedirs(os.path.join(jnidir, arch_full_name))
for tool in build_targets:
sharedlib = "lib%s.so" % tool
shutil.copy(os.path.join(args.builddir, sharedlib), os.path.join(jni_dir, android_arch))
layerpath = os.path.join(jni_dir, android_arch, "libVkLayer_khronos_validation.so")
if args.backend in ["vk", "vulkan", "sw", "swiftshader"] and not os.path.exists(layerpath):
# Download & bundle the Vulkan validation layers.
print("Downloading Android Vulkan validation layers...", flush=True)
url = "https://github.com/KhronosGroup/Vulkan-ValidationLayers/releases/download/"\
"vulkan-sdk-1.3.290.0/android-binaries-1.3.290.0.zip"
zipfile.ZipFile(urllib.request.urlretrieve(url)[0], 'r').extractall()
for lib in glob.glob("android-binaries-1.3.290.0/**/*.so", recursive=True):
dst = lib.replace("android-binaries-1.3.290.0", jni_dir)
shutil.copy(os.path.join(args.builddir, sharedlib),
os.path.join(jnidir, arch_full_name))
if args.backend in ["vk", "vulkan", "sw", "swiftshader"]:
layerpath = os.path.join("dependencies", "Vulkan-ValidationLayers")
if not os.path.exists(layerpath):
# Download the Vulkan validation layers.
print("Downloading Android Vulkan validation layers...", flush=True)
url = "https://github.com/KhronosGroup/Vulkan-ValidationLayers/releases/download/"\
"vulkan-sdk-1.3.290.0/android-binaries-1.3.290.0.zip"
zipfile.ZipFile(urllib.request.urlretrieve(url)[0], 'r').extractall(path=layerpath)
# Bundle the Vulkan validation layers.
for lib in glob.glob(os.path.join(layerpath,
"android-binaries-1.3.290.0",
arch_full_name,
"*.so")):
dst = os.path.join(jnidir, arch_full_name, os.path.basename(lib))
print(" bundling %s -> %s" % (lib, dst), flush=True)
os.makedirs(os.path.dirname(dst), exist_ok=True)
shutil.move(lib, dst)
shutil.rmtree("android-binaries-1.3.290.0")
shutil.copy(lib, dst)

# Build the android_tests wrapper app.
cwd = os.getcwd()
os.chdir(os.path.join(rive_tools_dir, "android_tests"))
Expand Down
4 changes: 3 additions & 1 deletion tests/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,10 @@ def __lt__(self, other):
if (self.histogram is not None and
other.histogram is not None and
self.histogram != other.histogram):
return self.histogram < other.histogram
# LOWER histogram values mean worse matches. Sort the bad matches first.
return self.histogram > other.histogram
else:
# HIGHER avg values mean worse matches. Sort the bad matches first.
return self.avg < other.avg

def __str__(self):
Expand Down
4 changes: 2 additions & 2 deletions tests/image_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,11 @@ def main():
status.write(args.name + "\t");
if candidate is None:
status.write("missing_candidate\n")
verbose_log("missing golden for", args.name)
verbose_log("missing golden for " + args.name)
return
if golden is None:
status.write("missing_golden\n")
verbose_log("missing golden for", args.name)
verbose_log("missing golden for " + args.name)
return
if failed:
status.write("failed\n")
Expand Down

0 comments on commit da575e1

Please sign in to comment.