Skip to content

Commit

Permalink
Reduce stack usage.
Browse files Browse the repository at this point in the history
  • Loading branch information
zufuliu committed Dec 22, 2024
1 parent 0dbb6f4 commit f3c6c01
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 45 deletions.
97 changes: 97 additions & 0 deletions build/DumpTool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import sys
import os.path
import glob
import re

def dump_static_linked_function(path, dumpAll=True):
    """Group symbols listed in a linker map file by the last column of their line.

    Only lines containing more than one ':' are considered. Each such line is
    split on whitespace: the second field is taken as the symbol name and the
    last field as the object it belongs to. The grouped, sorted result is
    written next to the input as ``<name>-crt<ext>``.

    Args:
        path: Path of the map file to read (UTF-8).
        dumpAll: When true, every matching line is kept. When false, a line is
            kept only if its fourth field is 'f', the symbol name does not
            contain '<lambda_' and its fifth field is not 'i'
            (presumably the function/inline flags of the map format -- confirm).
    """
    result = {}
    with open(path, encoding='utf-8') as fd:
        # Iterate lazily instead of loading every line with readlines().
        for line in fd:
            if line.count(':') <= 1:
                continue
            items = line.split()
            if len(items) < 2:
                # Malformed line without a symbol column; nothing to report.
                continue
            func = items[1]
            obj = items[-1]
            if not dumpAll:
                # The filter reads the 4th and 5th fields; skip short lines
                # instead of failing with IndexError.
                if len(items) < 5 or items[3] != 'f' or '<lambda_' in func or items[4] == 'i':
                    continue
            result.setdefault(obj, []).append(func)

    root, ext = os.path.splitext(path)
    out_path = f'{root}-crt{ext}'
    print('write:', out_path)
    with open(out_path, 'w', encoding='utf-8') as fd:
        for obj, funcs in sorted(result.items()):
            fd.write(obj + '\n')
            fd.write(''.join(f'\t{func}\n' for func in sorted(funcs)))

# Marker that opens a code segment; get_stack_size() splits the listing on it.
text_segment = re.compile(r'_TEXT\s+SEGMENT')
# PROC line whose trailing ';' comment ends with COMDAT.
# Group 1: optional word (plus whitespace) before PROC; group 2: comment text before COMDAT.
proc_comdat = re.compile(r'(\w+\s+)?PROC\s*;(.+?)COMDAT')
# 'sub esp|rsp, N' with a decimal immediate; group 2 is N.
sub_rsp = re.compile(r'sub\s+(e|r)sp\s*,\s*(\d+)')
# 'add esp|rsp, N' with a decimal immediate; group 2 is N.
add_rsp = re.compile(r'add\s+(e|r)sp\s*,\s*(\d+)')
# 'mov eax, N' with a decimal immediate; only consulted for text preceding a chkstk call.
mov_eax = re.compile(r'mov\s+eax\s*,\s*(\d+)')
# Call to chkstk with zero, one or two leading underscores.
call_chkstk = re.compile(r'call\s+_?_?chkstk')

def get_stack_size(path, result_map, threshold):
    """Collect procedures in an assembly listing whose stack adjustment reaches threshold.

    The listing is split on the ``_TEXT SEGMENT`` marker. For every piece, the
    largest decimal immediate found in ``sub/add esp|rsp, N`` instructions (and
    in ``mov eax, N`` instructions that precede a chkstk call) is taken as the
    stack size of that piece.

    Args:
        path: Listing file to read (decoded as cp1252).
        result_map: Updated in place; maps stack size -> {file base name: [procedure names]}.
        threshold: Minimum stack size (inclusive) for a procedure to be recorded.
    """
    with open(path, 'r', encoding='cp1252') as fd:
        doc = fd.read()
    file_name = os.path.basename(path)
    for segment in text_segment.split(doc):
        matches = sub_rsp.findall(segment) + add_rsp.findall(segment)
        sizes = [match[1] for match in matches]
        chkstk = call_chkstk.search(segment)
        if chkstk:
            # Only 'mov eax, N' before the first chkstk call is considered.
            sizes.extend(mov_eax.findall(segment[:chkstk.start(0)]))
        if not sizes:
            continue
        stack_size = max(int(value) for value in sizes)
        if stack_size < threshold:
            continue

        proc = proc_comdat.search(segment)
        if proc is None:
            # No 'PROC ; ... COMDAT' header in this piece: still report the
            # stack usage instead of failing with AttributeError on None.
            name = '<unknown>'
        else:
            name = proc.group(2).strip(' \t,;')
            if not name:
                # Group 1 is optional and may be None when nothing word-like precedes PROC.
                name = (proc.group(1) or '').strip() or '<unknown>'
        result_map.setdefault(stack_size, {}).setdefault(file_name, []).append(name)

def dump_stack_size():
    """Command-line entry: report procedures whose stack size reaches a threshold.

    Reads ``sys.argv``: the first argument is the integer threshold, the
    remaining arguments are listing files or folders (folders are searched for
    ``*.cod`` files, non-recursively). Arguments that are neither a file nor a
    folder are ignored. Results are written to ``StackSize<threshold>.log`` in
    the current directory, largest stack size first.
    """
    usage = f'Usage: {sys.argv[0]} threshold <path or folder>'
    if len(sys.argv) < 3:
        print(usage)
        return

    try:
        threshold = int(sys.argv[1])
    except ValueError:
        # A non-numeric threshold is a usage error, not a crash.
        print(usage)
        return

    result_map = {}
    for arg in sys.argv[2:]:
        if os.path.isfile(arg):
            get_stack_size(arg, result_map, threshold)
        elif os.path.isdir(arg):
            for path in glob.glob(os.path.join(arg, '*.cod')):
                get_stack_size(path, result_map, threshold)

    if not result_map:
        print(f'No result for threshold: {threshold}')
        return

    output = []
    for stack_size in sorted(result_map, reverse=True):
        output.append(f'{stack_size}:\n')
        result = result_map[stack_size]
        for path in sorted(result):
            output.append(f'\t{path}:\n\t')
            output.append('\n\t'.join(sorted(result[path])))
            output.append('\n\n')
    path = f'StackSize{threshold}.log'
    print(f'write: {path}')
    with open(path, 'w', encoding='utf-8') as fd:
        fd.write(''.join(output))

#dump_static_linked_function('bin/Release/x64/matepath.map')
#dump_static_linked_function('bin/Release/Win32/matepath.map')
#dump_static_linked_function('bin/Release/x64/Notepad4.map')
#dump_static_linked_function('bin/Release/Win32/Notepad4.map')
if __name__ == '__main__':
    # Run only when executed as a script, so importing this module has no side effects.
    dump_stack_size()
26 changes: 0 additions & 26 deletions build/Toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,28 +71,6 @@ def update_all_copyright_year():
for path in glob.glob('../locale/*/*.rc'):
update_copyright_year(path, year)

def dump_static_linked_function(path, dumpAll=True):
    """Group symbols listed in a linker map file by the last column of their line.

    Only lines containing more than one ':' are considered. Each such line is
    split on whitespace: the second field is taken as the symbol name and the
    last field as the object it belongs to. The grouped, sorted result is
    written next to the input as ``<name>-crt<ext>``.

    Args:
        path: Path of the map file to read (UTF-8).
        dumpAll: When true, every matching line is kept. When false, a line is
            kept only if its fourth field is 'f', the symbol name does not
            contain '<lambda_' and its fifth field is not 'i'
            (presumably the function/inline flags of the map format -- confirm).
    """
    result = {}
    with open(path, encoding='utf-8') as fd:
        # Iterate lazily instead of loading every line with readlines().
        for line in fd:
            if line.count(':') <= 1:
                continue
            items = line.split()
            if len(items) < 2:
                # Malformed line without a symbol column; nothing to report.
                continue
            func = items[1]
            obj = items[-1]
            if not dumpAll:
                # The filter reads the 4th and 5th fields; skip short lines
                # instead of failing with IndexError.
                if len(items) < 5 or items[3] != 'f' or '<lambda_' in func or items[4] == 'i':
                    continue
            result.setdefault(obj, []).append(func)

    root, ext = os.path.splitext(path)
    out_path = f'{root}-crt{ext}'
    print('write:', out_path)
    with open(out_path, 'w', encoding='utf-8') as fd:
        for obj, funcs in sorted(result.items()):
            fd.write(obj + '\n')
            fd.write(''.join(f'\t{func}\n' for func in sorted(funcs)))

def quote_path(path):
    """Return path wrapped in double quotes if it contains a space, else unchanged."""
    if ' ' not in path:
        return path
    return f'"{path}"'

Expand Down Expand Up @@ -189,10 +167,6 @@ def include_path(folder, path):

#update_all_project_toolset()
#update_all_copyright_year()
#dump_static_linked_function('bin/Release/x64/matepath.map')
#dump_static_linked_function('bin/Release/Win32/matepath.map')
#dump_static_linked_function('bin/Release/x64/Notepad4.map')
#dump_static_linked_function('bin/Release/Win32/Notepad4.map')
# Active configuration: x86_64 MSVC target triple with avx2=True.
# The plain x86_64 and i686 variants are kept below, commented out.
generate_compile_commands('x86_64-pc-windows-msvc', avx2=True)
#generate_compile_commands('x86_64-pc-windows-msvc')
#generate_compile_commands('i686-pc-windows-msvc')
Expand Down
102 changes: 83 additions & 19 deletions scintilla/win32/PlatWin.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -435,11 +435,19 @@ std::shared_ptr<Font> Font::Allocate(const FontParameters &fp) {
// when less than safe size otherwise allocate on heap and free automatically.
template<typename T, size_t lengthStandard>
class VarBuffer {
T * const buffer;
T *buffer;
T bufferStandard[lengthStandard];
public:
explicit VarBuffer(size_t length) :
buffer {(length > lengthStandard) ? new T[length] : bufferStandard} {
VarBuffer() noexcept {
buffer = bufferStandard;
}
explicit VarBuffer(size_t length): buffer{bufferStandard} {
allocate(length);
}
void allocate(size_t length) {
if (length > lengthStandard) {
buffer = new T[length];
}
static_assert(__is_standard_layout(T));
memset(buffer, 0, length*sizeof(T));
}
Expand Down Expand Up @@ -469,14 +477,16 @@ class VarBuffer {
}
};

constexpr size_t stackBufferLength = 512;
constexpr size_t stackBufferLength = 480; // max value to keep stack usage under 4096
class TextWide {
wchar_t * const buffer;
wchar_t *buffer;
UINT len; // Using UINT instead of size_t as most Win32 APIs take UINT.
wchar_t bufferStandard[stackBufferLength];
public:
TextWide(std::string_view text, int codePage) :
buffer {(text.length() > stackBufferLength) ? new wchar_t[text.length()] : bufferStandard} {
TextWide(std::string_view text, int codePage): buffer {bufferStandard} {
if (text.length() > stackBufferLength) {
buffer = new wchar_t[text.length()];
}
if (codePage == CpUtf8) {
len = static_cast<UINT>(UTF16FromUTF8(text, buffer, text.length()));
} else {
Expand Down Expand Up @@ -505,8 +515,59 @@ class TextWide {
}
};

using TextPositions = VarBuffer<XYPOSITION, stackBufferLength>;
using TextPositionsI = VarBuffer<int, stackBufferLength>;
// Converts text to UTF-16 and, on request, provides storage for one position per UTF-16 unit.
// A single inline array backs both the wide text and the positions; larger inputs use the heap.
// NOTE: allocate() zero-fills the positions storage, so when the wide text lives in the inline
// array, the contents returned by data() are overwritten by allocate(). Callers must finish
// using data() before calling allocate(); length() stays valid afterwards.
class TextWideD2D {
	XYPOSITION *positions = nullptr;
	wchar_t *buffer;
	UINT len; // Using UINT instead of size_t as most Win32 APIs take UINT.
	// reuse bufferStandard for both text and position, since only one is active
	XYPOSITION bufferStandard[stackBufferLength];
public:
	// Convert text from codePage to UTF-16. The inline array is used unless text has more
	// bytes than the array can hold as wchar_t elements.
	TextWideD2D(std::string_view text, int codePage): buffer {reinterpret_cast<wchar_t *>(bufferStandard)} {
		if (text.length() > stackBufferLength*sizeof(XYPOSITION)/sizeof(wchar_t)) {
			buffer = new wchar_t[text.length()];
		}
		if (codePage == CpUtf8) {
			len = static_cast<UINT>(UTF16FromUTF8(text, buffer, text.length()));
		} else {
			// Support Asian string display in 9x English
			len = ::MultiByteToWideChar(codePage, 0, text.data(), static_cast<int>(text.length()),
				buffer, static_cast<int>(text.length()));
		}
	}
	// Wide text; only meaningful before allocate() has been called (see class note).
	const wchar_t *data() const noexcept {
		return buffer;
	}
	// Number of UTF-16 units produced by the conversion.
	UINT length() const noexcept {
		return len;
	}
	// Positions storage, or nullptr until allocate() has been called.
	XYPOSITION *position() noexcept {
		return positions;
	}
	// Set up zero-filled storage for length() positions, on the heap when
	// length() exceeds stackBufferLength, otherwise in the inline array.
	void allocate() {
		positions = bufferStandard;
		if (len > stackBufferLength) {
			positions = new XYPOSITION[len];
		}
		memset(positions, 0, len*sizeof(XYPOSITION));
	}

	// Deleted so TextWideD2D objects can not be copied or moved.
	TextWideD2D(const TextWideD2D &) = delete;
	// Was declared with a TextWide && parameter, which deleted a converting
	// constructor rather than this class's move constructor.
	TextWideD2D(TextWideD2D &&) = delete;
	TextWideD2D &operator=(const TextWideD2D &) = delete;
	TextWideD2D &operator=(TextWideD2D &&) = delete;

	~TextWideD2D() noexcept {
		// Free only heap storage; the inline array is part of the object.
		if (buffer != static_cast<void *>(bufferStandard)) {
			delete[]buffer;
		}
		// positions may still be nullptr here; delete[] on nullptr is a no-op.
		if (positions != bufferStandard) {
			delete[]positions;
		}
	}
};

using TextPositionsGDI = VarBuffer<int, stackBufferLength>;

class SurfaceGDI final : public Surface {
HDC hdc{};
Expand Down Expand Up @@ -1174,9 +1235,10 @@ void SurfaceGDI::MeasureWidths(const Font *font_, std::string_view text, XYPOSIT
int fit = 0;
int i = 0;
const int len = static_cast<int>(text.length());
TextPositionsGDI poses;
if (mode.codePage == CpUtf8) {
const TextWide tbuf(text, CpUtf8);
TextPositionsI poses(tbuf.length());
poses.allocate(tbuf.length());
if (!::GetTextExtentExPointW(hdc, tbuf.data(), tbuf.length(), maxWidthMeasure, &fit, poses.data(), &sz)) {
// Failure
return;
Expand All @@ -1194,7 +1256,7 @@ void SurfaceGDI::MeasureWidths(const Font *font_, std::string_view text, XYPOSIT
}
}
} else {
TextPositionsI poses(len);
poses.allocate(len);
if (!::GetTextExtentExPointA(hdc, text.data(), len, maxWidthMeasure, &fit, poses.data(), &sz)) {
// Eeek - a NULL DC or other foolishness could cause this.
return;
Expand Down Expand Up @@ -1266,7 +1328,7 @@ void SurfaceGDI::MeasureWidthsUTF8(const Font *font_, std::string_view text, XYP
int i = 0;
const int len = static_cast<int>(text.length());
const TextWide tbuf(text, CpUtf8);
TextPositionsI poses(tbuf.length());
TextPositionsGDI poses(tbuf.length());
if (!::GetTextExtentExPointW(hdc, tbuf.data(), tbuf.length(), maxWidthMeasure, &fit, poses.data(), &sz)) {
// Failure
return;
Expand Down Expand Up @@ -2529,7 +2591,7 @@ void SurfaceD2D::DrawTextTransparent(PRectangle rc, const Font *font_, XYPOSITIO

namespace {

HRESULT MeasurePositions(const Font *font_, TextPositions &poses, const TextWide &tbuf) {
HRESULT MeasurePositions(const Font *font_, TextWideD2D &tbuf) {
const FontDirectWrite *pfm = down_cast<const FontDirectWrite *>(font_);
if (!pfm->pTextFormat) {
// Unexpected failure like no access to DirectWrite so give up.
Expand All @@ -2546,6 +2608,7 @@ HRESULT MeasurePositions(const Font *font_, TextPositions &poses, const TextWide
return E_FAIL;
}

tbuf.allocate();
VarBuffer<DWRITE_CLUSTER_METRICS, stackBufferLength> clusterMetrics(tbuf.length());
UINT32 count = 0;
const HRESULT hrGetCluster = pTextLayout->GetClusterMetrics(clusterMetrics.data(), tbuf.length(), &count);
Expand All @@ -2556,6 +2619,7 @@ HRESULT MeasurePositions(const Font *font_, TextPositions &poses, const TextWide
// A cluster may be more than one WCHAR, such as for "ffi" which is a ligature in the Candara font
XYPOSITION position = 0.0;
UINT ti = 0;
XYPOSITION * const poses = tbuf.position();
for (UINT32 ci = 0; ci < count; ci++) {
const int length = clusterMetrics[ci].length;
const XYPOSITION width = clusterMetrics[ci].width;
Expand All @@ -2571,11 +2635,11 @@ HRESULT MeasurePositions(const Font *font_, TextPositions &poses, const TextWide
}

void SurfaceD2D::MeasureWidths(const Font *font_, std::string_view text, XYPOSITION *positions) {
const TextWide tbuf(text, mode.codePage);
TextPositions poses(tbuf.length());
if (FAILED(MeasurePositions(font_, poses, tbuf))) {
TextWideD2D tbuf(text, mode.codePage);
if (FAILED(MeasurePositions(font_, tbuf))) {
return;
}
const XYPOSITION * const poses = tbuf.position();
if (mode.codePage == CpUtf8) {
// Map the widths given for UTF-16 characters back onto the UTF-8 input string
size_t i = 0;
Expand Down Expand Up @@ -2667,13 +2731,13 @@ void SurfaceD2D::DrawTextTransparentUTF8(PRectangle rc, const Font *font_, XYPOS
}

void SurfaceD2D::MeasureWidthsUTF8(const Font *font_, std::string_view text, XYPOSITION *positions) {
const TextWide tbuf(text, CpUtf8);
TextPositions poses(tbuf.length());
if (FAILED(MeasurePositions(font_, poses, tbuf))) {
TextWideD2D tbuf(text, CpUtf8);
if (FAILED(MeasurePositions(font_, tbuf))) {
return;
}
// Map the widths given for UTF-16 characters back onto the UTF-8 input string
size_t i = 0;
const XYPOSITION * const poses = tbuf.position();
for (UINT ui = 0; ui < tbuf.length(); ui++) {
const unsigned char uch = text[i];
const unsigned int byteCount = UTF8BytesOfLead(uch);
Expand Down

0 comments on commit f3c6c01

Please sign in to comment.