-
Notifications
You must be signed in to change notification settings - Fork 3
/
sorting_bytecode.h
176 lines (139 loc) · 5.93 KB
/
sorting_bytecode.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#ifndef SORTING_BYTECODE_H
#define SORTING_BYTECODE_H
#include <vector>
// A bytecode that describe in-place sorting operations performed on a
// 2D grid.
enum OpCode {
// Sort a sequence
Sort = 0,
// Assert that a sequence is sorted. Some interpreters may ignore
// this.
AssertSorted = 1,
// Merge two sorted sequences in place
Merge = 2,
// Just copy from s1 to s2, which must be the same length
Copy = 3,
// Print a sequence to stdout. Some interpreters may ignore this.
Print = 4,
};
// A linear sequence
struct Seq {
Seq() = default;
Seq(const Seq &other) = default;
Seq(int x, int dx, int len)
: x(x), dx(dx), len(len) {
}
// The start
int x = 0;
// The step
int dx = 0;
// The length;
int len = 0;
};
// Sort a sequence in-place, or merge two subsequences.
struct Op {
// Sort, merge, or pairwise swap. If sort, any field ending in '2'
// below gets ignored.
OpCode op = Sort;
// Output range of interest. Inclusive. The sort is permitted to
// be incorrect outside of this range.
int min_idx = 0, max_idx = 0;
// The first sequence
Seq s1;
// The second sequence.
Seq s2;
// The reason the op is being performed. Used for
// debugging. Should be a string constant.
const char *reason = "";
void print() const;
};
// Sort a 2D rectangle into scanline order, using the 1D primitives
// above. Sorts the rows then uses a simple merge across them. Ignores
// min_idx/max_idx.
std::vector<Op> sort_2d_naive(int w, int h, int min_idx, int max_idx);
// Sort a 2D rectangle into scanline order, using the 1D primitives
// above. Aggressively eliminates things outside [min_idx, max_idx] as
// it goes.
std::vector<Op> sort_2d(int w, int h, int min_idx, int max_idx);
// Just sort in 1d along the x dimension
std::vector<Op> sort_1d(int w, int min_idx, int max_idx);
// Take a pair of arrays stored back-to-back and sort both arrays and
// then also write the merged list of the same total length after
// them. We'll use it to sort pairs of columns while still having the
// original sorted columns available.
std::vector<Op> sort_pair(int w);
// Sort a 2d rectangle into scanline order. Equivalent to sort_2d but
// assumes each columns is already in sorted order. Uses addresses
// starting at scratch_idx for scratch space. The final range
// min_idx...max_idx is placed at position zero.
std::vector<Op> merge_cols_2d(int w, int h, int min_idx, int max_idx, int scratch_idx);
// Sort a 2d rectangle into scanline order. Equivalent to sort_2d but
// assumes each row is already in sorted order. Does not use scratch
// space. The final range from min_idx to max_idx is placed starting
// at position zero.
std::vector<Op> merge_rows_2d(int w, int h, int min_idx, int max_idx);
// In a sequence of ops, count the number of instructions due to each
// reason and print a table. Used to figure out which tasks are taking
// up all the ops.
void print_op_histogram(const std::vector<Op> &ops);
// Compute the high water mark (plus one) of locations touched during
// a sequence of ops, so an interpreter can allocation enough working
// space.
int compute_working_memory(const std::vector<Op> &ops);
// Computes a tile of medians of overlapping rectangles. The medians
// are over a w x h window, and the tile size is tw x th. Requires
// input in a particular layout. The core values shared by all outputs
// (the intersection of the footprints) is a sub-rectangle of size (w
// - tw + 1) x (h - th + 1). This comes first in scanline order, with
// the columns already sorted. Then we have (tw - 1)*2 extra central
// columns of height (h - th + 1), also sorted. The columns appear in
// order from left to right. Then we have the (th - 1)*2 extra central
// rows of width (w - tw + 1). These are unsorted. Finally we have the
// corner elements in scanline order. Given a 2x2 tiling of a 4x4
// window, the order of the elements expected in the input is thus:
/*
21 | 15 16 17 | 22
---+----------+---
9 | 0 1 2 | 12
10 | 3 4 5 | 13
11 | 6 7 8 | 14
---+----------+---
23 | 18 19 20 | 24
*/
// The resulting tw x th medians are placed in scanline order starting
// at position (w + tw - 1) * (h + th - 1) (i.e. right after the all
// the input data)
// If the last arg is true, then instead of the cols being sorted
// within a core, the rows are sorted (and the shape of the core is
// transposed). In this case we use merge_rows_2d instead of
// merge_cols_2d for sorting the core.
std::vector<Op> tiled_median_2d(int w, int h, int tw, int th, bool col_major_core = false);
// Take the median of a rectangle.
inline std::vector<Op> median_2d(int w, int h) {
return sort_2d(w, h, (w * h) / 2, (w * h) / 2);
}
// Limit the maximum length of the ops by decomposing merges and sorts
// into combinations of smaller primitives. Pass '2' to lower all the
// way to individual swaps.
std::vector<Op> limit_max_op_length(const std::vector<Op> &ops, int max_len);
// Run the sorting network over some ints for testing
void run(const int *src, int *dst, int W, int H, const std::vector<Op> &ops);
// A more compact instruction encoding for a subset of op
// types. Useful to drive an interpreter.
struct Instruction {
int op, x1, x2;
};
// Generate all instructions within some size
// limit. limit_max_op_length with the same size limit will only
// generate ops encodable with these instructions.
std::vector<std::pair<Op, Instruction>> get_instruction_table(int max_len);
// Encode an instruction using a given instruction table
Instruction encode(Op op, const std::vector<std::pair<Op, Instruction>> &table);
// Decode an instruction using an instruction table, and the source
// stride to use for any strided copy instructions.
Op decode(Instruction inst, int stride, const std::vector<std::pair<Op, Instruction>> &table);
struct PerformanceCounters {
int loads = 0, stores = 0, swaps = 0;
};
PerformanceCounters estimate_performance(const std::vector<Op> &ops);
#endif