-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBitonicSort.s
307 lines (265 loc) · 10.5 KB
/
BitonicSort.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
////////////////////////
// //
// main //
// //
////////////////////////
// Test FindM
ADDI X0, XZR, #32
BL FindM
PUTINT X0
ADDI X0, XZR, #10
PUTCHAR X0
// Original List
LDA X0, array
LDA X1, arraysize
LDUR X1, [X1, #0]
BL printList
// Bitonic Sort
LDA X0, array
LDA X1, arraysize
LDUR X1, [X1, #0]
BL BLueRecursion
// Sorted List
LDA X0, array
LDA X1, arraysize
LDUR X1, [X1, #0]
BL printList
STOP
////////////////////////
// //
// FindM //
// //
////////////////////////
FindM:
// X0: N
// Return value at X0: the largest power of 2 less than X0
// X11 - X12: temporary register
// Callee's responsibility
SUBI SP, SP, #16 // Set up stack pointer, allocate 16 bits of memory in stack
STUR LR, [SP, #0] // Store LR to SP[0]
ADDI X12, XZR, #1 // Iterator for the while loop, starting from 1
ADDI X11, XZR, #1 // Previous value of X12, to be returned at X0
while:
CMP X12, X0 // Compare current M with N
B.GE exit_FindM // If M is larger then return previous M stored in X11
ADD X11, XZR, X12 // Update X11
LSL X12, X11, #1 // Double X12 for the next possible M value
B while // Re-enter while loop
exit_FindM:
ADD X0, XZR, X11 // Return previous M stored in X11
// Callee's responsibility
LDUR LR, [SP, #0] // Restore LR from SP[0]
ADDI SP, SP, #16 // Restore SP to original value, free the current stack
BR LR // Return to caller
////////////////////////
// //
// RedLoop //
// //
////////////////////////
RedLoop:
// X0: base address of the (sub)list
// X1: size of the (sub)list
// X2: the largest power of 2 less than x1
// X11 - X16: temporary register
// Callee's responsibility
SUBI SP, SP, #16 // Set up stack pointer, allocate 16 bits of memory in stack
STUR LR, [SP, #0] // Store LR to SP[0]
ADDI X11, XZR, #0 // Set X11 to i
SUB X12, X1, X2 // Set X12 to N - M
SUBI X12, X12, #1 // Set X12 to N-M-1
for:
CMP X11, X12 // Compare i with N-M-1
B.GT exit_RedLoop // Exit for loop if i > N-M-1
ADD X13, X11, XZR // index i for swap
ADD X14, X11, X2 // index i + M for swap
LSL X13, X13, #3 // 8 * i
LSL X14, X14, #3 // 8 * (i+M)
ADD X13, X0, X13 // address of a[i]
ADD X14, X0, X14 // address of a[i+M]
LDUR X15, [X13, #0] // Set X15 to be a[i]
LDUR X16, [X14, #0] // Set X16 to be a[i+M]
CMP X15, X16 // Compare a[i] and a[i+M]
B.GT Swap // if a[i] > a[i+M] then swap
ADDI X11, X11, #1 // i++
B for // re-enter for loop
Swap:
STUR X15, [X14, #0] // Store a[i] to the original location of a[i+M]
STUR X16, [X13, #0] // Store a[i+M] to the original location of a[i]
ADDI X11, X11, #1 // i++
B for // re-enter for loop
exit_RedLoop:
// Callee's responsibility
LDUR LR, [SP, #0] // Restore LR from SP[0]
ADDI SP, SP, #16 // Restore SP to original value, free the current stack
BR LR // Return to caller
////////////////////////
// //
// RedRecursion //
// //
////////////////////////
RedRecursion:
// X0: base address of the (sub)list
// X1: size of the (sub)list
// Callee's responsibility
SUBI SP, SP, #64 // Set up stack pointer, allocate 64 bits of memory in stack
STUR LR, [SP, #0] // Store LR to SP[0]
CMPI X1, #1 // Comapre N with 1
B.LE exit_RR // Only continue if N > 1
STUR X0, [SP, #24] // Caller's responsibility: save X0
ADD X0, XZR, X1 // Input for FindM: X0 to be N
BL FindM // Call FindM(N)
ADD X2, XZR, X0 // Input for RedLoop: X2 to be M
LDUR X0, [SP, #24] // Caller's responsibility: load back X0
BL RedLoop // Call RedLoop(a, N, M)
STUR X0, [SP, #16] // Caller's responsibility: save X0
STUR X1, [SP, #8] // Caller's responsibility: save sublist size N
STUR X2, [SP, #48] // Caller's responsibility: save M
ADD X1, XZR, X2 // Input for RedRecursion: M in X2 stored before
BL RedRecursion // Call RedRecursion(a, M)
LDUR X0, [SP, #16] // Caller's responsibility: load back X0
LDUR X1, [SP, #8] // Caller's responsibility: load back N
LDUR X2, [SP, #48] // Caller's responsibility: load back M
STUR X0, [SP, #40] // Caller's responsibility: save X0
STUR X1, [SP, #32] // Caller's responsibility: save sublist size N
STUR X2, [SP, #56] // Caller's responsibility: save M
SUB X1, X1, X2 // Input 2 for RedRecursion: N - M
LSL X12, X2, #3 // 8 * M
ADD X0, X0, X12 // Input 1 for RedRecursion: address of a[M]
BL RedRecursion // Call RedRecursion(a[M], N-M)
LDUR X0, [SP, #40] // Caller's responsibility: load back X0
LDUR X1, [SP, #32] // Caller's responsibility: load back size N
LDUR X2, [SP, #56] // Caller's responsibility: load back M
exit_RR:
// Callee's responsibility
LDUR LR, [SP, #0] // Restore LR from SP[0]
ADDI SP, SP, #64 // Restore SP to original value, free the current stack
BR LR // Return to caller
////////////////////////
// //
// BLueLoop //
// //
////////////////////////
BLueLoop:
// X0: base address of the (sub)list
// X1: size of the (sub)list
// X2: the largest power of 2 less than X1
// X11 - X16: temporary register
// Callee's responsibility
SUBI SP, SP, #16 // Set up stack pointer, allocate 16 bits of memory in stack
STUR LR, [SP, #0] // Store LR to SP[0]
ADDI X11, XZR, #2 // Set X11 to 2
MUL X11, X11, X2 // Set X11 to 2 * M
SUB X11, X11, X1 // Set X11 to i = 2 * M - N
SUBI X12, X2, #1 // Set X12 to M-1
for_BlueLoop:
CMP X11, X12 // Compare i with N-M-1
B.GT exit_BlueLoop // exit for loop if i > N-M-1
ADD X13, X11, XZR // index i for swap
ADDI X14, XZR, #2 // Set X14 to 2
MUL X14, X14, X2 // Set X14 to 2 * M
SUB X14, X14, X13 // Set X14 to 2 * M - i
SUBI X14, X14, #1 // X14: index 2M - i - 1 for swap
LSL X13, X13, #3 // Set X13 to 8 * i
LSL X14, X14, #3 // Set X14 to 8 * (2M - i - 1)
ADD X13, X0, X13 // address of a[i]
ADD X14, X0, X14 // address of a[i+M]
LDUR X15, [X13, #0] // a[i]
LDUR X16, [X14, #0] // a[i+M]
CMP X15, X16 // Swap if a[i] > a[i+M]
B.GT Swap_BlueLoop // Call Swap
ADDI X11, X11, #1 // i++
B for_BlueLoop // re-enter for loop
Swap_BlueLoop:
STUR X15, [X14, #0] // Store a[i] to the original location of a[i+M]
STUR X16, [X13, #0] // Store a[i+M] to the original location of a[i]
ADDI X11, X11, #1 // i++
B for_BlueLoop // re-enter for loop
exit_BlueLoop:
// Callee's responsibility
LDUR LR, [SP, #0] // Restore LR from SP[0]
ADDI SP, SP, #16 // Restore SP to original value, free the current stack
BR LR // Return to caller
////////////////////////
// //
// BLueRecursion //
// //
////////////////////////
BLueRecursion:
// X0: base address of the (sub)list
// X1: size of the (sub)list
// Callee's responsibility
SUBI SP, SP, #64 // Set up stack pointer, allocate 64 bits of memory in stack
STUR LR, [SP, #0] // Store LR to SP[0]
CMPI X1, #1 // Compare N with 1
B.LE exit_BR // Only continue if N > 1
STUR X0, [SP, #8] // Caller's responsibility: save X0
ADD X0, XZR, X1 // Input for FindM: X0 to be N
BL FindM // Call FindM(N)
ADD X2, XZR, X0 // Input for BlueLoop: X2 to be M
LDUR X0, [SP, #8] // Caller's responsibility: load back X0
STUR X0, [SP, #16] // Caller's responsibility: save X0
STUR X1, [SP, #24] // Caller's responsibility: save sublist size N
STUR X2, [SP, #32] // Caller's responsibility: save M
ADD X1, XZR, X2 // Input for BlueRecursion: M in X2 stored before
BL BlueRecursion // Call BlueRecursion(a, M)
LDUR X0, [SP, #16] // Caller's responsibility: load back X0
LDUR X1, [SP, #24] // Caller's responsibility: load back size N
LDUR X2, [SP, #32] // Caller's responsibility: load back M
STUR X0, [SP, #40] // Caller's responsibility: save X0
STUR X1, [SP, #48] // Caller's responsibility: save sublist size N
STUR X2, [SP, #56] // Caller's responsibility: save M
SUB X1, X1, X2 // Input 2 for BlueRecursion: N - M
LSL X12, X2, #3 // 8 * M
ADD X0, X0, X12 // Input 1 for BlueRecursion: address of a[M]
BL BlueRecursion // Call BlueRecursion(a[M], N-M)
LDUR X0, [SP, #40] // Caller's responsibility: load back X0
LDUR X1, [SP, #48] // Caller's responsibility: load back size N
LDUR X2, [SP, #56] // Caller's responsibility: load back M
BL BlueLoop // Call BlueLoop(a, N, M)
STUR X0, [SP, #16] // Caller's responsibility: save address X0
STUR X1, [SP, #24] // Caller's responsibility: save sublist size N
STUR X2, [SP, #32] // Caller's responsibility: save M
ADD X1, XZR, X2 // Input for RedRecursion: M in X2 stored before
BL RedRecursion // Call RedRecursion(a, M)
LDUR X0, [SP, #16] // Caller's responsibility: load back address X0
LDUR X1, [SP, #24] // Caller's responsibility: load back size N
LDUR X2, [SP, #32] // Caller's responsibility: load back M
STUR X0, [SP, #40] // Caller's responsibility: save address X0
STUR X1, [SP, #48] // Caller's responsibility: save sublist size N
STUR X2, [SP, #56] // Caller's responsibility: save M
SUB X1, X1, X2 // Input 2 for RedRecursion: N - M
LSL X12, X2, #3 // 8 * M
ADD X0, X0, X12 // Input 1 for RedRecursion: address of a[M]
BL RedRecursion // Call RedRecursion(a[M], N-M)
LDUR X0, [SP, #40] // Caller's responsibility: load back address X0
LDUR X1, [SP, #48] // Caller's responsibility: load back size N
LDUR X2, [SP, #56] // Caller's responsibility: load back M
exit_BR:
// Callee's responsibility
LDUR LR, [SP, #0] // Restore LR from SP[0]
ADDI SP, SP, #64 // Restore SP to original value, free the current stack
BR LR // Return to caller
////////////////////////
// //
// printList //
// //
////////////////////////
printList:
// X0: base address
// X1: length of the array
MOV X2, XZR
ADDI X5, XZR, #32
ADDI X6, XZR, #10
printList_loop:
CMP X2, X1
B.EQ printList_loopEnd
LSL X3, X2, #3
ADD X3, X3, X0
LDUR X4, [X3, #0]
PUTINT X4
PUTCHAR X5
ADDI X2, X2, #1
B printList_loop
printList_loopEnd:
PUTCHAR X6
BR LR