-
Notifications
You must be signed in to change notification settings - Fork 0
/
rev26.c
147 lines (135 loc) · 3.4 KB
/
rev26.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/* To compile, run one of: */
/* gcc -o rev26 -O3 rev26.c -lpthread -Wall -DUSE_A_POSIX_BARRIER */
/* or: */
/* gcc -o rev26 -O3 rev26.c -lpthread -Wall -DUSE_A_SPIN_BARRIER */
/* or maybe even: */
/* gcc -o rev26 -O3 rev26.c -lpthread -Wall -DUSE_A_SPIN_BARRIER
* -DGO_FASTER_DAMMIT */
/* To run, run as: */
/* ./rev26 */
/* Wait for a bit; it'll try to count violations of sequential consistency; */
/* if Paul Loewenstein is right then it will print nonzero numbers. ☺. */
/* With posix barriers, I get ~one violation every few million iterations. */
/* With spin barriers, anything up to 90% of iterations show violations. */
/* This program is an experiment for x86 processors that shows off the */
/* 2nd example discrepancy on page 3 of the x86-TSO paper, found here: */
/* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.153.6657 */
/* This is called "rev26" because it shows a flaw in rev 26 of Intel's SDM. */
/* Exactly one barrier flavour has to be chosen with -D on the command line. */
#if !defined(USE_A_POSIX_BARRIER) && !defined(USE_A_SPIN_BARRIER)
#error Please give either -DUSE_A_POSIX_BARRIER or -DUSE_A_SPIN_BARRIER
#endif
#if defined(USE_A_POSIX_BARRIER) && defined(USE_A_SPIN_BARRIER)
#error Please do not give both -DUSE_A_POSIX_BARRIER and -DUSE_A_SPIN_BARRIER
#endif
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
/* The two shared memory locations the threads write and read. */
volatile int xv, yv;
/* links() and recht() access the locations only through these pointers. */
volatile int *x = &xv, *y = &yv;
#ifdef USE_A_POSIX_BARRIER
/* Barrier waited on by paws(); main() initialises it for two waiters. */
pthread_barrier_t barr;
#else /* USE_A_SPIN_BARRIER */
/* State word of the spin barrier in paws(), which moves it between the */
/* values 0, 1 and 2 using compare-and-swap. main() sets it to 0. */
int pretend_i_am_a_barrier;
#endif
/* If I take the __attribute__((noinline)) away, this program runs faster */
/* and the violation rate goes up to >90%. However, the assembly code that */
/* comes out of gcc with -O3 becomes twisty and I find it too hard to read */
/* to verify that what the CPUs are being asked to do actually matches my */
/* expectations. */
/* Two-party rendezvous: each thread calls paws() and neither returns until */
/* the other has called it as well. Built either on a POSIX barrier or on a */
/* compare-and-swap handshake over pretend_i_am_a_barrier. */
#ifndef GO_FASTER_DAMMIT
__attribute__((noinline))
#endif
void paws()
{
#ifdef USE_A_POSIX_BARRIER
    pthread_barrier_wait(&barr);
#else /* USE_A_SPIN_BARRIER */
    int *const gate = &pretend_i_am_a_barrier;

    if (!__sync_bool_compare_and_swap(gate, 0, 1)) {
        /* The gate was not 0, so we did not claim it: spin until we can */
        /* move it from 1 to 2, which signals the claiming thread. */
        while (!__sync_bool_compare_and_swap(gate, 1, 2)) {
            continue;
        }
        return;
    }
    /* We claimed the gate (0 -> 1): spin until the other thread has set */
    /* it to 2, and reset it to 0 for the next round in the same step. */
    while (!__sync_bool_compare_and_swap(gate, 2, 0)) {
        continue;
    }
#endif
}
/* Number of loop iterations run by each of the two threads (links, recht). */
#define NO_OF_ITERATIONS (100 * 1000 * 1000)
/* Left-hand thread of the experiment. */
/* Each iteration clears *x and *y, meets the other thread in paws(), then */
/* stores 1 to *x and immediately loads *x and *y. After a second paws() it */
/* counts the iteration as a violation when a == 1, b == 0 and *x is still */
/* 1. The count is printed and reset once per window of one million */
/* iterations. */
/* context: unused. Returns NULL. */
static void *links(void *context)
{
    /* Window size for reporting; the printed text says "per million". */
    enum { REPORT_INTERVAL = 1000000 };
    int i;
    int a, b;
    int nots = 0;

    (void)context;
    for (i = 0; i < NO_OF_ITERATIONS; i++) {
        *x = 0;
        *y = 0;
        paws();
        /* The store and the two loads below are the experiment itself; */
        /* their order must not be changed. */
        *x = 1;
        a = *x;
        b = *y;
        paws();
        if ((a == 1) && (b == 0) && (*x == 1)) {
            nots++;
        }
        /* Report after every complete window. Testing (i + 1) rather */
        /* than i keeps each window at exactly REPORT_INTERVAL */
        /* iterations and makes sure the final window is reported too. */
        if ((i + 1) % REPORT_INTERVAL == 0) {
            double pc = 100.0 * (double)nots / (double)REPORT_INTERVAL;
            printf("Saw %d violations per million. (%.4f%%)\n",
                   nots, pc);
            fflush(stdout);
            nots = 0;
        }
    }
    return NULL;
}
/* Right-hand thread of the experiment. */
/* For NO_OF_ITERATIONS rounds: meet the other thread in paws(), store 2 to */
/* *y and then to *x, and meet the other thread in paws() again. */
/* context: unused. Returns NULL. */
static void *recht(void *context)
{
    int remaining = NO_OF_ITERATIONS;

    while (remaining-- > 0) {
        paws();
        /* Store order matters to the experiment: y first, then x. */
        *y = 2;
        *x = 2;
        paws();
    }
    return NULL;
}
/* Sets up the barrier, starts the links and recht threads, waits for both */
/* to finish and tears the barrier down. Calls abort() if barrier */
/* initialisation, thread creation or thread joining fails. */
int main(int arc, char **argv)
{
    /* Thread entry points, in the order they are started and joined. */
    void *(*const entry[2])(void *) = { links, recht };
    pthread_t pair[2];
    int k;

#ifdef USE_A_POSIX_BARRIER
    if (pthread_barrier_init(&barr, NULL, 2) != 0) {
        abort();
    }
#else /* USE_A_SPIN_BARRIER */
    pretend_i_am_a_barrier = 0;
#endif
    for (k = 0; k < 2; k++) {
        if (pthread_create(&pair[k], NULL, entry[k], NULL) != 0) {
            abort();
        }
    }
    for (k = 0; k < 2; k++) {
        if (pthread_join(pair[k], NULL) != 0) {
            abort();
        }
    }
#ifdef USE_A_POSIX_BARRIER
    pthread_barrier_destroy(&barr);
#endif
    return 0;
}