-
Notifications
You must be signed in to change notification settings - Fork 7
/
vecLibFort.c
301 lines (258 loc) · 8.61 KB
/
vecLibFort.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
/*
vecLibFort
https://github.com/mcg1969/vecLibFort
Run-time F2C/GFORTRAN translation for Apple's vecLib BLAS/LAPACK
Copyright (c) 2014 Michael C. Grant
See README.md for full background and usage details.
Use, modification and distribution is subject to the Boost Software
License, Version 1.0. See the accompanying file LICENSE or
http://www.booost.org/LICENSE_1_0.txt
*/
#include <stdio.h>
#include "cloak.h"
/* Don't load the CLAPACK header, because we are using a different calling
convention for the replaced functions than the ones listed there. */
#define __CLAPACK_H
#include "vecLib-760.100.h"
#include <Accelerate/Accelerate.h>
#include <AvailabilityMacros.h>
/* Add a SGEMV fix for Mavericks. See
http://www.openradar.me/radar?id=5864367807528960 */
#if !defined(VECLIBFORT_SGEMV) && \
defined(MAC_OS_X_VERSION_10_9) && \
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 && \
!(defined(MAC_OS_X_VERSION_10_10) && \
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10)
#define VECLIBFORT_SGEMV
#endif
#define VOIDS_(s,i,id) COMMA_IF(i) void*
#define VOIDS(n) IF(n)(EXPR_S(0)(REPEAT_S(0,DEC(n),VOIDS_,~)),void)
#define VOIDA_(s,i,id) COMMA_IF(i) void *a ## i
#define VOIDA(n) IF(n)(EXPR_S(0)(REPEAT_S(0,DEC(n),VOIDA_,~)),void)
#define PARAM_(s,i,id) COMMA_IF(i)a ## i
#define PARAM(n) IF(n)(EXPR_S(0)(REPEAT_S(0,DEC(n),PARAM_,~)),)
#ifdef VECLIBFORT_VERBOSE
#define DEBUG(...) fprintf(stderr,__VA_ARGS__);
static const char* dynamic_msg = "Entering dynamic %s replacement\n";
static const char* static_msg = "Entering static %s replacement\n";
#define DEBUG_S(x) DEBUG( static_msg, x )
#define DEBUG_D(x) DEBUG( dynamic_msg, x )
#else
#define DEBUG(...)
#define DEBUG_S(x)
#define DEBUG_D(x)
#endif
#include <complex.h>
typedef float complex c_float;
typedef double complex c_double;
#ifdef VECLIBFORT_INTERPOSE
/*
* INTERPOSING MODE
*
* In this mode, dyld is instructed to preload this library even before the
* executable itself. It reads the __DATA.__interpose section of the library
* for the interpose information, which it uses to swap out the offending
* BLAS/LAPACK functions with our replacements. Because vecLib provides two
* aliases for each function---one with a trailing underscore, and one
* without---we need two interpose records for each replacement.
*
* For instance, for "sdot", we define a static function
* static float my_sdot( const int* N, const float* X, const int* incX )
* add interpose data to signify two substitutions:
* sdot_ -> my_sdot
* sdot -> my_sdot
*/
typedef struct interpose_t_ {
const void *replacement;
const void *original;
} interpose_t;
#define INTERPOSE(name) \
__attribute__((used)) interpose_t interpose_ ## name [] \
__attribute__((section ("__DATA,__interpose"))) = \
{ { (const void*)&my_ ## name, (const void*)&name }, \
{ (const void*)&my_ ## name, (const void*)&name ## _ } };
#define D2F_CALL(name,n) \
extern double name( VOIDS(n) ); \
extern double name ## _( VOIDS(n) ); \
static float my_ ## name ( VOIDA(n) ) \
{ return (float)name ## _( PARAM(n) ); } \
INTERPOSE(name)
#define CPLX_CALL(type,name,n) \
extern void name( VOIDS(INC(n)) ); \
extern void name ## _( VOIDS(INC(n)) ); \
static c_ ## type my_ ## name ( VOIDA(n) ) \
{ \
c_ ## type cplx; \
name ## _( &cplx, PARAM(n) ); \
return cplx; \
} \
INTERPOSE(name)
/*
* DYNAMIC BLAS SUBSTITUTION
*
* For the interpose library we need to use the same techniques for the BLAS
* as we do for the LAPACK routines. However, because we have CBLAS versions
* available to use, we can use the wrappers already created in "static.h"
* by prepending them with the "my_" prefixes.
*/
#define BLS_CALL(type,name,n) \
extern type name( VOIDS(n) ); \
extern type name ## _( VOIDS(n) ); \
INTERPOSE(name)
#define ADD_PREFIX
#include "static.h"
#undef ADD_PREFIX
BLS_CALL(float,sdsdot,6)
BLS_CALL(float,sdot,5)
BLS_CALL(float,snrm2,3)
BLS_CALL(float,sasum,3)
BLS_CALL(c_float,cdotu,5)
BLS_CALL(c_float,cdotc,5)
BLS_CALL(float,scnrm2,3)
BLS_CALL(float,scasum,3)
BLS_CALL(c_double,zdotu,5)
BLS_CALL(c_double,zdotc,5)
#if defined(VECLIBFORT_SGEMV)
BLS_CALL(void,sgemv,11)
#endif
#else
/*
* STATIC BLAS SUBSTITUTION
*
* For BLAS functions, we have access to CBLAS versions of each function.
* So the hoops we need to jump through to resolve the name clashes in the
* dynamic substitution mode can be avoided. Instead, we simply create the
* replacement functions to call the CBLAS counterparts instead.
*
* To void duplicating code, we include the functions in "static.h" twice:
* once for the functions with trailing underscores (e.g., "sdot_"), and once
* without (e.g., "sdot"). In theory, we could create just one replacement
* with two aliases, but clang has thus far been uncooperative. Any assistance
* on this matter would be appreciated.
*/
#include "static.h"
#define ADD_UNDERSCORE
#include "static.h"
/*
* DYNAMIC LAPACK SUBSTITUTION
*
* In this mode, we give our functions identical names, and rely on link
* order to ensure that these take precedence over those declared in vecLib.
* Thus whenever the main code attempts to call one of the covered functions,
* it will be directed to one of our wrappers instead.
*
* Because vecLib provides two aliases for each function---one with a
* trailing underscore, and one without---we actually need two separate
* replacement functions (at least until we can figure out how to do aliases
* cleanly in clang.) Each pair of replacements controls a single static
* pointer to the replacement function. On the first invocation of either,
* this pointer is retrieved using a dlsym() command.
*
* For instance, for "sdot", we define two functions
* float sdot_( const int* N, const float* X, const int* incX )
* float sdot ( const int* N, const float* X, const int* incX )
* On the first invocation of either, the "sdot_" symbol from vecLib is
* retrieved using the dlsym() command and stored in
* static void* fp_dot;
* In theory, we could create just one replacement with two aliases, but
* clang has thus far been uncooperative. Any assistance on this matter would
* be appreciated.
*/
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#define VECLIB_FILE "/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/vecLib"
static void * veclib = 0;
static void unloadlib(void)
{
DEBUG( "Unloading vecLib\n" );
dlclose (veclib);
}
static void loadlib(void)
{
static const char* veclib_loc = VECLIB_FILE;
DEBUG( "Loading library: %s\n", veclib_loc )
veclib = dlopen (veclib_loc, RTLD_LOCAL | RTLD_FIRST);
if ( veclib == 0 ) {
fprintf( stderr, "Failed to open vecLib library; aborting.\n Location: %s\n", veclib );
abort ();
}
atexit(unloadlib);
}
static void* loadsym( const char* nm )
{
if ( veclib == 0 ) loadlib();
DEBUG( "Loading function: %s\n", nm )
void *ans = dlsym( veclib, nm );
if ( ans != 0 ) return ans;
fprintf( stderr, "vecLib symbol '%s' could not be resolved; aborting.\n", nm );
abort();
}
#define D2F_CALL_(fname,name,n) \
float fname( VOIDA(n) ) \
{ \
DEBUG_D( #name "_" ) \
if ( !fp_ ## name ) fp_ ## name = loadsym( #name "_" ); \
return ((ft_ ## name)fp_ ## name)( PARAM(n) ); \
}
#define D2F_CALL(name,n) \
typedef double (*ft_ ## name)( VOIDS(n) ); \
static void *fp_ ## name = 0; \
D2F_CALL_(name,name,n) \
D2F_CALL_(name ## _,name,n)
#define CPLX_CALL_(type,fname,name,n) \
c_ ## type fname( VOIDA(n) ) \
{ \
c_ ## type cplx; \
DEBUG_D( #name "_" ) \
if ( !fp_ ## name ) fp_ ## name = loadsym( #name "_" ); \
((ft_ ## name)fp_ ## name)( &cplx, PARAM(n) ); \
return cplx; \
}
#define CPLX_CALL(type,name,n) \
typedef void (*ft_ ## name)( VOIDS(INC(n)) ); \
static void *fp_ ## name = 0; \
CPLX_CALL_(type,name,name,n) \
CPLX_CALL_(type,name ## _,name,n)
#endif
D2F_CALL(clangb,7)
D2F_CALL(clange,6)
D2F_CALL(clangt,5)
D2F_CALL(clanhb,7)
D2F_CALL(clanhe,6)
D2F_CALL(clanhp,5)
D2F_CALL(clanhs,5)
D2F_CALL(clanht,4)
D2F_CALL(clansb,7)
D2F_CALL(clansp,5)
D2F_CALL(clansy,6)
D2F_CALL(clantb,8)
D2F_CALL(clantp,6)
D2F_CALL(clantr,8)
D2F_CALL(scsum1,3)
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
D2F_CALL(slaneg,6)
#endif
D2F_CALL(slangb,7)
D2F_CALL(slange,6)
D2F_CALL(slangt,5)
D2F_CALL(slanhs,5)
D2F_CALL(slansb,7)
D2F_CALL(slansp,5)
D2F_CALL(slanst,4)
D2F_CALL(slansy,6)
D2F_CALL(slantb,8)
D2F_CALL(slantp,6)
D2F_CALL(slantr,8)
D2F_CALL(slapy2,2)
D2F_CALL(slapy3,3)
D2F_CALL(slamch,1)
D2F_CALL(slamc3,2)
#if defined(MAC_OS_X_VERSION_10_7) && \
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
D2F_CALL(clanhf,6)
D2F_CALL(slansf,6)
#endif
CPLX_CALL(float,cladiv,2)
CPLX_CALL(double,zladiv,2)