  #include <cassert>
  #include <cmath>
  #include <cstdio>
+ #include <cerrno>
  #include <cstring>
  #include <fstream>
  #include <map>
  ...
  #include <unistd.h>
  #include <sys/mman.h>
  #include <sys/stat.h>
- #else
- #include <errno.h>
- #define msync(addr, len_bytes, flag) winMSync
- #define MS_ASYNC 0
  #endif

  #define ROUNDUP(X, K) (((X) + (K)-1) & -(K))
  #define IS2POW(X) (!((X) & ((X)-1)))

  #define MAGIC_PATH "magic.dat"
  #define MAGIC_ADDR (char *)0x330000000000
- #define MAGIC_GRAN 2097152
+ #define MAGIC_GRAN 65536
  #define MAGIC_ALGN (sizeof(size_t) * 2)

  #define ANSI_COLOR_RED "\x1b[31m"
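Note on the two helper macros above: ROUNDUP rounds X up to the next multiple of K with the two's-complement trick `& -(K)`, which only works when K is a power of two, and IS2POW is the usual power-of-two test. A self-contained sanity sketch, not part of the patch; ROUNDUP_EX and IS2POW_EX are stand-ins that mirror the macros, and 65536 matches the new MAGIC_GRAN:

    #include <cstddef>

    // Stand-in constexpr mirrors of ROUNDUP/IS2POW so the checks can run at compile time.
    constexpr size_t ROUNDUP_EX(size_t x, size_t k) { return (x + k - 1) & -k; }
    constexpr bool   IS2POW_EX(size_t x)            { return !(x & (x - 1)); }

    static_assert(ROUNDUP_EX(1, 65536) == 65536, "rounds up to the next multiple of K");
    static_assert(ROUNDUP_EX(65536, 65536) == 65536, "exact multiples are left unchanged");
    static_assert(IS2POW_EX(65536) && !IS2POW_EX(65537), "power-of-two test");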
@@ -104,49 +101,21 @@ struct llama_model {
      std::map<std::string, struct ggml_tensor *> tensors;
  };

-
  struct magic {
      uint32_t magic;
      std::atomic<unsigned> lock;
      int fd;
-     size_t commit;
-     size_t offset;
-     size_t capacity;
-     gpt_vocab* vocab;
-     llama_model* model;
+     uint64_t commit;
+     uint64_t offset;
+     uint64_t capacity;
+     gpt_vocab * vocab;
+     llama_model * model;
  };

- static void winMSync(magic* addr, size_t len_bytes) {
-     bool success = FlushViewOfFile((void *)addr, len_bytes);
-     if (!success) {
-         LPVOID lpMsgBuf;
-         LPVOID lpDisplayBuf;
-         DWORD error_code = GetLastError();
-         FormatMessage(
-             FORMAT_MESSAGE_ALLOCATE_BUFFER |
-             FORMAT_MESSAGE_FROM_SYSTEM |
-             FORMAT_MESSAGE_IGNORE_INSERTS,
-             NULL,
-             error_code,
-             MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
-             (LPTSTR)&lpMsgBuf,
-             0, NULL);
-         lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
-             (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR));
-         StringCchPrintf((LPTSTR)lpDisplayBuf,
-             LocalSize(lpDisplayBuf) / sizeof(TCHAR),
-             TEXT("failed with error %d: %s"),
-             error_code, lpMsgBuf);
-     }
-     HANDLE hFile = (HANDLE)_get_osfhandle(addr->fd);
-     FlushFileBuffers(hFile);
- }
-
-
  static struct magic *mag;

  static inline void spin_lock(std::atomic<unsigned> &lock) {
-     while (!lock.exchange(1, std::memory_order_acquire));
+     while (lock.exchange(1, std::memory_order_acquire));
  }
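The spin_lock change above fixes an inverted condition: std::atomic::exchange returns the previous value, so the loop must keep spinning while it reads back 1 (the lock was already held) and fall through once it reads back 0. A standalone sketch of the corrected test-and-set idiom, assuming C++11 atomics; the names here are illustrative and not from the patch:

    #include <atomic>

    struct tas_lock {
        std::atomic<unsigned> word{0};
        // acquire: spin until exchange() reports the word was previously 0
        void lock()   { while (word.exchange(1, std::memory_order_acquire)) {} }
        // release: publish the critical section's writes, then clear the word
        void unlock() { word.store(0, std::memory_order_release); }
    };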

  static inline void spin_unlock(std::atomic<unsigned> &lock) {
@@ -162,62 +131,64 @@ static void *Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t
  }

  static void magic_commit(void) {
-     mag->offset = mag->capacity;
-     mag->commit = mag->capacity;
+     mag->commit = ROUNDUP(mag->offset, MAGIC_GRAN);
      mag->magic = 0xFEEDABEE;
-     bool success = msync(mag, mag->commit, MS_ASYNC);
+     if (msync(mag, mag->commit, MS_ASYNC) == -1) {
+         perror("msync");
+         exit(77);
+     }
  }

  static void magic_init(void) {
      int fd;
      size_t n;
- #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
-     struct stat st;
- #else
-     struct _stat64 st;
- #endif
+     int64_t size;
      if (mag) return;
      n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN);
      if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) {
-         int result = fstat(fd, &st);
-         int error = errno;
-         if (errno == EBADF)
-             fprintf(stderr, "Bad file descriptor.\n");
-         else if (errno == EINVAL)
-             fprintf(stderr, "Invalid argument to _fstat.\n");
-         if (st.st_size >= n) {
+         if ((size = lseek(fd, 0, SEEK_END)) == -1) {
+             perror("lseek");
+             exit(77);
+         }
+         if (size >= n) {
              mag = (struct magic *)Mmap(MAGIC_ADDR, n,
                                         PROT_READ | PROT_WRITE,
                                         MAP_PRIVATE | MAP_FIXED, fd, 0);
              if (mag->magic == 0xFEEDABEE) {
-                 mag = (struct magic *)Mmap(MAGIC_ADDR, mag->capacity,
+                 mag = (struct magic *)Mmap(MAGIC_ADDR, mag->commit,
                                             PROT_READ | PROT_WRITE,
                                             MAP_PRIVATE | MAP_FIXED, fd, 0);
                  madvise(MAGIC_ADDR, mag->capacity, MADV_WILLNEED);
-                 ftruncate(fd, mag->commit);
                  mag->offset = mag->commit;
                  mag->capacity = mag->commit;
                  mag->fd = -1;
                  return;
              }
          }
-         ftruncate(fd, 0);
+         if (ftruncate(fd, 0) == -1) {
+             perror("ftruncate");
+             exit(77);
+         }
      } else if ((fd = open(MAGIC_PATH, O_RDWR | O_CREAT | O_TRUNC, 0644)) == -1) {
          perror(MAGIC_PATH);
          exit(77);
      }
-     ftruncate(fd, n);
+     if (ftruncate(fd, n) == -1) {
+         perror("ftruncate");
+         exit(77);
+     }
      mag = (struct magic *)Mmap(MAGIC_ADDR, n,
                                 PROT_READ | PROT_WRITE,
                                 MAP_SHARED | MAP_FIXED, fd, 0);
-     mag->offset = MAGIC_GRAN;
+     mag->offset = n;
+     mag->capacity = n;
      mag->fd = fd;
  }

- void *memalign(size_t a, size_t n) {
+ void *magic_memalign(size_t a, size_t n) {
      void *p;
-     size_t i, j, k, m;
      static int count;
+     size_t i, j, k, m, c2;
      magic_init();
      if (a < MAGIC_ALGN) a = MAGIC_ALGN;
      while (!IS2POW(a)) ++a;
@@ -227,85 +198,82 @@ void *memalign(size_t a, size_t n) {
      i = i + sizeof(size_t);
      i = ROUNDUP(i, a);
      j = ROUNDUP(i + m, MAGIC_GRAN);
-     // if (j > mag->capacity) {
+     if (j > mag->capacity) {
+         c2 = mag->capacity;
+         if (!c2) {
+             c2 = MAGIC_GRAN;
+         }
+         while (j > c2) {
+             c2 += c2 >> 4;
+             c2 = ROUNDUP(c2, MAGIC_GRAN);
+         }
          if (!mag->magic) {
-             int result = ftruncate(mag->fd, j);
+             if (ftruncate(mag->fd, c2) == -1) {
+                 perror("ftruncate");
+                 spin_unlock(mag->lock);
+                 return 0;
+             }
              p = mmap(MAGIC_ADDR + mag->capacity,
-                      j - mag->capacity, PROT_READ | PROT_WRITE,
+                      c2 - mag->capacity, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity);
          } else {
              p = mmap(MAGIC_ADDR + mag->capacity,
-                      j - mag->capacity, PROT_READ | PROT_WRITE,
-                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+                      c2 - mag->capacity, PROT_READ | PROT_WRITE,
+                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
          }
          if (p != MAP_FAILED) {
-             mag->capacity = j;
+             mag->capacity = c2;
          } else {
+             perror("mmap");
              spin_unlock(mag->lock);
              return 0;
          }
-     // }
+     }
      mag->offset = i + m;
      spin_unlock(mag->lock);
      p = MAGIC_ADDR + i;
      ((size_t *)p)[-1] = n;
      return p;
  }

- void *_malloc(size_t n) {
-     return memalign(MAGIC_ALGN, n);
- }
-
- size_t malloc_usable_size(const void *p) {
-     return ((const size_t *)p)[-1];
+ void *magic_malloc(size_t n) {
+     return magic_memalign(MAGIC_ALGN, n);
  }

- void *_calloc(size_t n, size_t z) {
+ void *magic_calloc(size_t n, size_t z) {
      void *p;
-     if ((p = _malloc((n *= z)))) {
+     if ((p = magic_malloc((n *= z)))) {
          memset(p, 0, n);
      }
      return p;
  }

- void _free(void *p) {
+ void magic_free(void *p) {
      // do nothing
  }

- void *_realloc(void *p, size_t n) {
+ void *magic_realloc(void *p, size_t n) {
      void *q;
      if (!p) {
-         return _malloc(n);
+         return magic_malloc(n);
      }
      if (!n) {
-         _free(p);
+         magic_free(p);
          return 0;
      }
-     if ((q = _malloc(n))) {
+     if ((q = magic_malloc(n))) {
          memcpy(q, p, ((const size_t *)p)[-1]);
      }
      return q;
  }
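Worth spelling out the block layout the routines above share: magic_memalign stores the requested size in the size_t word immediately before the pointer it returns (((size_t *)p)[-1] = n), and that header is the only per-block metadata the arena keeps, which is how magic_realloc knows how many bytes to copy. A minimal standalone sketch of the size-prefix idiom, using ordinary malloc and hypothetical helper names rather than the shared mapping:

    #include <cstddef>
    #include <cstdlib>

    // Hypothetical helpers illustrating the size-prefix layout only.
    static void *sized_alloc(size_t n) {
        size_t *base = (size_t *)std::malloc(sizeof(size_t) + n);
        if (!base) return nullptr;
        base[0] = n;          // header word sits just before the payload
        return base + 1;      // caller sees only the payload
    }

    static size_t sized_usable(const void *p) {
        return ((const size_t *)p)[-1];   // the same lookup magic_realloc relies on
    }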

- #if defined(malloc)
- # undef malloc
- #endif
- #define malloc(x) _malloc(x)
-
- #if defined(calloc)
- # undef calloc
- #endif
- #define calloc(x) _calloc(x)
-
- #if defined(realloc)
- # undef realloc
- #endif
- #define realloc(x) _realloc(x)
+ void * operator new(size_t size) {
+     return magic_malloc(size);
+ }

- #if defined(free)
- # undef free
- #endif
- #define free(x) _free(x)
+ void operator delete(void * p) {
+     magic_free(p);
+ }
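Replacing the malloc/calloc/realloc/free macro shims with definitions of the global operator new and operator delete means that, in this build, every allocation made through new (including the nodes and buffers behind std::map, std::vector, and std::string) is routed into the magic arena, while delete becomes a no-op because the arena never reclaims individual blocks. A tiny illustration of what that implies, assuming the replacement operators above are linked in; not part of the patch:

    #include <string>
    #include <vector>

    void demo() {
        // operator new -> magic_malloc: the vector object and its element
        // storage are carved out of the magic arena mapping.
        auto *v = new std::vector<std::string>();
        v->push_back("a string long enough to allocate lands in the arena too");
        delete v;   // operator delete -> magic_free, which intentionally does nothing
    }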

  // load the model's weights from a file
  bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
@@ -451,7 +419,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
      {
          struct ggml_init_params params = {
              /* .mem_size   =*/ ctx_size,
-             /* .mem_buffer =*/ NULL,
+             /* .mem_buffer =*/ magic_malloc(ctx_size),
          };

          model.ctx = ggml_init(params);
@@ -772,15 +740,15 @@ bool llama_eval(
      const int d_key = n_embd/n_head;

      static size_t buf_size = 512u*1024*1024;
-     static void * buf = _malloc(buf_size);
+     static void * buf = malloc(buf_size);

      if (mem_per_token > 0 && mem_per_token*N > buf_size) {
          const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
          //fprintf(stderr, "\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);

          // reallocate
          buf_size = buf_size_new;
-         buf = _realloc(buf, buf_size);
+         buf = realloc(buf, buf_size);
          if (buf == nullptr) {
              fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
              return false;