@@ -1166,50 +1166,51 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
11661166 ctx->info [tensor_id].t .data = (void *)(uintptr_t )data; // double cast suppresses warning about casting away const
11671167}
11681168
1169- struct gguf_writer {
1170- std::vector<int8_t > & buf;
// NOTE(review): this SOURCE is a pasted commit diff (old '-' lines, new '+' lines,
// '@@' hunk markers); the interiors of write(gguf_kv) and write_tensor_meta are
// elided by the hunks below, so only commentary is added here.
//
// gguf_writer_base: abstract serializer for the GGUF on-disk format. Derived
// writers implement the three pure-virtual write primitives; every other
// overload funnels through them.
1169+ struct gguf_writer_base {
// total bytes emitted so far; used for data offsets and alignment padding
1170+ size_t written_bytes {0u };
1171+
// BUG(review): polymorphic base (pure virtual members below) with a public
// NON-virtual destructor -- deleting a derived writer through a
// gguf_writer_base* would be UB. Visible callers only use derived writers by
// value, so nothing breaks today, but this should be
// `virtual ~gguf_writer_base() = default;` (or protected non-virtual).
1172+ ~gguf_writer_base (void ) {}
11711173
1172- gguf_writer (std::vector<int8_t > & buf) : buf(buf) {}
// primitives each concrete writer must provide; gguf_write_out instantiates
// on the concrete writer type so these calls can be devirtualized
1174+ // we bet on devirtualization
1175+ virtual void write (int8_t val) = 0;
1176+ virtual void write (const std::vector<int8_t > & val) = 0;
1177+ virtual void write_tensor_data (const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0;
11731178
// serialize any value byte-by-byte through the virtual int8_t overload;
// assumes host byte order matches GGUF's on-disk order -- TODO confirm
// behavior on big-endian hosts
11741179 template <typename T>
1175- void write (const T & val) const {
1180+ void write (const T & val) {
11761181 for (size_t i = 0 ; i < sizeof (val); ++i) {
1177- buf. push_back (reinterpret_cast <const int8_t *>(&val)[i]);
1182+ write (reinterpret_cast <const int8_t *>(&val)[i]);
11781183 }
11791184 }
11801185
// bool is stored as a single 0/1 byte
1181- void write (const std::vector<int8_t > & val) const {
1182- buf.insert (buf.end (), val.begin (), val.end ());
1183- }
1184-
1185- void write (const bool & val) const {
1186+ void write (const bool & val) {
11861187 const int8_t val8 = val ? 1 : 0 ;
11871188 write (val8);
11881189 }
11891190
// strings are length-prefixed (uint64 byte count, no NUL terminator)
1190- void write (const std::string & val) const {
1191+ void write (const std::string & val) {
11911192 {
11921193 const uint64_t n = val.length ();
11931194 write (n);
11941195 }
11951196 for (size_t i = 0 ; i < val.length (); ++i) {
1196- buf. push_back ( reinterpret_cast < const int8_t *> (val.data ())[i]);
1197+ write ( (val.data ())[i]);
11961198 }
11981199 }
11991200
1200- void write (const char * val) const {
1201+ void write (const char * val) {
12011202 write (std::string (val));
12021203 }
12031204
// enums are serialized as int32
1204- void write (const enum ggml_type & val) const {
1205+ void write (const enum ggml_type & val) {
12051206 write (int32_t (val));
12061207 }
12071208
1208- void write (const enum gguf_type & val) const {
1209+ void write (const enum gguf_type & val) {
12091210 write (int32_t (val));
12101211 }
12111212
// key/value pair: key string, then the typed payload; body elided by the
// '@@' hunk below (presumably the per-type switch -- cannot verify from here)
1212- void write (const struct gguf_kv & kv) const {
1213+ void write (const struct gguf_kv & kv) {
12131214 const uint64_t ne = kv.get_ne ();
12141215
12151216 write (kv.get_key ());
@@ -1250,7 +1251,7 @@ struct gguf_writer {
12501251 }
12511252 }
12521253
// tensor header: name, n_dims, then shape/type (elided by hunk), then offset
1253- void write_tensor_meta (const struct gguf_tensor_info & info) const {
1254+ void write_tensor_meta (const struct gguf_tensor_info & info) {
12541255 write (info.t .name );
12551256
12561257 const uint32_t n_dims = ggml_n_dims (&info.t );
@@ -1263,14 +1264,33 @@ struct gguf_writer {
12631264 write (info.offset );
12641265 }
12651266
// emit zero bytes until written_bytes is a multiple of alignment; the switch
// from buf.size() to written_bytes makes padding work for non-buffer writers
1266- void pad (const size_t alignment) const {
1267- while (buf. size () % alignment != 0 ) {
1267+ void pad (const size_t alignment) {
1268+ while (written_bytes % alignment != 0 ) {
12681269 const int8_t zero = 0 ;
12691270 write (zero);
12701271 }
12711272 }
1273+ };
1274+
// gguf_writer_buf: concrete writer that appends into an in-memory
// std::vector<int8_t> owned by the caller. written_bytes tracks bytes
// appended -- NOTE(review): it equals buf.size() only if buf started empty;
// TODO confirm callers never pass a pre-filled buffer.
1275+ // vector buffer based writer
1276+ struct gguf_writer_buf final : public gguf_writer_base {
1277+ std::vector<int8_t > & buf;
1278+
1279+ gguf_writer_buf (std::vector<int8_t > & buf) : buf(buf) {}
1280+
// re-expose the base's non-virtual write() overloads hidden by the overrides
1281+ using gguf_writer_base::write;
1282+
1283+ void write (const int8_t val) override {
1284+ buf.push_back (val);
1285+ written_bytes++;
1286+ }
12721287
1273- void write_tensor_data (const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const {
1288+ void write (const std::vector<int8_t > & val) override {
1289+ buf.insert (buf.end (), val.begin (), val.end ());
1290+ written_bytes += val.size ();
1291+ }
1292+
// copy the tensor payload straight into buf (the offset computation and
// buffer resize are elided by the '@@' hunk below), then pad to alignment
1293+ void write_tensor_data (const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
12741294 GGML_ASSERT (buf.size () - offset_data == info.offset );
12751295
12761296 GGML_ASSERT (ggml_is_contiguous (&info.t ));
@@ -1284,14 +1304,58 @@ struct gguf_writer {
12841304 GGML_ASSERT (info.t .data );
12851305 memcpy (buf.data () + offset, info.t .data , nbytes);
12861306 }
1307+ written_bytes += nbytes;
12871308
12881309 pad (alignment);
12891310 }
12901311};
12911312
1292- void gguf_write_to_buf (const struct gguf_context * ctx, std::vector<int8_t > & buf, bool only_meta) {
1293- const struct gguf_writer gw (buf);
1313+ // file based writer
1314+ struct gguf_writer_file final : public gguf_writer_base {
1315+ FILE * file;
1316+
1317+ gguf_writer_file (FILE* file) : file(file) {}
1318+
1319+ using gguf_writer_base::write;
1320+
1321+ void write (const int8_t val) override {
1322+ const auto real_val = static_cast <uint8_t >(val);
1323+ const auto ret = fputc (real_val, file);
1324+ written_bytes++;
1325+ if (ret != real_val) {
1326+ throw std::runtime_error (" unexpected fputc result '" + std::to_string (ret) + " ' instead of '" + std::to_string ((int )real_val) + " '" );
1327+ }
1328+ }
1329+
1330+ void write (const std::vector<int8_t > & val) override {
1331+ const auto ret = fwrite (val.data (), 1 , val.size (), file);
1332+ written_bytes += val.size ();
1333+ if (ret != val.size ()) {
1334+ throw std::runtime_error (" unexpected fwrite number of bytes written, '" + std::to_string (ret) + " ' instead of '" + std::to_string (val.size ()) + " '" );
1335+ }
1336+ }
1337+
1338+ void write_tensor_data (const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1339+ GGML_ASSERT (written_bytes - offset_data == info.offset );
1340+
1341+ GGML_ASSERT (ggml_is_contiguous (&info.t ));
1342+ const size_t nbytes = ggml_nbytes (&info.t );
12941343
1344+ std::vector<int8_t > buf (nbytes);
1345+ if (info.t .buffer ) {
1346+ ggml_backend_tensor_get (&info.t , buf.data (), 0 , nbytes);
1347+ } else {
1348+ GGML_ASSERT (info.t .data );
1349+ memcpy (buf.data (), info.t .data , nbytes);
1350+ }
1351+ write (buf);
1352+
1353+ pad (alignment);
1354+ }
1355+ };
1356+
// gguf_write_out: serialize ctx (KV metadata, tensor metadata, and -- unless
// only_meta -- the tensor data section) through any writer. Templated on the
// concrete writer type so the virtual write calls can be devirtualized per
// instantiation. The function interior (header magic, KV loop, tensor-meta
// loop, the only_meta early return) is elided by the '@@' hunk below.
1357+ template <typename writer_t >
1358+ static void gguf_write_out (const struct gguf_context * ctx, writer_t & gw, bool only_meta) {
12951359 const int64_t n_kv = gguf_get_n_kv (ctx);
12961360 const int64_t n_tensors = gguf_get_n_tensors (ctx);
12971361
@@ -1321,14 +1385,19 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
13211385 return ;
13221386 }
13231387
// data section starts at the current write position; now writer-agnostic
// (was gw.buf.size(), which only worked for the vector writer)
1324- const size_t offset_data = gw.buf . size () ;
1388+ const size_t offset_data = gw.written_bytes ;
13251389
13261390 // write tensor data
13271391 for (int64_t i = 0 ; i < n_tensors; ++i) {
13281392 gw.write_tensor_data (ctx->info [i], offset_data, ctx->alignment );
13291393 }
13301394}
13311395
1396+ void gguf_write_to_buf (const struct gguf_context * ctx, std::vector<int8_t > & buf, bool only_meta) {
1397+ gguf_writer_buf gw (buf);
1398+ gguf_write_out (ctx, gw, only_meta);
1399+ }
1400+
// gguf_write_to_file: stream the GGUF image directly to disk via
// gguf_writer_file instead of building the whole file in a temporary vector
// first (the removed '-' lines) -- avoids holding a full in-memory copy.
// Returns false on open failure or on any write error (surfaced as
// std::runtime_error by the writer). The fopen-failure branch is partially
// elided by the '@@' hunk below.
13321401bool gguf_write_to_file (const struct gguf_context * ctx, const char * fname, bool only_meta) {
13331402 FILE * file = ggml_fopen (fname, " wb" );
13341403
@@ -1337,11 +1406,17 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
13371406 return false ;
13381407 }
13391408
1340- std::vector<int8_t > buf;
1341- gguf_write_to_buf (ctx, buf, only_meta);
1342- const bool ok = fwrite (buf.data (), 1 , buf.size (), file) == buf.size ();
1409+ try {
1410+ gguf_writer_file gw (file);
1411+ gguf_write_out (ctx, gw, only_meta);
1412+ } catch (const std::runtime_error& ex) {
1413+ GGML_LOG_ERROR (" %s: failed to write GGUF data into '%s': %s\n " , __func__, fname, ex.what ());
1414+ fclose (file);
1415+ return false ;
1416+ }
1417+
// NOTE(review): fclose's return value is ignored here -- a failed flush of
// buffered data would still be reported as success; consider checking it
13431418 fclose (file);
1344- return ok ;
1419+ return true ;
13451420}
13461421
13471422size_t gguf_get_meta_size (const struct gguf_context * ctx) {
0 commit comments