@@ -1166,50 +1166,51 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
11661166    ctx->info [tensor_id].t .data  = (void  *)(uintptr_t )data; //  double cast suppresses warning about casting away const
11671167}
11681168
1169- struct  gguf_writer  {
1170-     std::vector<int8_t > & buf;
1169+ struct  gguf_writer_base  {
1170+     size_t  written_bytes {0u };
1171+ 
1172+     ~gguf_writer_base (void ) {}
11711173
1172-     gguf_writer (std::vector<int8_t > & buf) : buf(buf) {}
1174+     //  we bet on devirtualization
          // Sink primitive: emit exactly one byte. The non-virtual write() overloads of this
          // struct funnel their output through this function. The implementations visible in
          // this file also advance written_bytes, which pad() relies on.
1175+     virtual  void  write (int8_t  val) = 0;
          // Sink primitive: emit all bytes of val in order. The implementations visible in this
          // file advance written_bytes by val.size().
1176+     virtual  void  write (const  std::vector<int8_t > & val) = 0;
          // Emit the data of the tensor described by info. The implementations visible in this
          // file assert that the current position relative to offset_data equals info.offset
          // and finish by calling pad(alignment).
1177+     virtual  void  write_tensor_data (const  struct  gguf_tensor_info  & info, size_t  offset_data, size_t  alignment) = 0;
11731178
// Generic fallback: emit the object representation of val one byte at a time,
// in the order the bytes are laid out in memory, through the virtual
// single-byte write().
template <typename T>
void write(const T & val) {
    const int8_t * bytes = reinterpret_cast<const int8_t *>(&val);
    for (size_t pos = 0; pos != sizeof(T); ++pos) {
        write(bytes[pos]);
    }
}
11801185
1181-     void  write (const  std::vector<int8_t > & val) const  {
1182-         buf.insert (buf.end (), val.begin (), val.end ());
1183-     }
1184- 
1185-     void  write (const  bool  & val) const  {
// Booleans are emitted as exactly one byte: 1 for true, 0 for false.
void write(const bool & val) {
    int8_t encoded = 0;
    if (val) {
        encoded = 1;
    }
    write(encoded);
}
11891190
1190-     void  write (const  std::string & val) const   {
1191+     void  write (const  std::string & val) {
11911192        {
11921193            const  uint64_t  n = val.length ();
11931194            write (n);
11941195        }
11951196        for  (size_t  i = 0 ; i < val.length (); ++i) {
1196-             buf. push_back ( reinterpret_cast < const   int8_t  *> (val.data ())[i]);
1197+             write ( (val.data ())[i]);
11971198        }
11981199    }
11991200
1200-     void  write (const  char  * val) const   {
1201+     void  write (const  char  * val) {
12011202        write (std::string (val));
12021203    }
12031204
1204-     void  write (const  enum  ggml_type & val) const   {
1205+     void  write (const  enum  ggml_type & val) {
12051206        write (int32_t (val));
12061207    }
12071208
1208-     void  write (const  enum  gguf_type & val) const   {
1209+     void  write (const  enum  gguf_type & val) {
12091210        write (int32_t (val));
12101211    }
12111212
1212-     void  write (const  struct  gguf_kv  & kv) const   {
1213+     void  write (const  struct  gguf_kv  & kv) {
12131214        const  uint64_t  ne = kv.get_ne ();
12141215
12151216        write (kv.get_key ());
@@ -1250,7 +1251,7 @@ struct gguf_writer {
12501251        }
12511252    }
12521253
1253-     void  write_tensor_meta (const  struct  gguf_tensor_info  & info) const   {
1254+     void  write_tensor_meta (const  struct  gguf_tensor_info  & info) {
12541255        write (info.t .name );
12551256
12561257        const  uint32_t  n_dims = ggml_n_dims (&info.t );
@@ -1263,14 +1264,33 @@ struct gguf_writer {
12631264        write (info.offset );
12641265    }
12651266
1266-     void  pad (const  size_t  alignment) const   {
1267-         while  (buf. size ()  % alignment != 0 ) {
1267+     void  pad (const  size_t  alignment) {
1268+         while  (written_bytes  % alignment != 0 ) {
12681269            const  int8_t  zero = 0 ;
12691270            write (zero);
12701271        }
12711272    }
1273+ };
1274+ 
1275+ //  vector buffer based writer
1276+ struct  gguf_writer_buf  final  : public gguf_writer_base {
1277+     std::vector<int8_t > & buf;
1278+ 
1279+     gguf_writer_buf (std::vector<int8_t > & buf) : buf(buf) {}
1280+ 
1281+     using  gguf_writer_base::write;
1282+ 
1283+     void  write (const  int8_t  val) override  {
1284+         buf.push_back (val);
1285+         written_bytes++;
1286+     }
12721287
1273-     void  write_tensor_data (const  struct  gguf_tensor_info  & info, const  size_t  offset_data, const  size_t  alignment) const  {
1288+     void  write (const  std::vector<int8_t > & val) override  {
1289+         buf.insert (buf.end (), val.begin (), val.end ());
1290+         written_bytes += val.size ();
1291+     }
1292+ 
1293+     void  write_tensor_data (const  struct  gguf_tensor_info  & info, const  size_t  offset_data, const  size_t  alignment) override  {
12741294        GGML_ASSERT (buf.size () - offset_data == info.offset );
12751295
12761296        GGML_ASSERT (ggml_is_contiguous (&info.t ));
@@ -1284,14 +1304,58 @@ struct gguf_writer {
12841304            GGML_ASSERT (info.t .data );
12851305            memcpy (buf.data () + offset, info.t .data , nbytes);
12861306        }
1307+         written_bytes += nbytes;
12871308
12881309        pad (alignment);
12891310    }
12901311};
12911312
1292- void  gguf_write_to_buf (const  struct  gguf_context  * ctx, std::vector<int8_t > & buf, bool  only_meta) {
1293-     const  struct  gguf_writer  gw (buf);
1313+ //  file based writer
1314+ struct  gguf_writer_file  final  : public gguf_writer_base {
1315+     FILE * file;
1316+ 
1317+     gguf_writer_file (FILE* file) : file(file) {}
1318+ 
1319+     using  gguf_writer_base::write;
1320+ 
1321+     void  write (const  int8_t  val) override  {
1322+         const  auto  real_val = static_cast <uint8_t >(val);
1323+         const  auto  ret = fputc (real_val, file);
1324+         written_bytes++;
1325+         if  (ret != real_val) {
1326+             throw  std::runtime_error (" unexpected fputc result '"   + std::to_string (ret) + " ' instead of '"   + std::to_string ((int )real_val) + " '"  );
1327+         }
1328+     }
1329+ 
1330+     void  write (const  std::vector<int8_t > & val) override  {
1331+         const  auto  ret = fwrite (val.data (), 1 , val.size (), file);
1332+         written_bytes += val.size ();
1333+         if  (ret != val.size ()) {
1334+             throw  std::runtime_error (" unexpected fwrite number of bytes written, '"   + std::to_string (ret) + " ' instead of '"   + std::to_string (val.size ()) + " '"  );
1335+         }
1336+     }
1337+ 
1338+     void  write_tensor_data (const  struct  gguf_tensor_info  & info, const  size_t  offset_data, const  size_t  alignment) override  {
1339+         GGML_ASSERT (written_bytes - offset_data == info.offset );
1340+ 
1341+         GGML_ASSERT (ggml_is_contiguous (&info.t ));
1342+         const  size_t  nbytes = ggml_nbytes (&info.t );
12941343
1344+         std::vector<int8_t > buf (nbytes);
1345+         if  (info.t .buffer ) {
1346+             ggml_backend_tensor_get (&info.t , buf.data (), 0 , nbytes);
1347+         } else  {
1348+             GGML_ASSERT (info.t .data );
1349+             memcpy (buf.data (), info.t .data , nbytes);
1350+         }
1351+         write (buf);
1352+ 
1353+         pad (alignment);
1354+     }
1355+ };
1356+ 
1357+ template  <typename  writer_t >
1358+ static  void  gguf_write_out (const  struct  gguf_context  * ctx, writer_t  & gw, bool  only_meta) {
12951359    const  int64_t  n_kv      = gguf_get_n_kv (ctx);
12961360    const  int64_t  n_tensors = gguf_get_n_tensors (ctx);
12971361
@@ -1321,14 +1385,19 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
13211385        return ;
13221386    }
13231387
1324-     const  size_t  offset_data = gw.buf . size () ;
1388+     const  size_t  offset_data = gw.written_bytes ;
13251389
13261390    //  write tensor data
13271391    for  (int64_t  i = 0 ; i < n_tensors; ++i) {
13281392        gw.write_tensor_data (ctx->info [i], offset_data, ctx->alignment );
13291393    }
13301394}
13311395
1396+ void  gguf_write_to_buf (const  struct  gguf_context  * ctx, std::vector<int8_t > & buf, bool  only_meta) {
1397+     gguf_writer_buf gw (buf);
1398+     gguf_write_out (ctx, gw, only_meta);
1399+ }
1400+ 
13321401bool  gguf_write_to_file (const  struct  gguf_context  * ctx, const  char  * fname, bool  only_meta) {
13331402    FILE * file = ggml_fopen (fname, " wb"  );
13341403
@@ -1337,11 +1406,17 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
13371406        return  false ;
13381407    }
13391408
1340-     std::vector<int8_t > buf;
1341-     gguf_write_to_buf (ctx, buf, only_meta);
1342-     const  bool  ok = fwrite (buf.data (), 1 , buf.size (), file) == buf.size ();
1409+     try  {
1410+         gguf_writer_file gw (file);
1411+         gguf_write_out (ctx, gw, only_meta);
1412+     } catch  (const  std::runtime_error& ex) {
1413+         GGML_LOG_ERROR (" %s: failed to write GGUF data into '%s': %s\n "  , __func__, fname, ex.what ());
1414+         fclose (file);
1415+         return  false ;
1416+     }
1417+ 
13431418    fclose (file);
1344-     return  ok ;
1419+     return  true ;
13451420}
13461421
13471422size_t  gguf_get_meta_size (const  struct  gguf_context  * ctx) {
0 commit comments