@@ -40,9 +40,9 @@ extern "C" {
     typedef int llama_token;
 
     typedef struct llama_token_data {
-        llama_token id; // token id
-        float logit; // log-odds of the token
-        float p; // probability of the token
+        llama_token id; // token id
+        float logit;    // log-odds of the token
+        float p;        // probability of the token
     } llama_token_data;
 
     typedef struct llama_token_data_array {
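The `llama_token_data` struct pairs each candidate token with its raw `logit` and a normalized probability `p`. As a rough illustration of how the two fields relate (not part of this diff; in practice the library's own sampling code fills in `p`), here is a minimal C sketch deriving `p` from `logit` with a softmax. The helper name and the assumption that `id` and `logit` are already populated are mine:

```c
#include <math.h>
#include <stddef.h>

// Sketch: given `n` candidates with `id` and `logit` already set,
// fill in `p` so that the probabilities sum to 1.
static void softmax_token_data(llama_token_data * cands, size_t n) {
    // subtract the max logit for numerical stability
    float max_logit = cands[0].logit;
    for (size_t i = 1; i < n; i++) {
        if (cands[i].logit > max_logit) {
            max_logit = cands[i].logit;
        }
    }
    float sum = 0.0f;
    for (size_t i = 0; i < n; i++) {
        cands[i].p = expf(cands[i].logit - max_logit);
        sum += cands[i].p;
    }
    for (size_t i = 0; i < n; i++) {
        cands[i].p /= sum; // normalize
    }
}
```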
@@ -73,23 +73,30 @@ extern "C" {
 
     // model file types
     enum llama_ftype {
-        LLAMA_FTYPE_ALL_F32 = 0,
-        LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
+        LLAMA_FTYPE_ALL_F32              = 0,
+        LLAMA_FTYPE_MOSTLY_F16           = 1, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_0          = 2, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1          = 3, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
-        // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
-        // LLAMA_FTYPE_MOSTLY_Q4_3 (6) support has been removed
-        LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
+        // LLAMA_FTYPE_MOSTLY_Q4_2       = 5, // support has been removed
+        // LLAMA_FTYPE_MOSTLY_Q4_3       = 6, // support has been removed
+        LLAMA_FTYPE_MOSTLY_Q8_0          = 7, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_0          = 8, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_1          = 9, // except 1d tensors
     };
 
     LLAMA_API struct llama_context_params llama_context_default_params();
 
     LLAMA_API bool llama_mmap_supported();
     LLAMA_API bool llama_mlock_supported();
 
+    // TODO: not great API - very likely to change
+    // Initialize the llama + ggml backend
+    // Call once at the start of the program
+    LLAMA_API void llama_init_backend();
+
+    LLAMA_API int64_t llama_time_us();
+
     // Various functions for loading a ggml llama model.
     // Allocate (almost) all memory needed for the model.
     // Return NULL on failure
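Taken together, the new calls slot in ahead of model loading: `llama_init_backend()` runs once at program startup, and `llama_time_us()` returns a microsecond timestamp for simple timing. A hedged usage sketch, assuming the `llama_model_quantize(fname_inp, fname_out, ftype, nthread)` declaration found elsewhere in this revision of llama.h, with hypothetical model paths:

```c
#include <stdint.h>
#include <stdio.h>

#include "llama.h"

int main(void) {
    // per the header comment: initialize the llama + ggml backend once,
    // before any other llama_* call
    llama_init_backend();

    const int64_t t_start_us = llama_time_us();

    // quantize an F16 model to Q5_1 (paths are hypothetical)
    const int rc = llama_model_quantize(
            "models/7B/ggml-model-f16.bin",
            "models/7B/ggml-model-q5_1.bin",
            LLAMA_FTYPE_MOSTLY_Q5_1,
            /*nthread =*/ 4);

    const int64_t t_end_us = llama_time_us();

    fprintf(stderr, "quantization %s in %.2f s\n",
            rc == 0 ? "succeeded" : "failed",
            (t_end_us - t_start_us) / 1e6);
    return rc;
}
```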