@@ -31,6 +31,8 @@
     fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
 } while(0)
 
+#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
+
 // build info
 extern int LLAMA_BUILD_NUMBER;
 extern char const *LLAMA_COMMIT;
@@ -103,7 +105,7 @@ struct gpt_params {
     // // sampling parameters
     struct llama_sampling_params sparams;
 
-    std::string model = "models/7B/ggml-model-f16.gguf"; // model path
+    std::string model = ""; // model path
     std::string model_draft = ""; // draft model for speculative decoding
     std::string model_alias = "unknown"; // model alias
     std::string model_url = ""; // model url to download
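These first two hunks move the hardcoded default out of `gpt_params`: `model` now starts empty, and the old path lives in the new `DEFAULT_MODEL_PATH` macro. The helper `gpt_params_handle_model_default`, declared further down in this diff, presumably applies it as a fallback. A minimal sketch of that idea, assuming the helper only fills in the fallback path; the real implementation may also derive `model` from `model_url`:

```cpp
#include "common.h" // for gpt_params and DEFAULT_MODEL_PATH

// Sketch only: fall back to the compile-time default when the user
// supplied no model path. The actual helper may also cover the
// model_url / remote download cases.
void gpt_params_handle_model_default(gpt_params & params) {
    if (params.model.empty()) {
        params.model = DEFAULT_MODEL_PATH; // "models/7B/ggml-model-f16.gguf"
    }
}
```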
@@ -144,7 +146,7 @@ struct gpt_params {
     bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt
     size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. If 0, all tasks will be computed
 
-    bool kl_divergence = false; // compute KL-divergence
+    bool kl_divergence = false; // compute KL divergence
 
     bool random_prompt = false; // do not randomize prompt if none provided
     bool use_color = false; // use color to distinguish generations and inputs
@@ -159,6 +161,7 @@ struct gpt_params {
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles
     bool cont_batching = true; // insert new sequences for decoding on-the-fly
+    bool flash_attn = false; // flash attention
 
     bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
     bool ignore_eos = false; // ignore generated EOS tokens
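The new `flash_attn` flag presumably needs to be forwarded into llama.cpp's context parameters at load time. A hedged snippet of what that wiring could look like; the `flash_attn` field on `llama_context_params` is an assumption based on the flash-attention support added in the same release cycle:

```cpp
// Sketch: forward the CLI-level flag into the context parameters
// before the llama context is created.
llama_context_params cparams = llama_context_default_params();
cparams.flash_attn = params.flash_attn; // opt in to flash attention kernels
```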
@@ -172,15 +175,20 @@ struct gpt_params {
     bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
+    bool check_tensors = false; // validate tensor data
 
     std::string cache_type_k = "f16"; // KV cache data type for the K
     std::string cache_type_v = "f16"; // KV cache data type for the V
 
     // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector
-    std::string image = "";  // path to an image file
+    std::string mmproj = "";        // path to multimodal projector
+    std::vector<std::string> image; // path to image file(s)
 };
 
+void gpt_params_handle_model_default(gpt_params & params);
+
+bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
+
 bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
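`parse_kv_override` exposes the parsing behind metadata overrides, letting callers build up a `std::vector<llama_model_kv_override>` one argument at a time. A hedged usage sketch; the `KEY=TYPE:VALUE` syntax shown is an assumption based on the `--override-kv` option:

```cpp
std::vector<llama_model_kv_override> kv_overrides;

// Sketch: parse one override, e.g. forcing a tokenizer flag off.
// Presumably returns false and reports nothing parsed on bad input.
if (!parse_kv_override("tokenizer.ggml.add_bos_token=bool:false", kv_overrides)) {
    fprintf(stderr, "invalid KV override\n");
}
```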
@@ -204,6 +212,7 @@ bool validate_file_name(const std::string & filename);
 std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
 std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
 std::vector<std::string> string_split(std::string input, char separator);
+std::string string_strip(const std::string & str);
 std::string sampler_type_to_name_string(llama_sampler_type sampler_type);
 
 //
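`string_strip` is declared here without a definition; a plausible implementation trims ASCII whitespace from both ends of the input. A sketch, not necessarily the version in common.cpp:

```cpp
#include <cctype>
#include <string>

// Sketch: advance past leading whitespace, back up over trailing
// whitespace, and return the slice in between.
std::string string_strip(const std::string & str) {
    size_t start = 0;
    size_t end   = str.size();
    while (start < end && std::isspace((unsigned char) str[start]))   { start++; }
    while (end > start && std::isspace((unsigned char) str[end - 1])) { end--;   }
    return str.substr(start, end - start);
}
```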