@@ -27,23 +27,20 @@
#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)

#define print_build_info() do {                                                                    \
-    fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT);          \
+    fprintf(stderr, "%s: build = %d (%s)\n",      __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT);     \
    fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET);    \
} while(0)

#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"

// build info
extern int LLAMA_BUILD_NUMBER;
-extern char const *LLAMA_COMMIT;
-extern char const *LLAMA_COMPILER;
-extern char const *LLAMA_BUILD_TARGET;
+extern char const * LLAMA_COMMIT;
+extern char const * LLAMA_COMPILER;
+extern char const * LLAMA_BUILD_TARGET;

struct llama_control_vector_load_info;

-int get_math_cpu_count();
-int32_t get_num_physical_cores();
-
#define print_build_info() do {                                                                    \
    fprintf(stderr, "%s: build = %d (%s)\n",      __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT);     \
    fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET);    \
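Both macros wrap their bodies in do { ... } while(0) so they expand to a single statement, even after an unbraced if. A minimal usage sketch (illustrative only, not part of the diff; the build values in the comment are placeholders):

// assumes #include "common.h"
#include <cstdio>  // fprintf
#include <cstdlib> // exit

int main(int argc, char ** argv) {
    print_build_info(); // e.g. "main: build = 0 (unknown)" when the build system did not set the values
    if (argc < 2) {
        // die_fmt() prints "error: ..." to stderr and exits with status 1
        die_fmt("expected at least 1 argument, got %d", argc - 1);
    }
    return 0;
}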
@@ -55,14 +52,21 @@ extern char const *LLAMA_COMMIT;
extern char const *LLAMA_COMPILER;
extern char const *LLAMA_BUILD_TARGET;

+//
+// CPU utils
+//
+
+int32_t cpu_get_num_physical_cores();
+int32_t cpu_get_num_math();
+
//
// CLI argument parsing
//

struct gpt_params {
    uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed

-    int32_t n_threads = get_math_cpu_count();
+    int32_t n_threads = cpu_get_num_math();
    int32_t n_threads_draft = -1;
    int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
    int32_t n_threads_batch_draft = -1;
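A sketch of how the new CPU helpers and the thread-count fields above fit together (the function is hypothetical; only cpu_get_num_math(), cpu_get_num_physical_cores() and the gpt_params fields come from this header, and treating -1 as "unset" follows the comments above):

// assumes #include "common.h"
static void example_resolve_threads(gpt_params & params) {
    if (params.n_threads <= 0) {
        params.n_threads = cpu_get_num_math(); // same default the struct initializer uses
    }
    if (params.n_threads_batch == -1) {
        // one plausible policy: batch/prompt processing uses every physical core
        params.n_threads_batch = cpu_get_num_physical_cores();
    }
}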
@@ -93,6 +97,7 @@ struct gpt_params {
    float yarn_beta_slow = 1.0f;  // YaRN high correction dim
    int32_t yarn_orig_ctx = 0;    // YaRN original context length
    float defrag_thold = -1.0f;   // KV cache defragmentation threshold
+    std::string rpc_servers = ""; // comma separated list of RPC servers

    lm_ggml_backend_sched_eval_callback cb_eval = nullptr;
    void * cb_eval_user_data = nullptr;
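The new rpc_servers field is a comma-separated list, so it pairs naturally with the string_split() helper declared further down in this header. A sketch (the endpoint format in the comment is an assumption, not specified by the diff):

// assumes #include "common.h", which provides <string> and <vector>
static std::vector<std::string> example_rpc_endpoints(const gpt_params & params) {
    if (params.rpc_servers.empty()) {
        return {}; // local-only, no RPC backends
    }
    // e.g. "192.168.1.2:50052,192.168.1.3:50052" -> two endpoint strings
    return string_split(params.rpc_servers, ',');
}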
@@ -151,6 +156,9 @@ struct gpt_params {
    bool random_prompt = false; // do not randomize prompt if none provided
    bool use_color     = false; // use color to distinguish generations and inputs
    bool interactive   = false; // interactive mode
+    bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode
+    bool special       = false; // enable special token output
+    bool conversation  = false; // conversation mode (does not print special tokens and suffix/prefix)
    bool chatml        = false; // chatml mode (used for models trained on chatml syntax)
    bool prompt_cache_all = false; // save user input and generations to prompt cache
    bool prompt_cache_ro  = false; // open the prompt cache read-only and do not update it
@@ -187,33 +195,34 @@ struct gpt_params {

void gpt_params_handle_model_default(gpt_params & params);

-bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
-
-bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);
+bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);
+bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
+bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
+void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);

-bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
+std::string gpt_params_get_system_info(const gpt_params & params);

-void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
-
-bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
-
-std::string get_system_info(const gpt_params & params);
+//
+// String utils
+//

-std::string gpt_random_prompt(std::mt19937 & rng);
+std::vector<std::string> string_split(std::string input, char separator);

-void process_escapes(std::string & input);
+std::string string_strip(const std::string & str);
+std::string string_get_sortable_timestamp();
+std::string string_random_prompt(std::mt19937 & rng);

-bool validate_file_name(const std::string & filename);
+bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
+void string_process_escapes(std::string & input);

//
-// String utils
+// Filesystem utils
//

-std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
-std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
-std::vector<std::string> string_split(std::string input, char separator);
-std::string string_strip(const std::string & str);
-std::string sampler_type_to_name_string(llama_sampler_type sampler_type);
+bool fs_validate_filename(const std::string & filename);
+bool fs_create_directory_with_parents(const std::string & path);
+
+std::string fs_get_cache_directory();

//
// Model utils
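The rename is mechanical: each free function now carries the prefix of the family it belongs to (gpt_params_*, string_*, fs_*), which is what the section headers above formalize. A sketch of the renamed entry points in a typical example program (error handling simplified; whether the parser prints usage on failure is up to its implementation):

// assumes #include "common.h"
#include <cstdio>

int main(int argc, char ** argv) {
    gpt_params params;
    if (!gpt_params_parse(argc, argv, params)) {
        return 1; // invalid or unknown arguments
    }
    fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());

    std::string prompt = string_strip(params.prompt); // trim surrounding whitespace
    std::string log_name = string_get_sortable_timestamp() + ".log";
    if (!fs_validate_filename(log_name)) {
        return 1;
    }
    return 0;
}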
@@ -284,29 +293,15 @@ std::string llama_detokenize_bpe(
// defaults to true when model type is SPM, otherwise false.
bool llama_should_add_bos_token(const llama_model * model);

-//
-// YAML utils
-//
-
-bool create_directory_with_parents(const std::string & path);
-void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data);
-void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data);
-void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data);
-std::string get_sortable_timestamp();
-
-void dump_non_result_info_yaml(
-    FILE * stream, const gpt_params & params, const llama_context * lctx,
-    const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
-
//
// KV cache utils
//

// Dump the KV cache view with the number of sequences per cell.
-void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80);
+void llama_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);

// Dump the KV cache view showing individual sequences in each cell (long output).
-void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
+void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);

//
// Embedding utils
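A debugging sketch for the renamed dump helpers, assuming the llama_kv_cache_view_init/_update/_free view API from llama.h of the same era (the sequence count of 4 is an arbitrary example value):

// assumes #include "common.h" and #include "llama.h"
static void example_dump_kv(llama_context * ctx) {
    llama_kv_cache_view view = llama_kv_cache_view_init(ctx, 4);
    llama_kv_cache_view_update(ctx, &view);

    llama_kv_cache_dump_view(view);          // compact: sequence count per cell, default 80 cells per row
    llama_kv_cache_dump_view_seqs(view, 40); // verbose: individual sequence ids, 40 cells per row

    llama_kv_cache_view_free(&view);
}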
@@ -340,6 +335,20 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
//
// Split utils
//
+
static const char * const LLM_KV_SPLIT_NO            = "split.no";
static const char * const LLM_KV_SPLIT_COUNT         = "split.count";
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
+
+//
+// YAML utils
+//
+
+void yaml_dump_vector_float(FILE * stream, const char * prop_name, const std::vector<float> & data);
+void yaml_dump_vector_int(FILE * stream, const char * prop_name, const std::vector<int> & data);
+void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
+
+void yaml_dump_non_result_info(
+    FILE * stream, const gpt_params & params, const llama_context * lctx,
+    const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
+
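The YAML helpers keep their parameter lists; only the yaml_ prefix and their position at the end of the header change (create_directory_with_parents and get_sortable_timestamp moved to the fs_ and string_ families instead). A usage sketch (the file name and model description are arbitrary):

// assumes #include "common.h"
#include <cstdio>

static void example_yaml_log(const gpt_params & params, const llama_context * lctx,
                             const std::vector<int> & prompt_tokens) {
    FILE * f = fopen("run-info.yaml", "w");
    if (f == NULL) {
        return;
    }
    const std::string timestamp = string_get_sortable_timestamp();
    yaml_dump_non_result_info(f, params, lctx, timestamp, prompt_tokens, "llama 7B F16");
    fclose(f);
}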