- BenchResult
- CompletionBaseParams
- CompletionParams
- CompletionResponseFormat
- ContextParams
- EmbeddingParams
- JinjaFormattedChatResult
- NativeCompletionParams
- NativeCompletionResult
- NativeCompletionResultTimings
- NativeCompletionTokenProb
- NativeCompletionTokenProbItem
- NativeContextParams
- NativeEmbeddingParams
- NativeEmbeddingResult
- NativeLlamaContext
- NativeSessionLoadResult
- NativeTokenizeResult
- RNLlamaMessagePart
- RNLlamaOAICompatibleMessage
- TokenData
- addNativeLogListener
- convertJsonSchemaToGrammar
- initLlama
- loadLlamaModelInfo
- releaseAllLlama
- setContextLimit
- toggleNativeLog
Ƭ BenchResult: Object
Name | Type |
modelDesc |
string |
modelNParams |
number |
modelSize |
number |
ppAvg |
number |
ppStd |
number |
tgAvg |
number |
tgStd |
number |
Ƭ CompletionBaseParams: Object
Name | Type |
chatTemplate? |
string |
chat_template? |
string |
jinja? |
boolean |
messages? |
RNLlamaOAICompatibleMessage [] |
parallel_tool_calls? |
object |
prompt? |
string |
response_format? |
CompletionResponseFormat |
tool_choice? |
string |
tools? |
object |
Ƭ CompletionParams: Omit<NativeCompletionParams, "emit_partial_completion" | "prompt"> & CompletionBaseParams
Ƭ CompletionResponseFormat: Object
Name | Type |
json_schema? |
{ schema : object ; strict? : boolean } |
json_schema.schema |
object |
json_schema.strict? |
boolean |
schema? |
object |
type |
"text" | "json_object" | "json_schema" |
Ƭ ContextParams: Omit<NativeContextParams, "cache_type_k" | "cache_type_v" | "pooling_type"> & {
  cache_type_k?: "f16" | "f32" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1" ;
  cache_type_v?: "f16" | "f32" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1" ;
  pooling_type?: "none" | "mean" | "cls" | "last" | "rank"
}
Ƭ EmbeddingParams: NativeEmbeddingParams
Ƭ JinjaFormattedChatResult: Object
Name | Type |
additional_stops? |
string [] |
chat_format? |
number |
grammar? |
string |
grammar_lazy? |
boolean |
grammar_triggers? |
{ at_start : boolean ; word : string }[] |
preserved_tokens? |
string [] |
prompt |
string |
Ƭ NativeCompletionParams: Object
Name | Type | Description |
chat_format? |
number |
- |
dry_allowed_length? |
number |
Tokens that extend repetition beyond this receive exponentially increasing penalty: multiplier * base ^ (length of repeating sequence before token - allowed length). Default: 2 |
dry_base? |
number |
Set the DRY repetition penalty base value. Default: 1.75 |
dry_multiplier? |
number |
Set the DRY (Don't Repeat Yourself) repetition penalty multiplier. Default: 0.0, which is disabled. |
dry_penalty_last_n? |
number |
How many tokens to scan for repetitions. Default: -1, where 0 is disabled and -1 is context size. |
dry_sequence_breakers? |
string [] |
Specify an array of sequence breakers for DRY sampling. Only a JSON array of strings is accepted. Default: ['\n', ':', '"', '*'] |
emit_partial_completion |
boolean |
- |
grammar? |
string |
Set grammar for grammar-based sampling. Default: no grammar |
grammar_lazy? |
boolean |
Lazy grammar sampling, triggered by grammar_triggers. Default: false |
grammar_triggers? |
{ at_start : boolean ; word : string }[] |
Lazy grammar triggers. Default: [] |
ignore_eos? |
boolean |
Ignore end of stream token and continue generating. Default: false |
json_schema? |
string |
JSON schema to convert to a grammar for structured JSON output. It will be overridden by grammar if both are set. |
logit_bias? |
number [][] |
Modify the likelihood of a token appearing in the generated text completion. For example, use "logit_bias": [[15043,1.0]] to increase the likelihood of the token 'Hello', or "logit_bias": [[15043,-1.0]] to decrease its likelihood. Setting the value to false, "logit_bias": [[15043,false]] ensures that the token Hello is never produced. The tokens can also be represented as strings, e.g. [["Hello, World!",-0.5]] will reduce the likelihood of all the individual tokens that represent the string Hello, World!, just like the presence_penalty does. Default: [] |
min_p? |
number |
The minimum probability for a token to be considered, relative to the probability of the most likely token. Default: 0.05 |
mirostat? |
number |
Enable Mirostat sampling, controlling perplexity during text generation. Default: 0, where 0 is disabled, 1 is Mirostat, and 2 is Mirostat 2.0. |
mirostat_eta? |
number |
Set the Mirostat learning rate, parameter eta. Default: 0.1 |
mirostat_tau? |
number |
Set the Mirostat target entropy, parameter tau. Default: 5.0 |
n_predict? |
number |
Set the maximum number of tokens to predict when generating text. Note: May exceed the set limit slightly if the last token is a partial multibyte character. When 0, no tokens will be generated but the prompt is evaluated into the cache. Default: -1, where -1 is infinity. |
n_probs? |
number |
If greater than 0, the response also contains the probabilities of top N tokens for each generated token given the sampling settings. Note that for temperature < 0 the tokens are sampled greedily but token probabilities are still being calculated via a simple softmax of the logits without considering any other sampler settings. Default: 0 |
n_threads? |
number |
- |
penalty_freq? |
number |
Repeat alpha frequency penalty. Default: 0.0, which is disabled. |
penalty_last_n? |
number |
Last n tokens to consider for penalizing repetition. Default: 64, where 0 is disabled and -1 is ctx-size. |
penalty_present? |
number |
Repeat alpha presence penalty. Default: 0.0, which is disabled. |
penalty_repeat? |
number |
Control the repetition of token sequences in the generated text. Default: 1.0 |
preserved_tokens? |
string [] |
- |
prompt |
string |
- |
seed? |
number |
Set the random number generator (RNG) seed. Default: -1, which is a random seed. |
stop? |
string [] |
Specify a JSON array of stopping strings. These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: [] |
temperature? |
number |
Adjust the randomness of the generated text. Default: 0.8 |
top_k? |
number |
Limit the next token selection to the K most probable tokens. Default: 40 |
top_p? |
number |
Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. Default: 0.95 |
typical_p? |
number |
Enable locally typical sampling with parameter p. Default: 1.0, which is disabled. |
xtc_probability? |
number |
Set the chance for token removal via XTC sampler. Default: 0.0, which is disabled. |
xtc_threshold? |
number |
Set a minimum probability threshold for tokens to be removed via XTC sampler. Default: 0.1 (> 0.5 disables XTC) |
Ƭ NativeCompletionResult: Object
Name | Type |
completion_probabilities? |
NativeCompletionTokenProb [] |
stopped_eos |
boolean |
stopped_limit |
number |
stopped_word |
string |
stopping_word |
string |
text |
string |
timings |
NativeCompletionResultTimings |
tokens_cached |
number |
tokens_evaluated |
number |
tokens_predicted |
number |
truncated |
boolean |
Ƭ NativeCompletionResultTimings: Object
Name | Type |
predicted_ms |
number |
predicted_n |
number |
predicted_per_second |
number |
predicted_per_token_ms |
number |
prompt_ms |
number |
prompt_n |
number |
prompt_per_second |
number |
prompt_per_token_ms |
number |
Ƭ NativeCompletionTokenProb: Object
Name | Type |
content |
string |
probs |
NativeCompletionTokenProbItem [] |
Ƭ NativeCompletionTokenProbItem: Object
Name | Type |
prob |
number |
tok_str |
string |
Ƭ NativeContextParams: Object
Name | Type | Description |
cache_type_k? |
string |
KV cache data type for the K (Experimental in llama.cpp) |
cache_type_v? |
string |
KV cache data type for the V (Experimental in llama.cpp) |
chat_template? |
string |
Chat template to override the default one from the model. |
embd_normalize? |
number |
- |
embedding? |
boolean |
- |
flash_attn? |
boolean |
Enable flash attention; only recommended on GPU devices (Experimental in llama.cpp) |
is_model_asset? |
boolean |
- |
lora? |
string |
Single LoRA adapter path |
lora_list? |
{ path : string ; scaled? : number }[] |
LoRA adapter list |
lora_scaled? |
number |
Single LoRA adapter scale |
model |
string |
- |
n_batch? |
number |
- |
n_ctx? |
number |
- |
n_gpu_layers? |
number |
Number of layers to store in VRAM (Currently only for iOS) |
n_threads? |
number |
- |
n_ubatch? |
number |
- |
no_gpu_devices? |
boolean |
Skip GPU devices (iOS only) |
pooling_type? |
number |
- |
rope_freq_base? |
number |
- |
rope_freq_scale? |
number |
- |
use_mlock? |
boolean |
- |
use_mmap? |
boolean |
- |
use_progress_callback? |
boolean |
- |
vocab_only? |
boolean |
- |
Ƭ NativeEmbeddingParams: Object
Name | Type |
embd_normalize? |
number |
Ƭ NativeEmbeddingResult: Object
Name | Type |
embedding |
number [] |
Ƭ NativeLlamaContext: Object
Name | Type | Description |
androidLib? |
string |
Loaded library name for Android |
contextId |
number |
- |
gpu |
boolean |
- |
model |
{ chatTemplates : { llamaChat : boolean ; minja : { default : boolean ; defaultCaps : { parallelToolCalls : boolean ; systemRole : boolean ; toolCallId : boolean ; toolCalls : boolean ; toolResponses : boolean ; tools : boolean } ; toolUse : boolean ; toolUseCaps : { parallelToolCalls : boolean ; systemRole : boolean ; toolCallId : boolean ; toolCalls : boolean ; toolResponses : boolean ; tools : boolean } } } ; desc : string ; isChatTemplateSupported : boolean ; metadata : Object ; nEmbd : number ; nParams : number ; size : number } |
- |
model.chatTemplates |
{ llamaChat : boolean ; minja : { default : boolean ; defaultCaps : { parallelToolCalls : boolean ; systemRole : boolean ; toolCallId : boolean ; toolCalls : boolean ; toolResponses : boolean ; tools : boolean } ; toolUse : boolean ; toolUseCaps : { parallelToolCalls : boolean ; systemRole : boolean ; toolCallId : boolean ; toolCalls : boolean ; toolResponses : boolean ; tools : boolean } } } |
- |
model.chatTemplates.llamaChat |
boolean |
- |
model.chatTemplates.minja |
{ default : boolean ; defaultCaps : { parallelToolCalls : boolean ; systemRole : boolean ; toolCallId : boolean ; toolCalls : boolean ; toolResponses : boolean ; tools : boolean } ; toolUse : boolean ; toolUseCaps : { parallelToolCalls : boolean ; systemRole : boolean ; toolCallId : boolean ; toolCalls : boolean ; toolResponses : boolean ; tools : boolean } } |
- |
model.chatTemplates.minja.default |
boolean |
- |
model.chatTemplates.minja.defaultCaps |
{ parallelToolCalls : boolean ; systemRole : boolean ; toolCallId : boolean ; toolCalls : boolean ; toolResponses : boolean ; tools : boolean } |
- |
model.chatTemplates.minja.defaultCaps.parallelToolCalls |
boolean |
- |
model.chatTemplates.minja.defaultCaps.systemRole |
boolean |
- |
model.chatTemplates.minja.defaultCaps.toolCallId |
boolean |
- |
model.chatTemplates.minja.defaultCaps.toolCalls |
boolean |
- |
model.chatTemplates.minja.defaultCaps.toolResponses |
boolean |
- |
model.chatTemplates.minja.defaultCaps.tools |
boolean |
- |
model.chatTemplates.minja.toolUse |
boolean |
- |
model.chatTemplates.minja.toolUseCaps |
{ parallelToolCalls : boolean ; systemRole : boolean ; toolCallId : boolean ; toolCalls : boolean ; toolResponses : boolean ; tools : boolean } |
- |
model.chatTemplates.minja.toolUseCaps.parallelToolCalls |
boolean |
- |
model.chatTemplates.minja.toolUseCaps.systemRole |
boolean |
- |
model.chatTemplates.minja.toolUseCaps.toolCallId |
boolean |
- |
model.chatTemplates.minja.toolUseCaps.toolCalls |
boolean |
- |
model.chatTemplates.minja.toolUseCaps.toolResponses |
boolean |
- |
model.chatTemplates.minja.toolUseCaps.tools |
boolean |
- |
model.desc |
string |
- |
model.isChatTemplateSupported |
boolean |
- |
model.metadata |
Object |
- |
model.nEmbd |
number |
- |
model.nParams |
number |
- |
model.size |
number |
- |
reasonNoGPU |
string |
- |
Ƭ NativeSessionLoadResult: Object
Name | Type |
prompt |
string |
tokens_loaded |
number |
Ƭ NativeTokenizeResult: Object
Name | Type |
tokens |
number [] |
Ƭ RNLlamaMessagePart: Object
Name | Type |
text? |
string |
Ƭ RNLlamaOAICompatibleMessage: Object
Name | Type |
content? |
string | RNLlamaMessagePart [] | any |
role |
string |
Ƭ TokenData: Object
Name | Type |
completion_probabilities? |
NativeCompletionTokenProb [] |
token |
string |
▸ addNativeLogListener(listener
): Object
Name | Type |
listener |
(level : string , text : string ) => void |
Name | Type |
remove |
() => void |
▸ convertJsonSchemaToGrammar(«destructured»): string | Promise<string>
Name | Type |
«destructured» |
Object |
› allowFetch? |
boolean |
› dotall? |
boolean |
› propOrder? |
SchemaGrammarConverterPropOrder |
› schema |
any |
string | Promise<string>
▸ initLlama(«destructured»
, onProgress?
): Promise
Name | Type |
«destructured» |
ContextParams |
onProgress? |
(progress : number ) => void |
▸ loadLlamaModelInfo(model
): Promise
Name | Type |
model |
string |
▸ releaseAllLlama(): Promise
▸ setContextLimit(limit
): Promise
Name | Type |
limit |
number |
▸ toggleNativeLog(enabled
): Promise
Name | Type |
enabled |
boolean |