Skip to content

Commit 812b91e

Browse files
committed
feat: add reasoning_format param & reasoning_content in completion result
1 parent e475f84 commit 812b91e

File tree

6 files changed

+62
-3
lines changed

6 files changed

+62
-3
lines changed

android/src/main/java/com/rnllama/LlamaContext.java

+3
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ public LlamaContext(int id, ReactApplicationContext reactContext, ReadableMap pa
7070
params.getString("model"),
7171
// String chat_template,
7272
params.hasKey("chat_template") ? params.getString("chat_template") : "",
73+
// String reasoning_format,
74+
params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
7375
// boolean embedding,
7476
params.hasKey("embedding") ? params.getBoolean("embedding") : false,
7577
// int embd_normalize,
@@ -470,6 +472,7 @@ protected static native WritableMap modelInfo(
470472
protected static native long initContext(
471473
String model,
472474
String chat_template,
475+
String reasoning_format,
473476
boolean embedding,
474477
int embd_normalize,
475478
int n_ctx,

android/src/main/jni.cpp

+21
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ Java_com_rnllama_LlamaContext_initContext(
223223
jobject thiz,
224224
jstring model_path_str,
225225
jstring chat_template,
226+
jstring reasoning_format,
226227
jboolean embedding,
227228
jint embd_normalize,
228229
jint n_ctx,
@@ -259,6 +260,13 @@ Java_com_rnllama_LlamaContext_initContext(
259260
const char *chat_template_chars = env->GetStringUTFChars(chat_template, nullptr);
260261
defaultParams.chat_template = chat_template_chars;
261262

263+
const char *reasoning_format_chars = env->GetStringUTFChars(reasoning_format, nullptr);
264+
if (strcmp(reasoning_format_chars, "deepseek") == 0) {
265+
defaultParams.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
266+
} else {
267+
defaultParams.reasoning_format = COMMON_REASONING_FORMAT_NONE;
268+
}
269+
262270
defaultParams.n_ctx = n_ctx;
263271
defaultParams.n_batch = n_batch;
264272
defaultParams.n_ubatch = n_ubatch;
@@ -326,6 +334,7 @@ Java_com_rnllama_LlamaContext_initContext(
326334

327335
env->ReleaseStringUTFChars(model_path_str, model_path_chars);
328336
env->ReleaseStringUTFChars(chat_template, chat_template_chars);
337+
env->ReleaseStringUTFChars(reasoning_format, reasoning_format_chars);
329338
env->ReleaseStringUTFChars(cache_type_k, cache_type_k_chars);
330339
env->ReleaseStringUTFChars(cache_type_v, cache_type_v_chars);
331340

@@ -884,10 +893,16 @@ Java_com_rnllama_LlamaContext_doCompletion(
884893
llama->is_predicting = false;
885894

886895
auto toolCalls = createWritableArray(env);
896+
std::string reasoningContent = "";
897+
std::string *content = nullptr;
887898
auto toolCallsSize = 0;
888899
if (!llama->is_interrupted) {
889900
try {
890901
common_chat_msg message = common_chat_parse(llama->generated_text, static_cast<common_chat_format>(chat_format));
902+
if (!message.reasoning_content.empty()) {
903+
reasoningContent = message.reasoning_content;
904+
}
905+
content = &message.content;
891906
for (const auto &tc : message.tool_calls) {
892907
auto toolCall = createWriteableMap(env);
893908
putString(env, toolCall, "type", "function");
@@ -908,6 +923,12 @@ Java_com_rnllama_LlamaContext_doCompletion(
908923

909924
auto result = createWriteableMap(env);
910925
putString(env, result, "text", llama->generated_text.c_str());
926+
if (content) {
927+
putString(env, result, "content", content->c_str());
928+
}
929+
if (!reasoningContent.empty()) {
930+
putString(env, result, "reasoning_content", reasoningContent.c_str());
931+
}
911932
if (toolCallsSize > 0) {
912933
putArray(env, result, "tool_calls", toolCalls);
913934
}

cpp/rn-llama.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ common_chat_params llama_rn_context::getFormattedChatWithJinja(
232232
if (!json_schema.empty()) {
233233
inputs.json_schema = json::parse(json_schema);
234234
}
235+
inputs.extract_reasoning = params.reasoning_format != COMMON_REASONING_FORMAT_NONE;
235236
inputs.stream = true;
236237

237238
// If chat_template is provided, create new one and use it (probably slow)

example/src/App.tsx

+4-2
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,12 @@ export default function App() {
144144
initLlama(
145145
{
146146
model: file.uri,
147-
n_ctx: 200,
148147
use_mlock: true,
149148
lora_list: loraFile ? [{ path: loraFile.uri, scaled: 1.0 }] : undefined, // Or lora: loraFile?.uri,
150149

150+
// If using a DeepSeek R1 Distill model
151+
reasoning_format: 'deepseek',
152+
151153
// Currently only for iOS
152154
n_gpu_layers: Platform.OS === 'ios' ? 99 : 0,
153155
// no_gpu_devices: true, // (iOS only)
@@ -474,7 +476,7 @@ export default function App() {
474476
],
475477
}
476478
// Comment to test:
477-
jinjaParams = undefined
479+
jinjaParams = { jinja: true }
478480
}
479481

480482
// Test area

ios/RNLlamaContext.mm

+16-1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ + (instancetype)initWithParams:(NSDictionary *)params onProgress:(void (^)(unsig
9090
NSLog(@"chatTemplate: %@", chatTemplate);
9191
}
9292

93+
NSString *reasoningFormat = params[@"reasoning_format"];
94+
if (reasoningFormat && [reasoningFormat isEqualToString:@"deepseek"]) {
95+
defaultParams.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
96+
} else {
97+
defaultParams.reasoning_format = COMMON_REASONING_FORMAT_NONE;
98+
}
99+
93100
if (params[@"n_ctx"]) defaultParams.n_ctx = [params[@"n_ctx"] intValue];
94101
if (params[@"use_mlock"]) defaultParams.use_mlock = [params[@"use_mlock"]boolValue];
95102

@@ -610,10 +617,16 @@ - (NSDictionary *)completion:(NSDictionary *)params
610617
const auto timings = llama_perf_context(llama->ctx);
611618

612619
NSMutableArray *toolCalls = nil;
620+
NSString *reasoningContent = nil;
621+
NSString *content = nil;
613622
if (!llama->is_interrupted) {
614623
try {
615624
auto chat_format = params[@"chat_format"] ? [params[@"chat_format"] intValue] : COMMON_CHAT_FORMAT_CONTENT_ONLY;
616625
common_chat_msg message = common_chat_parse(llama->generated_text, static_cast<common_chat_format>(chat_format));
626+
if (!message.reasoning_content.empty()) {
627+
reasoningContent = [NSString stringWithUTF8String:message.reasoning_content.c_str()];
628+
}
629+
content = [NSString stringWithUTF8String:message.content.c_str()];
617630
toolCalls = [[NSMutableArray alloc] init];
618631
for (const auto &tc : message.tool_calls) {
619632
[toolCalls addObject:@{
@@ -631,7 +644,9 @@ - (NSDictionary *)completion:(NSDictionary *)params
631644
}
632645

633646
NSMutableDictionary *result = [[NSMutableDictionary alloc] init];
634-
result[@"text"] = [NSString stringWithUTF8String:llama->generated_text.c_str()];
647+
result[@"text"] = [NSString stringWithUTF8String:llama->generated_text.c_str()]; // Original text
648+
if (content) result[@"content"] = content;
649+
if (reasoningContent) result[@"reasoning_content"] = reasoningContent;
635650
if (toolCalls && toolCalls.count > 0) result[@"tool_calls"] = toolCalls;
636651
result[@"completion_probabilities"] = [self tokenProbsToDict:llama->generated_token_probs];
637652
result[@"tokens_predicted"] = @(llama->num_tokens_predicted);

src/NativeRNLlama.ts

+17
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ export type NativeContextParams = {
1212
*/
1313
chat_template?: string
1414

15+
reasoning_format?: string
16+
1517
is_model_asset?: boolean
1618
use_progress_callback?: boolean
1719

@@ -236,7 +238,18 @@ export type NativeCompletionResultTimings = {
236238
}
237239

238240
export type NativeCompletionResult = {
241+
/**
242+
* Original raw text (reasoning_content / tool_calls are not stripped out)
243+
*/
239244
text: string
245+
246+
/**
247+
* Reasoning content (parsed when using a reasoning model)
248+
*/
249+
reasoning_content?: string
250+
/**
251+
* Tool calls
252+
*/
240253
tool_calls: Array<{
241254
type: 'function'
242255
function: {
@@ -245,6 +258,10 @@ export type NativeCompletionResult = {
245258
}
246259
id?: string
247260
}>
261+
/**
262+
* Content text (text with reasoning_content / tool_calls filtered out)
263+
*/
264+
content?: string
248265

249266
tokens_predicted: number
250267
tokens_evaluated: number

0 commit comments

Comments
 (0)