File tree: 2 files changed, +4 -2 lines changed.
Columns: original file line number | diff line number | diff line change
@@ -241,10 +241,11 @@ def __init__(
241
241
device = self .device )
242
242
243
243
# OPTIMIZATION: Cache the tensors rather than creating them every step.
244
+ # Keep in int64 to avoid overflow with long context
244
245
self .arange_np = np .arange (max (self .max_num_reqs + 1 ,
245
246
self .max_model_len ,
246
247
self .max_num_tokens ),
247
- dtype = np .int32 )
248
+ dtype = np .int64 )
248
249
# NOTE(woosuk): These tensors are "stateless", i.e., they are literally
249
250
# a faster version of creating a new tensor every time. Thus, we should
250
251
# not make any assumptions about the values in these tensors.
Columns: original file line number | diff line number | diff line change
@@ -219,7 +219,8 @@ def __init__(
219
219
220
220
# Range tensor with values [0 .. self.max_num_tokens - 1].
221
221
# Used to initialize positions / context_lens / seq_lens
222
- self .arange_np = np .arange (self .max_num_tokens , dtype = np .int32 )
222
+ # Keep in int64 to avoid overflow with long context
223
+ self .arange_np = np .arange (self .max_num_tokens , dtype = np .int64 )
223
224
self .num_reqs_paddings = _get_req_paddings (
224
225
min_req_size = MIN_NUM_SEQS , max_req_size = self .max_num_reqs )
225
226
You can’t perform that action at this time.
0 commit comments