[Kernel][Quantization] Custom Floating-Point Runtime Quantization #8751

Open
wants to merge 24 commits into base: main
Changes from 1 commit
Commits (24)
be2dc59
add kernels
AlpinDale Sep 23, 2024
a4cafa3
implement quant_llm within vLLM
AlpinDale Sep 23, 2024
4e57c0a
format
AlpinDale Sep 24, 2024
c180657
Update vllm/config.py
AlpinDale Sep 24, 2024
425c5ad
Update csrc/quantization/quant_llm/quant_llm_linear.cu
AlpinDale Sep 24, 2024
e4bc2d7
format
AlpinDale Sep 24, 2024
070990e
Update csrc/quantization/quant_llm/quant_llm_linear.cu
AlpinDale Sep 24, 2024
aa808f5
remove `quant_llm` as a quant method
AlpinDale Sep 24, 2024
b4ddabb
Update csrc/quantization/quant_llm/quant_llm_linear.cu
AlpinDale Sep 25, 2024
2cc8ba0
Update csrc/quantization/quant_llm/quant_llm_linear.cu
AlpinDale Sep 25, 2024
7539fc6
address comments: rename to fp_eXmY and ignore for clang-format
AlpinDale Sep 25, 2024
931da32
address comment: keep one source of truth for exp/weight bits
AlpinDale Sep 25, 2024
5cfbeee
use TORCH_CHECK instead of assert
AlpinDale Sep 25, 2024
298df53
remove unneeded comment about min capability
AlpinDale Sep 25, 2024
0656923
remove config filenames
AlpinDale Sep 25, 2024
3900ccf
warnings for fp16 downcast and cuda graphs
AlpinDale Sep 25, 2024
e60b702
Merge branch 'main' into quant_llm
AlpinDale Sep 28, 2024
d834201
Format
mgoin Sep 30, 2024
7b4dd80
Merge branch 'main' into quant_llm
AlpinDale Oct 27, 2024
dd85d7a
fully rename everything to fp_eXmY
AlpinDale Oct 27, 2024
3e42deb
pass `_in_feats.device()` to stream capture
AlpinDale Oct 27, 2024
81b2c0f
Revert "pass `_in_feats.device()` to stream capture"
AlpinDale Oct 27, 2024
9341a47
use the total number of tokens (flattened bsz) for split-K reduction
AlpinDale Oct 27, 2024
0bdfb5d
Revert "Revert "pass `_in_feats.device()` to stream capture""
AlpinDale Oct 27, 2024
Revert "Revert "pass _in_feats.device() to stream capture""
This reverts commit 81b2c0f.
AlpinDale committed Oct 27, 2024

commit 0bdfb5db554accc94f4bb790905d3354242c476e
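
The diff below resolves the CUDA stream for the device that actually holds the input tensor, rather than relying on the implicit current device. As a rough illustration of the pattern (a minimal sketch with an illustrative dummy_kernel and launch_on_current_stream helper, not the PR's actual kernel), launching on the ambient PyTorch stream is what allows CUDA graph capture under torch.compile to record the kernel, whereas work enqueued on the legacy default stream (0) is not captured:

#include <ATen/cuda/CUDAContext.h>
#include <torch/all.h>

__global__ void dummy_kernel(float* out) { out[0] = 1.0f; }

void launch_on_current_stream(torch::Tensor in_feats, torch::Tensor out) {
  // Pick the stream PyTorch considers "current" for the device that holds
  // the inputs, not the process-wide default stream (0); during CUDA graph
  // capture only work enqueued on the capturing stream is recorded.
  auto dev = in_feats.device().index();
  cudaStream_t stream = at::cuda::getCurrentCUDAStream(dev);
  dummy_kernel<<<1, 1, 0, stream>>>(out.data_ptr<float>());
}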
3 changes: 2 additions & 1 deletion csrc/quantization/fp_eXmY/fp_eXmY_linear.cu
@@ -230,7 +230,8 @@ torch::Tensor fp_eXmY_linear_forward_cuda(int64_t EXPONENT, int64_t MANTISSA,
   // NOTE(alpin): use at::cuda::getCurrentCUDAStream() instead of default
   // stream (0) this fixes problem with CUDA graphs when used with
   // torch.compile()
-  auto stream = at::cuda::getCurrentCUDAStream();
+  auto dev = _in_feats.device().index();
+  auto stream = at::cuda::getCurrentCUDAStream(dev);

   /*
   The heuristic is weight_bit - exponent_bit - 1 = mantissa_bit
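
The truncated comment at the end of the hunk describes the format's bit budget: one sign bit plus X exponent bits plus Y mantissa bits. A minimal sketch of that arithmetic follows (the helper name infer_mantissa_bits is illustrative, not code from the PR; the PR's commits only confirm that TORCH_CHECK is used for validation):

#include <torch/all.h>

// mantissa_bit = weight_bit - exponent_bit - 1 (the remaining bit is the sign).
static int64_t infer_mantissa_bits(int64_t weight_bits, int64_t exponent_bits) {
  int64_t mantissa_bits = weight_bits - exponent_bits - 1;
  TORCH_CHECK(mantissa_bits >= 0, "fp_eXmY: ", exponent_bits,
              " exponent bits do not fit in a ", weight_bits, "-bit format");
  return mantissa_bits;
}

// Example: a 6-bit weight with a 3-bit exponent (fp_e3m2) has 6 - 3 - 1 = 2
// mantissa bits.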