Commit c093e65

Merge branch 'master' into gb_cuda_optimize_neighbor_sampler

Authored Mar 18, 2024
2 parents a672a29 + a2c5472, commit c093e65

8 files changed: +227 -47 lines

graphbolt/include/graphbolt/continuous_seed.h (+25)

@@ -94,6 +94,31 @@ class continuous_seed {
 #endif  // __CUDA_ARCH__
 };
 
+class single_seed {
+  uint64_t seed_;
+
+ public:
+  /* implicit */ single_seed(const int64_t seed) : seed_(seed) {}  // NOLINT
+
+  single_seed(torch::Tensor seed_arr)
+      : seed_(seed_arr.data_ptr<int64_t>()[0]) {}
+
+#ifdef __CUDACC__
+  __device__ inline float uniform(const uint64_t id) const {
+    const uint64_t kCurandSeed = 999961;  // Could be any random number.
+    curandStatePhilox4_32_10_t rng;
+    curand_init(kCurandSeed, seed_, id, &rng);
+    return curand_uniform(&rng);
+  }
+#else
+  inline float uniform(const uint64_t id) const {
+    pcg32 ng0(seed_, id);
+    std::uniform_real_distribution<float> uni;
+    return uni(ng0);
+  }
+#endif  // __CUDA_ARCH__
+};
+
 }  // namespace graphbolt
 
 #endif  // GRAPHBOLT_CONTINUOUS_SEED_H_
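
The useful property of single_seed is that the variate for an edge depends only on the pair (seed_, id): the same seed reproduces the same variates on every call, which is what lets separate sampling calls that share a seed draw overlapping neighborhoods. Below is a minimal Python sketch of that contract, using NumPy's Philox bit generator as an illustrative stand-in for curand's Philox4_32_10 and pcg32 (it is not bit-for-bit identical to either).

import numpy as np

def single_seed_uniform(seed: int, edge_id: int) -> float:
    # Deterministic in (seed, edge_id): the same pair always yields the
    # same variate, mirroring the role of single_seed::uniform() above.
    rng = np.random.Generator(np.random.Philox(key=[seed, edge_id]))
    return float(rng.random())

assert single_seed_uniform(7, 42) == single_seed_uniform(7, 42)  # reproducible
assert single_seed_uniform(7, 43) != single_seed_uniform(7, 42)  # varies with id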

graphbolt/include/graphbolt/fused_csc_sampling_graph.h (+21 -10)

@@ -17,7 +17,11 @@
 namespace graphbolt {
 namespace sampling {
 
-enum SamplerType { NEIGHBOR, LABOR };
+enum SamplerType { NEIGHBOR, LABOR, LABOR_DEPENDENT };
+
+constexpr bool is_labor(SamplerType S) {
+  return S == SamplerType::LABOR || S == SamplerType::LABOR_DEPENDENT;
+}
 
 template <SamplerType S>
 struct SamplerArgs;
@@ -27,6 +31,13 @@ struct SamplerArgs<SamplerType::NEIGHBOR> {};
 
 template <>
 struct SamplerArgs<SamplerType::LABOR> {
+  const torch::Tensor& indices;
+  single_seed random_seed;
+  int64_t num_nodes;
+};
+
+template <>
+struct SamplerArgs<SamplerType::LABOR_DEPENDENT> {
   const torch::Tensor& indices;
   continuous_seed random_seed;
   int64_t num_nodes;
@@ -555,12 +566,12 @@ int64_t Pick(
     const torch::optional<torch::Tensor>& probs_or_mask,
     SamplerArgs<SamplerType::NEIGHBOR> args, PickedType* picked_data_ptr);
 
-template <typename PickedType>
-int64_t Pick(
+template <SamplerType S, typename PickedType>
+std::enable_if_t<is_labor(S), int64_t> Pick(
     int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
     const torch::TensorOptions& options,
-    const torch::optional<torch::Tensor>& probs_or_mask,
-    SamplerArgs<SamplerType::LABOR> args, PickedType* picked_data_ptr);
+    const torch::optional<torch::Tensor>& probs_or_mask, SamplerArgs<S> args,
+    PickedType* picked_data_ptr);
 
 template <typename PickedType>
 int64_t TemporalPick(
@@ -619,13 +630,13 @@ int64_t TemporalPickByEtype(
     PickedType* picked_data_ptr);
 
 template <
-    bool NonUniform, bool Replace, typename ProbsType, typename PickedType,
-    int StackSize = 1024>
-int64_t LaborPick(
+    bool NonUniform, bool Replace, typename ProbsType, SamplerType S,
+    typename PickedType, int StackSize = 1024>
+std::enable_if_t<is_labor(S), int64_t> LaborPick(
     int64_t offset, int64_t num_neighbors, int64_t fanout,
     const torch::TensorOptions& options,
-    const torch::optional<torch::Tensor>& probs_or_mask,
-    SamplerArgs<SamplerType::LABOR> args, PickedType* picked_data_ptr);
+    const torch::optional<torch::Tensor>& probs_or_mask, SamplerArgs<S> args,
+    PickedType* picked_data_ptr);
 
 }  // namespace sampling
 }  // namespace graphbolt

graphbolt/src/fused_csc_sampling_graph.cc (+46 -34)

@@ -15,6 +15,7 @@
 #include <limits>
 #include <numeric>
 #include <tuple>
+#include <type_traits>
 #include <vector>
 
 #include "./macro.h"
@@ -660,26 +661,37 @@ c10::intrusive_ptr<FusedSampledSubgraph> FusedCSCSamplingGraph::SampleNeighbors(
   }
 
   if (layer) {
-    SamplerArgs<SamplerType::LABOR> args = [&] {
-      if (random_seed.has_value()) {
-        return SamplerArgs<SamplerType::LABOR>{
-            indices_,
-            {random_seed.value(), static_cast<float>(seed2_contribution)},
-            NumNodes()};
-      } else {
-        return SamplerArgs<SamplerType::LABOR>{
-            indices_,
-            RandomEngine::ThreadLocal()->RandInt(
-                static_cast<int64_t>(0), std::numeric_limits<int64_t>::max()),
-            NumNodes()};
-      }
-    }();
-    return SampleNeighborsImpl(
-        nodes.value(), return_eids,
-        GetNumPickFn(fanouts, replace, type_per_edge_, probs_or_mask),
-        GetPickFn(
-            fanouts, replace, indptr_.options(), type_per_edge_, probs_or_mask,
-            args));
+    if (random_seed.has_value() && random_seed->numel() >= 2) {
+      SamplerArgs<SamplerType::LABOR_DEPENDENT> args{
+          indices_,
+          {random_seed.value(), static_cast<float>(seed2_contribution)},
+          NumNodes()};
+      return SampleNeighborsImpl(
+          nodes.value(), return_eids,
+          GetNumPickFn(fanouts, replace, type_per_edge_, probs_or_mask),
+          GetPickFn(
+              fanouts, replace, indptr_.options(), type_per_edge_,
+              probs_or_mask, args));
+    } else {
+      auto args = [&] {
+        if (random_seed.has_value() && random_seed->numel() == 1) {
+          return SamplerArgs<SamplerType::LABOR>{
+              indices_, random_seed.value(), NumNodes()};
+        } else {
+          return SamplerArgs<SamplerType::LABOR>{
+              indices_,
+              RandomEngine::ThreadLocal()->RandInt(
+                  static_cast<int64_t>(0), std::numeric_limits<int64_t>::max()),
+              NumNodes()};
+        }
+      }();
+      return SampleNeighborsImpl(
+          nodes.value(), return_eids,
+          GetNumPickFn(fanouts, replace, type_per_edge_, probs_or_mask),
+          GetPickFn(
+              fanouts, replace, indptr_.options(), type_per_edge_,
+              probs_or_mask, args));
+    }
   } else {
     SamplerArgs<SamplerType::NEIGHBOR> args;
     return SampleNeighborsImpl(
@@ -1297,7 +1309,7 @@ int64_t TemporalPick(
     }
     return picked_indices.numel();
   }
-  if constexpr (S == SamplerType::LABOR) {
+  if constexpr (is_labor(S)) {
     return Pick(
         offset, num_neighbors, fanout, replace, options, masked_prob, args,
         picked_data_ptr);
@@ -1383,12 +1395,12 @@ int64_t TemporalPickByEtype(
   return pick_offset;
 }
 
-template <typename PickedType>
-int64_t Pick(
+template <SamplerType S, typename PickedType>
+std::enable_if_t<is_labor(S), int64_t> Pick(
     int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
     const torch::TensorOptions& options,
-    const torch::optional<torch::Tensor>& probs_or_mask,
-    SamplerArgs<SamplerType::LABOR> args, PickedType* picked_data_ptr) {
+    const torch::optional<torch::Tensor>& probs_or_mask, SamplerArgs<S> args,
+    PickedType* picked_data_ptr) {
   if (fanout == 0) return 0;
   if (probs_or_mask.has_value()) {
     if (fanout < 0) {
@@ -1438,9 +1450,9 @@ inline T invcdf(T u, int64_t n, T rem) {
   return rem * (one - std::pow(one - u, one / n));
 }
 
-template <typename T>
+template <typename T, typename seed_t>
 inline T jth_sorted_uniform_random(
-    continuous_seed seed, int64_t t, int64_t c, int64_t j, T& rem, int64_t n) {
+    seed_t seed, int64_t t, int64_t c, int64_t j, T& rem, int64_t n) {
   const T u = seed.uniform(t + j * c);
   // https://mathematica.stackexchange.com/a/256707
   rem -= invcdf(u, n, rem);
@@ -1474,13 +1486,13 @@ inline T jth_sorted_uniform_random(
  * should be put. Enough memory space should be allocated in advance.
  */
 template <
-    bool NonUniform, bool Replace, typename ProbsType, typename PickedType,
-    int StackSize>
-inline int64_t LaborPick(
+    bool NonUniform, bool Replace, typename ProbsType, SamplerType S,
+    typename PickedType, int StackSize>
+inline std::enable_if_t<is_labor(S), int64_t> LaborPick(
     int64_t offset, int64_t num_neighbors, int64_t fanout,
     const torch::TensorOptions& options,
-    const torch::optional<torch::Tensor>& probs_or_mask,
-    SamplerArgs<SamplerType::LABOR> args, PickedType* picked_data_ptr) {
+    const torch::optional<torch::Tensor>& probs_or_mask, SamplerArgs<S> args,
+    PickedType* picked_data_ptr) {
   fanout = Replace ? fanout : std::min(fanout, num_neighbors);
   if (!NonUniform && !Replace && fanout >= num_neighbors) {
     std::iota(picked_data_ptr, picked_data_ptr + num_neighbors, offset);
@@ -1504,8 +1516,8 @@ inline int64_t LaborPick(
   }
   AT_DISPATCH_INDEX_TYPES(
       args.indices.scalar_type(), "LaborPickMain", ([&] {
-        const index_t* local_indices_data =
-            args.indices.data_ptr<index_t>() + offset;
+        const auto local_indices_data =
+            reinterpret_cast<index_t*>(args.indices.data_ptr()) + offset;
         if constexpr (Replace) {
           // [Algorithm] @mfbalin
           // Use a max-heap to get rid of the big random numbers and filter the
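
In short, the new branch selects the sampler variant from the shape of random_seed: two or more elements dispatch to LABOR_DEPENDENT (an interpolating continuous_seed), a single element dispatches to LABOR with the cheaper single_seed, and no seed falls back to LABOR with a fresh thread-local random integer. A hedged Python paraphrase of that selection logic follows; the helper name and return shape are illustrative, not part of the C++ API.

import torch

def select_labor_args(random_seed, seed2_contribution, num_nodes):
    # Mirrors the branch added to FusedCSCSamplingGraph::SampleNeighbors above.
    if random_seed is not None and random_seed.numel() >= 2:
        # Two seeds: LABOR_DEPENDENT, continuous_seed interpolates between them.
        return "LABOR_DEPENDENT", (random_seed, float(seed2_contribution)), num_nodes
    if random_seed is not None and random_seed.numel() == 1:
        # One seed: LABOR with the lighter single_seed.
        return "LABOR", int(random_seed.item()), num_nodes
    # No seed supplied: draw one, standing in for RandomEngine::ThreadLocal()->RandInt.
    fresh = int(torch.randint(0, 2**62, (1,)).item())
    return "LABOR", fresh, num_nodes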

python/dgl/graphbolt/impl/fused_csc_sampling_graph.py (+34)

@@ -791,6 +791,32 @@ def sample_layer_neighbors(
             corresponding to each neighboring edge of a node. It must be a 1D
             floating-point or boolean tensor, with the number of elements
             equalling the total number of edges.
+        random_seed: torch.Tensor, optional
+            An int64 tensor with one or two elements.
+
+            The passed random_seed makes it so that for any seed node ``s`` and
+            its neighbor ``t``, the rolled random variate ``r_t`` is the same
+            for any call to this function with the same random seed. When
+            sampling as part of the same batch, one would want identical seeds
+            so that LABOR can sample globally. For example, for heterogeneous
+            graphs, a single random seed is passed for all edge types. This
+            samples far fewer nodes compared to using a unique random seed for
+            each edge type. If one called this function individually for each
+            edge type of a heterogeneous graph with different random seeds,
+            then it would run LABOR locally for each edge type, resulting in a
+            larger number of nodes being sampled.
+
+            If this function is called without a ``random_seed``, the random
+            seed is drawn from GraphBolt's random engine. Pass an identical
+            random_seed if multiple calls to this function are used to sample
+            as part of a single batch.
+
+            If given two numbers, then the ``seed2_contribution`` argument
+            determines the interpolation between the two random seeds.
+        seed2_contribution: float, optional
+            A float value in [0, 1) that determines the contribution of the
+            second random seed, ``random_seed[-1]``, to generating the random
+            variates.
 
         Returns
         -------
@@ -826,6 +852,14 @@ def sample_layer_neighbors(
            nodes = self._convert_to_homogeneous_nodes(nodes)

        self._check_sampler_arguments(nodes, fanouts, probs_name)
+        if random_seed is not None:
+            assert (
+                1 <= len(random_seed) <= 2
+            ), "There should be 1 or 2 random seeds."
+            if len(random_seed) == 2:
+                assert (
+                    0 <= seed2_contribution <= 1
+                ), "seed2_contribution should be in [0, 1]."
        has_original_eids = (
            self.edge_attributes is not None
            and ORIGINAL_EDGE_ID in self.edge_attributes
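
For example, a caller that splits one logical batch across two calls would pass the same seed to both so that LABOR samples globally. The sketch below assumes ``graph`` is a FusedCSCSamplingGraph and ``seeds_a``/``seeds_b`` are seed-node tensors; the variable names and the fanout values are illustrative.

import torch

fanouts = torch.tensor([10, 10])
shared_seed = torch.randint(0, 2**62, (1,), dtype=torch.int64)

# Same seed for both calls -> identical per-neighbor variates, so the two
# sampled subgraphs overlap as if drawn in a single LABOR pass.
sg_a = graph.sample_layer_neighbors(seeds_a, fanouts, random_seed=shared_seed)
sg_b = graph.sample_layer_neighbors(seeds_b, fanouts, random_seed=shared_seed)

# Two seeds plus seed2_contribution interpolate between two sets of variates.
seed_pair = torch.randint(0, 2**62, (2,), dtype=torch.int64)
sg_c = graph.sample_layer_neighbors(
    seeds_a, fanouts, random_seed=seed_pair, seed2_contribution=0.5
)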

python/dgl/graphbolt/impl/neighbor_sampler.py (+14 -1)

@@ -483,7 +483,7 @@ class LayerNeighborSampler(NeighborSamplerImpl):
     Sampler that builds computational dependency of node representations via
     labor sampling for multilayer GNN from the NeurIPS 2023 paper
     `Layer-Neighbor Sampling -- Defusing Neighborhood Explosion in GNNs
-    <https://arxiv.org/abs/2210.13339>`__
+    <https://proceedings.neurips.cc/paper_files/paper/2023/file/51f9036d5e7ae822da8f6d4adda1fb39-Paper-Conference.pdf>`__
 
     Layer-Neighbor sampler is responsible for sampling a subgraph from given
     data. It returns an induced subgraph along with compacted information. In
@@ -526,6 +526,19 @@ class LayerNeighborSampler(NeighborSamplerImpl):
         Boolean indicating whether seeds between hops will be deduplicated.
         If True, the same elements in seeds will be deleted to only one.
         Otherwise, the same elements will be remained.
+    layer_dependency: bool
+        Boolean indicating whether different layers should use the same random
+        variates. Results in a reduction in the number of nodes sampled and
+        turns LayerNeighborSampler into a subgraph sampling method. Later
+        layers are guaranteed to sample neighbors that overlap with those of
+        the previous layers.
+    batch_dependency: int
+        Specifies whether consecutive minibatches should use similar random
+        variates. Results in a higher temporal access locality of sampled
+        nodes and edges. Setting it to :math:`\\kappa` slows down the change in
+        the random variates proportionally to :math:`\\frac{1}{\\kappa}`.
+        Implements the dependent minibatching approach of `arXiv:2310.12403
+        <https://arxiv.org/abs/2310.12403>`__.
 
     Examples
     -------
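
A usage sketch for the two new knobs, assuming an existing ItemSampler datapipe ``dp`` and a FusedCSCSamplingGraph ``graph`` (both placeholders); the keyword names follow the docstring above, while the fanout format and values are assumptions.

import torch
import dgl.graphbolt as gb

sampler = gb.LayerNeighborSampler(
    dp,                      # upstream datapipe producing minibatches
    graph,
    fanouts=[torch.tensor([10]), torch.tensor([10])],
    layer_dependency=True,   # reuse the same random variates across layers
    batch_dependency=32,     # kappa: variates drift slowly across minibatches
)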

python/dgl/graphbolt/minibatch.py (+5)

@@ -536,6 +536,11 @@ def to_pyg_data(self):
                 batch_size = len(next(iter(self.node_pairs.values()))[0])
             else:
                 batch_size = len(self.node_pairs[0])
+        elif self.seeds is not None:
+            if isinstance(self.seeds, Dict):
+                batch_size = len(next(iter(self.seeds.values())))
+            else:
+                batch_size = len(self.seeds)
         else:
             batch_size = None
         pyg_data = Data(
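
With this change, to_pyg_data() can infer batch_size from ``seeds`` as well (a tensor or a per-type dict), not only from node_pairs or seed_nodes. A small sketch mirroring the new branch; it assumes a MiniBatch populated only with ``seeds`` is otherwise valid for conversion, as exercised by the new test below.

import torch
import dgl.graphbolt as gb

mb = gb.MiniBatch(seeds=torch.tensor([0, 1, 2]))
assert mb.to_pyg_data().batch_size == 3        # len() of the seeds tensor

mb = gb.MiniBatch(seeds={"user": torch.tensor([[0, 1], [2, 3]])})
assert mb.to_pyg_data().batch_size == 2        # len() of the first dict value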

python/setup.py (+1)

@@ -227,6 +227,7 @@ def get_lib_file_path(lib_name, backend=""):
     "tqdm",
     "psutil>=5.8.0",
     "torchdata>=0.5.0",
+    "pandas",
 ]
 if "DGLBACKEND" in os.environ and os.environ["DGLBACKEND"] != "pytorch":
     install_requires.pop(install_requires.index("torchdata>=0.5.0"))

tests/python/pytorch/graphbolt/test_minibatch.py (+81 -2)

@@ -868,7 +868,7 @@ def test_dgl_link_predication_hetero(mode):
     )
 
 
-def test_to_pyg_data():
+def test_to_pyg_data_original():
     test_minibatch = create_homo_minibatch()
     test_minibatch.seed_nodes = torch.tensor([0, 1])
     test_minibatch.labels = torch.tensor([7, 8])
@@ -929,7 +929,86 @@ def test_to_pyg_data():
     try:
         pyg_data = test_minibatch.to_pyg_data()
         assert (
-            pyg_data.x is None,
+            pyg_data.x is None
+        ), "Multiple features case should raise an error."
+    except AssertionError as e:
+        assert (
+            str(e)
+            == "`to_pyg_data` only supports single feature homogeneous graph."
+        )
+
+
+def test_to_pyg_data():
+    test_minibatch = create_homo_minibatch()
+    test_minibatch.seeds = torch.tensor([0, 1])
+    test_minibatch.labels = torch.tensor([7, 8])
+
+    expected_edge_index = torch.tensor(
+        [[0, 0, 1, 1, 1, 2, 2, 2, 2], [0, 1, 0, 1, 2, 0, 1, 2, 3]]
+    )
+    expected_node_features = next(iter(test_minibatch.node_features.values()))
+    expected_labels = torch.tensor([7, 8])
+    expected_batch_size = 2
+    expected_n_id = torch.tensor([10, 11, 12, 13])
+
+    pyg_data = test_minibatch.to_pyg_data()
+    pyg_data.validate()
+    assert torch.equal(pyg_data.edge_index, expected_edge_index)
+    assert torch.equal(pyg_data.x, expected_node_features)
+    assert torch.equal(pyg_data.y, expected_labels)
+    assert pyg_data.batch_size == expected_batch_size
+    assert torch.equal(pyg_data.n_id, expected_n_id)
+
+    test_minibatch.seeds = torch.tensor([[0, 1], [2, 3]])
+    assert pyg_data.batch_size == expected_batch_size
+
+    test_minibatch.seeds = {"A": torch.tensor([0, 1])}
+    assert pyg_data.batch_size == expected_batch_size
+
+    test_minibatch.seeds = {"A": torch.tensor([[0, 1], [2, 3]])}
+    assert pyg_data.batch_size == expected_batch_size
+
+    subgraph = test_minibatch.sampled_subgraphs[0]
+    # Test with sampled_csc as None.
+    test_minibatch = gb.MiniBatch(
+        sampled_subgraphs=None,
+        node_features={"feat": expected_node_features},
+        labels=expected_labels,
+    )
+    pyg_data = test_minibatch.to_pyg_data()
+    assert pyg_data.edge_index is None, "Edge index should be none."
+
+    # Test with node_features as None.
+    test_minibatch = gb.MiniBatch(
+        sampled_subgraphs=[subgraph],
+        node_features=None,
+        labels=expected_labels,
+    )
+    pyg_data = test_minibatch.to_pyg_data()
+    assert pyg_data.x is None, "Node features should be None."
+
+    # Test with labels as None.
+    test_minibatch = gb.MiniBatch(
+        sampled_subgraphs=[subgraph],
+        node_features={"feat": expected_node_features},
+        labels=None,
+    )
+    pyg_data = test_minibatch.to_pyg_data()
+    assert pyg_data.y is None, "Labels should be None."
+
+    # Test with multiple features.
+    test_minibatch = gb.MiniBatch(
+        sampled_subgraphs=[subgraph],
+        node_features={
+            "feat": expected_node_features,
+            "extra_feat": torch.tensor([[3], [4]]),
+        },
+        labels=expected_labels,
+    )
+    try:
+        pyg_data = test_minibatch.to_pyg_data()
+        assert (
+            pyg_data.x is None
         ), "Multiple features case should raise an error."
     except AssertionError as e:
         assert (