forked from jaak-s/macau
-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathdata_simulation.py
60 lines (43 loc) · 1.79 KB
/
data_simulation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Synthetic data generation for evaluating the Macau algorithm
## Basic ideas:
## 1. Implant a number of biclusters in a matrix. Each row of a bicluster is sampled from the same distribution.
## 2. Generate a different set of features for each bicluster.
import numpy as np
def gen_bicluster(nrows, ncols, mu, sigma=1.0):
    """Sample one dense bicluster of Gaussian values.

    Args:
        nrows: Number of rows in the bicluster.
        ncols: Number of columns in the bicluster.
        mu: Value added to every scaled sample (the mean of the block).
        sigma: Factor applied to the standard-normal samples before the
            shift (the standard deviation of the block).

    Returns:
        An ``nrows`` x ``ncols`` array equal to ``sigma * z + mu`` where
        ``z`` is drawn from NumPy's global RNG via ``np.random.randn``.
    """
    standard_normal = np.random.randn(nrows, ncols)
    return sigma * standard_normal + mu
def gen_side_feas(nrows, ncols, p):
    """Sample a binary side-feature block.

    Args:
        nrows: Number of rows (entities) in the block.
        ncols: Number of feature columns in the block.
        p: Probability that any single feature is 1.

    Returns:
        An ``nrows`` x ``ncols`` array of 0/1 values, each an independent
        single-trial binomial draw from NumPy's global RNG.
    """
    n_entries = nrows * ncols
    # Draw as one flat vector, then fold it into the requested shape.
    flat_draws = np.random.binomial(1, p, size=n_entries)
    return flat_draws.reshape(nrows, ncols)
def gen_matrix(nrows, ncols, nfeatures, nbiclusters, fea_prob=0.1, seed=1234):
    """Build a noisy matrix with biclusters implanted along its diagonal.

    The matrix starts as standard-normal noise and the side information
    starts as sparse Bernoulli(0.01) noise. Bicluster ``i`` then overwrites
    the ``i``-th diagonal block of the matrix with Gaussian values centred
    on its own mean, and overwrites the same rows of the side information,
    in the ``i``-th block of feature columns, with Bernoulli(``fea_prob``)
    values.

    Args:
        nrows: Number of rows of the matrix and of the side information.
        ncols: Number of columns of the matrix.
        nfeatures: Number of side-information columns.
        nbiclusters: Number of biclusters to implant.
        fea_prob: Probability of a 1 inside a bicluster's feature block.
        seed: Seed passed to ``np.random.seed`` before any sampling, which
            resets NumPy's global RNG. The default reproduces the
            previously hard-coded seed. Pass ``None`` to leave the global
            RNG state untouched.

    Returns:
        A dict with two entries: ``'matrix'`` (``nrows`` x ``ncols`` float
        array) and ``'sinfo'`` (``nrows`` x ``nfeatures`` 0/1 array).

    Raises:
        ZeroDivisionError: If ``nbiclusters`` is 0.
    """
    # Nominal block sizes. round() may round *up*, in which case the last
    # block(s) would overhang the arrays; that is handled when implanting.
    bic_nrows = int(round(nrows / nbiclusters))
    bic_ncols = int(round(ncols / nbiclusters))
    bic_nfeas = int(round(nfeatures / nbiclusters))

    if seed is not None:
        np.random.seed(seed)

    # The sampling order below is kept fixed so that a given seed always
    # yields the same data.
    bic_mus = 5 * np.random.randn(nbiclusters)
    bics = [gen_bicluster(bic_nrows, bic_ncols, mu) for mu in bic_mus]
    feas = [gen_side_feas(bic_nrows, bic_nfeas, fea_prob) for _ in bic_mus]

    matrix = np.random.randn(nrows, ncols)
    sinfo = np.random.binomial(1, 0.01, size=nrows * nfeatures).reshape(nrows, nfeatures)

    for i, (bic, fea) in enumerate(zip(bics, feas)):
        r = i * bic_nrows
        c = i * bic_ncols
        s = i * bic_nfeas

        # Basic slices are views clipped to the array bounds. Trimming the
        # sampled block to the view's shape lets a block that overhangs the
        # edge be implanted partially instead of raising a broadcast error.
        target = matrix[r:r + bic_nrows, c:c + bic_ncols]
        target[...] = bic[:target.shape[0], :target.shape[1]]

        fea_target = sinfo[r:r + bic_nrows, s:s + bic_nfeas]
        fea_target[...] = fea[:fea_target.shape[0], :fea_target.shape[1]]

    return {'matrix': matrix, 'sinfo': sinfo}
def sparsify(matrix, sparsity=0.2):
    """Return a copy of ``matrix`` with randomly chosen entries set to 0.

    Args:
        matrix: Array with at least two dimensions; it is not modified.
        sparsity: Fraction of entries that are *kept*. The number of
            entries zeroed is ``round((1 - sparsity) * nrows * ncols)``.

    Returns:
        A copy of ``matrix`` in which the selected positions, drawn
        uniformly without replacement from NumPy's global RNG, are 0.
    """
    nrows = matrix.shape[0]
    ncols = matrix.shape[1]
    n = nrows * ncols
    # int() guarantees an integer sample count even if round() hands back
    # a float (e.g. for some NumPy scalar inputs).
    m = int(round((1 - sparsity) * n))

    pos = np.random.choice(n, m, replace=False)
    smatrix = matrix.copy()
    # Convert flat row-major positions to (row, col) pairs and clear them
    # in a single vectorized assignment instead of a Python loop.
    smatrix[pos // ncols, pos % ncols] = 0
    return smatrix
# plt.imshow(m1, cmap='jet')
# plt.colorbar()