Skip to content

Commit 6bcce93

Browse files
committed Apr 18, 2020
retrying cohort size variation
1 parent d899e53 commit 6bcce93

6 files changed

+131
-9
lines changed
 

‎1_each_client_partially_iid.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,6 @@ def go(self, num, batch):
7878
multi_labels = np.asarray(multi_labels)
7979
num_data_per_client = []
8080

81-
# print(multi_iid.shape)
82-
# print(multi_labels.shape)
83-
8481
# assign each client non-IID and IID data
8582
for client_num in range(self.CLIENTS):
8683
# number of data points for this client
@@ -118,14 +115,19 @@ def go(self, num, batch):
118115
indices_slice = indices[:data_per_label]
119116
client_sample_x = np.append(client_sample_x, label_data_x[indices_slice], axis=0)
120117
client_sample_y = np.append(client_sample_y, label_data_y[indices_slice], axis=0)
118+
121119
# count multiplicities
122120
for i in range(len(indices_slice)):
123121
multi_labels[label][int(indices_slice[i])] = multi_labels[label][int(indices_slice[i])] + 1
122+
124123
# check data
125124
if np.average(client_sample_y) > 9 or np.average(client_sample_y) < 0:
125+
print("Error: At least one label out of range")
126126
print(np.average(client_sample_y), label)
127127
print()
128128

129+
# TODO: print data multiplicities
130+
129131
# track number of data points per client
130132
num_data_per_client.append(len(client_sample_x))
131133

‎3_shard.py

+2
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,9 @@ def go(self, num, batch):
102102

103103
# check data
104104
if np.average(client_sample_y) > 9 or np.average(client_sample_y) < 0:
105+
print("Error: At least one label out of range")
105106
print(np.average(client_sample_y))
107+
print()
106108

107109
# assign slices to single client
108110
dataset = tf.data.Dataset.from_tensor_slices((client_sample_x, client_sample_y))

‎graphing.py

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
import numpy as np
import math
import csv
import matplotlib.pyplot as plt

# Plots the number of training rounds against cohort size for the
# "clients partially IID" schema (s1), comparing a "1 CPU" run with a "2 CPU"
# run. Inputs are the per-test CSV files under results/<batch>/; the figure is
# written to results/<batch>/<batch>.rounds_vs_cohortsize.png.


def _results_path(batch_id, test_id, suffix):
    """Return the path of one result file for a (batch, test) pair."""
    return 'results/' + str(batch_id) + '/' + str(batch_id) + '.' + str(test_id) + suffix


def _load_rounds_and_accuracy(filename):
    """Read a schema output CSV and return (round numbers, accuracies * 100).

    Column 0 of every data row is parsed as the round number and column 4 as
    the accuracy. The accuracy is parsed with float() so that fractional
    values (e.g. "0.99") do not raise; presumably the column holds a fraction
    in [0, 1] -- TODO confirm against the writer of the CSV.

    Raises:
        ValueError: if the file contains no data rows, since there is then no
            final round to report.
    """
    round_nums = []
    accuracies = []
    with open(filename, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # skip the header row
        for row in reader:
            round_nums.append(int(row[0]))
            accuracies.append(float(row[4]) * 100)
    if not round_nums:
        raise ValueError('no data rows in ' + filename)
    return round_nums, accuracies


def _load_cohort_size(filename):
    """Return the first field of the first data row of a config CSV as a number.

    The value is converted to a number so matplotlib places it on a numeric
    axis; raw CSV strings would be plotted as evenly spaced categories.
    """
    with open(filename, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)  # skip the header row
        value = next(reader)[0]
    try:
        return int(value)
    except ValueError:
        return float(value)


batch = 5355833
test = [1, 2, 3, 4]

# for full batch:
# number of rounds vs cohort size for all three schemas
cohort_size = []
rounds_s1 = []
rounds_s3 = []  # schema 3 (sharding) is currently disabled, see below
rounds_s4 = []  # schema 4 (IID) is currently disabled, see below

for test_id in test:
    # for each test:
    # accuracy vs round number for all schemas
    round_num, accuracy_s1 = _load_rounds_and_accuracy(
        _results_path(batch, test_id, '.s1out.csv'))

    # the last recorded round is the number of rounds this test needed
    rounds_s1.append(round_num[-1])

    # To re-enable the other schemas, load them the same way:
    # round_num_s3, accuracy_s3 = _load_rounds_and_accuracy(
    #     _results_path(batch, test_id, '.s3out.csv'))
    # rounds_s3.append(round_num_s3[-1])
    # round_num_s4, accuracy_s4 = _load_rounds_and_accuracy(
    #     _results_path(batch, test_id, '.s4out.csv'))
    # rounds_s4.append(round_num_s4[-1])

    # get cohort size
    cohort_size.append(
        _load_cohort_size(_results_path(batch, test_id, '.config.csv')))

    # # data for cohort size specific plot
    # max_round = max(round_num) + 1
    # round_num = range(1, max_round)
    # plt.clf()
    # plt.plot(round_num, accuracy_s1, label='Schema 1: Clients partially IID')
    # plt.plot(round_num, accuracy_s3, label='Schema 3: Sharding')
    # plt.plot(round_num, accuracy_s4, label='Schema 4: IID')
    # plt.xlabel('Round Number')
    # plt.ylabel('SCA Accuracy')
    # plt.title('Round Accuracy for Cohort Size ' + str(cohort_size[-1]))
    # plt.legend()
    # plt.savefig(_results_path(batch, test_id, '.accuracy_vs_round.png'))
    # plt.show()


# get 2CPU data

# NOTE(review): this is the same batch id as the "1 CPU" run above, so both
# curves are read from identical files. Replace it with the id of the 2 CPU
# batch once that batch exists.
batch = 5355833
test = [1, 2, 3, 4]

rounds_s1_2 = []

for test_id in test:
    round_num, accuracy_s1_2 = _load_rounds_and_accuracy(
        _results_path(batch, test_id, '.s1out.csv'))

    # the last recorded round is the number of rounds this test needed
    rounds_s1_2.append(round_num[-1])


# data for efficiency by cohort size plot
plt.clf()
plt.plot(cohort_size, rounds_s1, label='1 CPU')
# plt.plot(cohort_size, rounds_s3, label='Schema 3: Sharding')
# plt.plot(cohort_size, rounds_s4, label='Schema 4: IID')
plt.plot(cohort_size, rounds_s1_2, label='2 CPU')
plt.xlabel('Cohort Size')
plt.ylabel('Number of Rounds to Reach 99% Accuracy')
plt.title('Model Efficiency for Varying Cohort Size, Clients with Partially IID Data')
plt.legend()
# Save before showing: with an interactive backend, show() blocks until the
# window is closed and the figure may be empty by the time savefig() runs.
plt.savefig('results/' + str(batch) + '/' + str(batch) + '.rounds_vs_cohortsize.png')
plt.show()

‎info/batches.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ batch # code description & schema #
99
5321083 error invalid label values (only s1) s134, some tests finished
1010
5324828 debug invalid label debugging with output
1111
5352912 debug invalid label debugging with output, solved!
12-
varying cohort size, 1 CPU per
13-
varying cohort size, 2 CPU per
12+
5355833 timed out varying cohort size, 1 CPU per, s134
13+
14+
varying cohort size, 2 CPU per, s134

‎test_tff.sh

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ module load cuda/10.1.105
1919
module list
2020

2121
mkdir results/${SLURM_ARRAY_JOB_ID}
22+
mkdir results/${SLURM_ARRAY_JOB_ID}/log
2223

2324
python tff_main.py $SLURM_ARRAY_TASK_ID $SLURM_ARRAY_JOB_ID
2425

‎tff_main.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@
3737
# # p2.go(test, batch)
3838
# # print(datetime.now())
3939

40-
p3.go(test, batch)
41-
print(datetime.now())
40+
# p3.go(test, batch)
41+
# print(datetime.now())
4242

43-
p4.go(test, batch)
44-
print(datetime.now())
43+
# p4.go(test, batch)
44+
# print(datetime.now())

0 commit comments

Comments
 (0)