Skip to content

Commit 3d9f529

Browse files
committed
Cleaned the directory structure, removed redundant code
1 parent c552c41 commit 3d9f529

File tree

60 files changed

+2919
-279
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+2919
-279
lines changed

.ipynb_checkpoints/Prog5-checkpoint.ipynb

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"def separateByClass(dataset):\n",
4949
" separated = {}\n",
5050
" for i in range(len(dataset)):\n",
51-
" x = dataset[i]\n",
51+
" x = dataset[i] # current row\n",
5252
" if (x[-1] not in separated):\n",
5353
" separated[x[-1]] = []\n",
5454
" separated[x[-1]].append(x)\n",
@@ -57,34 +57,38 @@
5757
"def compute_mean_std(dataset):\n",
5858
" mean_std = [ (st.mean(attribute), st.stdev(attribute))\n",
5959
" for attribute in zip(*dataset)]; #zip(*dataset) transposes the rows into per-attribute columns (2-d array/list)\n",
60-
" del mean_std[-1] # Exclude label\n",
60+
" del mean_std[-1] # Exclude label, i.e., target\n",
6161
" return mean_std\n",
6262
"\n",
63-
"def summarizeByClass(dataset):\n",
63+
"def summarizeByClass(dataset): # summary holds the mean and std of each attribute, per class value\n",
6464
" separated = separateByClass(dataset);\n",
6565
" summary = {} # to store mean and std of +ve and -ve instances\n",
6666
" for classValue, instances in separated.items():\n",
6767
" #summary maps each class value to a list of (mean, std) tuples, one per attribute\n",
6868
" summary[classValue] = compute_mean_std(instances)\n",
6969
" return summary\n",
70-
"#For continuous attributes p is estimated using Gaussion distribution\n",
70+
"\n",
71+
"#For continuous attributes, p is estimated using Gaussian distribution\n",
7172
"def estimateProbability(x, mean, stdev):\n",
7273
" exponent = math.exp(-(math.pow(x-mean,2)/(2*math.pow(stdev,2))))\n",
7374
" return (1 / (math.sqrt(2*math.pi) * stdev)) * exponent\n",
7475
"\n",
75-
"\n",
76+
"# calculate the probability of each class for the given row (testVector)\n",
7677
"def calculateClassProbabilities(summaries, testVector):\n",
7778
" p = {}\n",
7879
" #class and attribute information as mean and sd\n",
7980
" for classValue, classSummaries in summaries.items():\n",
8081
" p[classValue] = 1\n",
8182
" for i in range(len(classSummaries)):\n",
8283
" mean, stdev = classSummaries[i]\n",
83-
" x = testVector[i] #testvector's first attribute\n",
84+
" x = testVector[i] #testvector's i-th attribute\n",
85+
" \n",
8486
" #use normal distribution\n",
85-
" p[classValue] *= estimateProbability(x, mean, stdev);\n",
87+
" p[classValue] *= estimateProbability(x, mean, stdev)\n",
88+
" \n",
8689
" return p\n",
8790
"\n",
91+
"# choose the best label among the class probabilities computed for the given row (testVector)\n",
8892
"def predict(summaries, testVector):\n",
8993
" all_p = calculateClassProbabilities(summaries, testVector)\n",
9094
" bestLabel, bestProb = None, -1\n",
@@ -94,6 +98,7 @@
9498
" bestLabel = lbl\n",
9599
" return bestLabel\n",
96100
"\n",
101+
"# find predicted class for each row in testSet\n",
97102
"def perform_classification(summaries, testSet):\n",
98103
" predictions = []\n",
99104
" for i in range(len(testSet)):\n",
@@ -116,14 +121,20 @@
116121
"print(\"First Five instances of dataset:\")\n",
117122
"for i in range(5):\n",
118123
" print(i+1 , ':' , dataset[i])\n",
124+
" \n",
125+
"\n",
119126
"splitRatio = 0.2\n",
120127
"trainingSet, testSet = splitDataset(dataset, splitRatio)\n",
128+
"\n",
121129
"print('\\nDataset is split into training and testing set.')\n",
122-
"print('Training examples = {0} \\nTesting examples = {1}'.format(len(trainingSet),\n",
123-
"len(testSet)))\n",
130+
"print('Training examples = {0} \\nTesting examples = {1}'.format(len(trainingSet), len(testSet)))\n",
131+
"\n",
124132
"summaries = summarizeByClass(trainingSet);\n",
133+
"\n",
125134
"predictions = perform_classification(summaries, testSet)\n",
135+
"\n",
126136
"accuracy = getAccuracy(testSet, predictions)\n",
137+
"\n",
127138
"print('\\nAccuracy of the Naive Baysian Classifier is :', accuracy)"
128139
]
129140
},

0 commit comments

Comments
 (0)