This repository was archived by the owner on Jan 3, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathNBC.py
71 lines (56 loc) · 2.44 KB
/
NBC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file.
Compute the accuracy of the classifier, considering a few test data sets. """
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import pandas as pd
# Train and evaluate a multinomial Naive Bayes sentiment classifier.
#
# Input:  'NBC.csv' in the working directory, read without a header row as two
#         columns: the message text and its label ('pos' or 'neg').
# Output: dataset size, a preview of the training document-term matrix, the
#         predicted label for every held-out test message, and accuracy,
#         recall, precision and the confusion matrix on the held-out set.
msg = pd.read_csv('NBC.csv', names=['message', 'label'])
print("Total Instances of Dataset: ", msg.shape[0])

# Encode the labels numerically; 'pos' becomes 1 and 'neg' becomes 0.
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum

# No random_state is given, so the split (and the metrics) differ per run.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)

# Fit the vocabulary on the training text only, then reuse that vocabulary to
# encode the test text so no information leaks from the held-out set.
count_v = CountVectorizer()
Xtrain_dm = count_v.fit_transform(Xtrain)
Xtest_dm = count_v.transform(Xtest)

# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old name on older installations.
feature_names = (getattr(count_v, 'get_feature_names_out', None)
                 or count_v.get_feature_names)
df = pd.DataFrame(Xtrain_dm.toarray(), columns=feature_names())
print(df[0:5])

clf = MultinomialNB()
clf.fit(Xtrain_dm, ytrain)
pred = clf.predict(Xtest_dm)

# pred was computed from Xtest_dm, so each prediction must be shown next to
# the test document it belongs to (not a training document).
for doc, p in zip(Xtest, pred):
    p = 'pos' if p == 1 else 'neg'
    print("%s -> %s" % (doc, p))

print('Accuracy Metrics: \n')
print('Accuracy: ', accuracy_score(ytest, pred))
print('Recall: ', recall_score(ytest, pred))
print('Precision: ', precision_score(ytest, pred))
print('Confusion Matrix: \n', confusion_matrix(ytest, pred))
########################################################################################################################
# SAMPLE OUTPUT (one run; the train/test split is random, so results vary):
# Ignore the triple quotes at the beginning and end
########################################################################################################################
'''
Total Instances of Dataset: 18
about am amazing an awesome beers ... today very view went what work
0 0 0 0 1 1 0 ... 0 0 1 0 1 0
1 0 0 0 0 0 0 ... 0 0 0 0 0 0
2 0 0 0 0 0 0 ... 0 0 0 0 0 0
3 0 0 0 0 0 0 ... 1 0 0 1 0 0
4 0 0 0 1 1 0 ... 0 0 0 0 0 0
[5 rows x 39 columns]
What an awesome view -> pos
I love to dance -> neg
He is my sworn enemy -> pos
I went to my enemy's house today -> pos
This is an awesome place -> neg
Accuracy Metrics:
Accuracy: 0.6
Recall: 1.0
Precision: 0.3333333333333333
Confusion Matrix:
[[2 2]
[0 1]]
Process finished with exit code 0
'''