-
Notifications
You must be signed in to change notification settings - Fork 15
/
cli_bot.py
144 lines (118 loc) · 4.2 KB
/
cli_bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import logging
import pickle
import random
import sys
import time

import nltk
import praw
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException
from nltk.stem.porter import PorterStemmer
from scipy.sparse import csr_matrix, hstack
from termcolor import colored

import classifier
# Module-level Porter stemmer instance; tokenize() hands it to stem_tokens().
stemmer = PorterStemmer()
class color:
    """ANSI escape sequences for styling text written to a terminal.

    Prefix text with one of the codes and append ``END`` afterwards, e.g.
    ``color.BOLD + "you" + color.END``.
    """

    # Foreground colours.
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset sequence that ends any styling started by the codes above.
    END = '\033[0m'
def stem_tokens(tokens, stemmer):
    """Return the stem of every token, in the original order.

    Args:
        tokens: Iterable of token strings.
        stemmer: Any object exposing ``stem(token)``.

    Returns:
        A new list containing ``stemmer.stem(token)`` for each token.
    """
    return [stemmer.stem(token) for token in tokens]
def tokenize(text):
    """Split *text* into word tokens with NLTK and return the list of their stems."""
    return stem_tokens(nltk.word_tokenize(text), stemmer)
def create_features(text):
    """Build the combined sparse feature matrix for a single piece of text.

    The text is first cleaned with ``classifier.remove_url`` and
    ``classifier.remove_stopwords``. Four feature groups are then computed
    and stacked horizontally in this order: sentiment, ``tfid.pkl``
    transform, part-of-speech, topic.

    The fitted transformers are unpickled from ``tfid.pkl``, ``pos.pkl`` and
    ``topic.pkl`` (paths relative to the current working directory) on every
    call.

    Args:
        text: The raw comment text.

    Returns:
        The sparse matrix produced by ``scipy.sparse.hstack`` over the four
        feature groups.

    Raises:
        OSError: If one of the pickle files cannot be opened.
    """
    text = classifier.remove_url(text)
    text = classifier.remove_stopwords(text)

    sentiment_arr = csr_matrix(classifier.find_sentiment([text]))

    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # these files must only ever come from a trusted training run.
    # Each handle is scoped with ``with`` so it is closed even when loading
    # or a later transform raises.
    with open('tfid.pkl', 'rb') as tfid_pkl:
        tfidf = pickle.load(tfid_pkl)
    tfs_arr = tfidf.transform([text])

    with open('pos.pkl', 'rb') as pos_pkl:
        vec = pickle.load(pos_pkl)
    pos_arr = vec.transform(classifier.get_pos_features([text]))

    with open('topic.pkl', 'rb') as topic_pkl:
        lda = pickle.load(topic_pkl)
    # The topic model consumes the tfid.pkl output, not the raw text.
    topic_arr = lda.transform(tfs_arr)

    return hstack([sentiment_arr, tfs_arr, pos_arr, topic_arr])
# Load the four fitted classifiers once, at import time.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load model files produced by a trusted training run.
# Each file is closed as soon as its model is loaded. The handle names
# (lg_pkl, svm_pkl, linear_svm_pkl, rf_pkl) stay bound to the closed file
# objects, so code further down that calls .close() on them remains valid
# (closing an already-closed file is a no-op).
with open('logistic_regression.pkl', 'rb') as lg_pkl:
    logistic_model = pickle.load(lg_pkl)
with open('svm.pkl', 'rb') as svm_pkl:
    svm_model = pickle.load(svm_pkl)
with open('linear_svm.pkl', 'rb') as linear_svm_pkl:
    linear_svm_model = pickle.load(linear_svm_pkl)
with open('rf.pkl', 'rb') as rf_pkl:
    rf_model = pickle.load(rf_pkl)
def predictor(text):
    """Decide whether *text* is sarcastic by a vote of the four loaded models.

    Features are built once with ``create_features`` and passed to the
    logistic-regression, SVM, linear-SVM and random-forest models. A model
    votes for sarcasm when its first prediction equals ``'sarc'``.

    Args:
        text: The raw comment text.

    Returns:
        True when more than half of the models vote ``'sarc'`` (with four
        models: at least three), otherwise False.
    """
    features = create_features(text)
    models = (logistic_model, svm_model, linear_svm_model, rf_model)
    # Count with plain ints so the result is a Python bool even if the
    # predictions are NumPy scalars.
    sarcasm_votes = sum(
        1 for model in models if model.predict(features)[0] == 'sarc'
    )
    return sarcasm_votes > len(models) // 2
# Canned replies used when the operator lets the bot answer on its own.
replies = ["PMSL", "ROFLMAO", "ROFLCOPTER", "LULZ", "BWAHAHA", "LOL", "LMAO", "ROFL", "OMG", ]

# Reddit session and the live comment stream of r/india that the bot watches.
reddit = praw.Reddit('bot1', user_agent='pyMubu.v0.1 (by /u/mubumbz)')
subreddit = reddit.subreddit('india')
comments = subreddit.stream.comments()
stores_exception = None
# Number of replies posted during this session; reported on Ctrl-C.
count = 0
logging.basicConfig(filename='comments.log', level=logging.INFO,
                    format='%(asctime)s %(message)s')

# Seconds to pause after each posted reply.
REPLY_COOLDOWN_SECONDS = 120


def _confirm(prompt):
    """Show *prompt* to the operator and return True if the answer starts with 'y'."""
    return input(prompt).lower().startswith('y')


# The whole loop sits inside the try so that Ctrl-C is handled wherever it
# arrives, including while blocked waiting for the next streamed comment.
try:
    for comment in comments:
        text = comment.body
        if classifier.is_too_short(text, 10):
            continue
        try:
            language = detect(text)
        except LangDetectException:
            # langdetect raises when the text has no detectable features;
            # skip such comments instead of crashing the bot.
            continue
        if language != 'en' or not predictor(text):
            continue

        print(colored("Comment => ", 'magenta') + colored(text, 'cyan'))

        # The operator may write the reply, delegate it to the bot, or skip.
        if _confirm("Do " + color.BOLD + "you" + color.END + " wish to respond? [y/n] "):
            message = input("Enter the reply: ")
        elif _confirm("Would you like the " + color.BOLD + "bot" + color.END + " to respond? [y/n] "):
            message = random.choice(replies)
        else:
            message = None

        if message is not None:
            print(colored("Replying with: ", "yellow") + colored(message, "green"))
            comment.reply(message)
            count += 1
            info = text + '\n' + message + '\n' + '---------'
            logging.info(info)
            time.sleep(REPLY_COOLDOWN_SECONDS)
        print()
except KeyboardInterrupt:
    print("\nTotal Replies: ", count)
    sys.exit()
finally:
    # Runs on Ctrl-C (before the SystemExit propagates) and on any other
    # error, so the model file handles are always released.
    lg_pkl.close()
    svm_pkl.close()
    linear_svm_pkl.close()
    rf_pkl.close()