-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
SentimentAnalysis.py
72 lines (65 loc) · 2.21 KB
/
SentimentAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from profanity_filter import ProfanityFilter
from profanity_check import predict, predict_prob
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize.treebank import TreebankWordDetokenizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import nltk
import twitter
import pandas as pd
import os
def preprocess_texts(texts):
    """Strip punctuation and English stop words from each text.

    Every input string has all ``string.punctuation`` characters removed, is
    tokenized with ``word_tokenize``, filtered against the NLTK English
    stop-word list, and finally re-joined into a single string with
    ``TreebankWordDetokenizer``.  Stop-word matching is an exact comparison;
    no case folding is applied here.

    Args:
        texts: Iterable of strings to clean.

    Returns:
        A list of cleaned strings, one per input text, in input order.
    """
    # Loop invariants are built once.  A frozenset gives constant-time
    # membership tests instead of a linear scan of the stop-word list for
    # every token.
    stop_words = frozenset(stopwords.words('english'))
    strip_punctuation = str.maketrans('', '', string.punctuation)
    detokenizer = TreebankWordDetokenizer()

    preprocessed_text = []
    for text in texts:
        tokens = word_tokenize(text.translate(strip_punctuation))
        kept = [w for w in tokens if w not in stop_words]
        preprocessed_text.append(detokenizer.detokenize(kept))
    return preprocessed_text
def Predict(texts):
    """Label each text as negative (0) or non-negative (1).

    A text is labelled 0 when the profanity filter reports it as profane, or
    when its VADER ``compound`` score is at or below -0.05.  Every other text
    is labelled 1.  The sentiment scorer is only consulted for texts that the
    profanity filter did not flag.

    Args:
        texts: Iterable of strings to classify.

    Returns:
        A list of 0/1 labels, one per input text, in input order.
    """
    profanity = ProfanityFilter()
    analyzer = SentimentIntensityAnalyzer()

    def is_negative(sentence):
        # Profane text is negative outright and never reaches the scorer.
        if profanity.is_profane(sentence):
            return True
        return analyzer.polarity_scores(sentence)['compound'] <= -0.05

    return [0 if is_negative(sentence) else 1 for sentence in texts]
def uploaded_file(path):
    """Classify the ``text`` column of a CSV file and print each result.

    The file is loaded with pandas, copied to ``temp.csv`` in the current
    working directory, and its ``text`` column is passed to ``Predict``.
    Each text is then printed, followed by a blank gap and its label.

    Args:
        path: Location of the input file.  It is converted with ``str()``
            for the extension check, so path-like objects are accepted there.

    Returns:
        None.  Output goes to stdout and to ``temp.csv``.
    """
    # Slice the string form in both places; slicing `path` itself fails for
    # non-string path objects.
    suffix = str(path)[-3:]
    if suffix != "csv":
        print(suffix)
        print("Incorrect file")
        # Pauses until the user presses Enter.
        # NOTE(review): execution then continues and still tries to read the
        # file -- confirm whether this should return early instead.
        input()

    df = pd.read_csv(path)

    # Best-effort removal of a stale copy, without shelling out to `rm`.
    # Failures are ignored here, as the shell call's exit status was; a real
    # write problem will surface from to_csv below.
    try:
        os.remove('temp.csv')
    except OSError:
        pass
    df.to_csv('temp.csv', index=False)

    df_text = df['text']
    labels = Predict(df_text)
    for text, label in zip(df_text, labels):
        print(text)
        print("\n\n")
        print(label)
def fetch_tweets(keyword,num_of_tweets):
    """Fetch tweets for a keyword, label them, and save the results as CSV.

    Tweets come from ``twitter.get_tweets``.  Their text is cleaned with
    ``preprocess_texts`` and labelled with ``Predict``.  The combined table
    is written to both ``temp.csv`` and ``file.csv`` in the current working
    directory, and each original tweet is printed followed by its label.

    Args:
        keyword: Search term forwarded to ``twitter.get_tweets``.
        num_of_tweets: Tweet count forwarded to ``twitter.get_tweets``.

    Returns:
        None.  Output goes to stdout and to the two CSV files.
    """
    (time_stamp, location_list, twitter_user,
     subjectivity, polarity, tweet_list) = twitter.get_tweets(keyword, num_of_tweets)
    print("[INFO] successfuly obtained tweets")

    prep_text = preprocess_texts(tweet_list)
    labels = Predict(prep_text)

    # NOTE(review): the value unpacked as `subjectivity` is stored under the
    # 'Polarity' column and `polarity` under 'Subjectivity'.  Either the
    # unpacking names or the column labels are swapped; confirm against the
    # return order of twitter.get_tweets before changing.  The existing
    # mapping is kept as-is.
    rows = list(zip(time_stamp, location_list, twitter_user, prep_text,
                    subjectivity, polarity, labels))
    df = pd.DataFrame(
        rows,
        columns=['time_stamp', 'location', 'user name', 'text',
                 'Polarity', 'Subjectivity', 'Sentiments'],
    )

    # Best-effort removal of stale outputs, without shelling out to `rm`.
    # Failures are ignored here, as the shell call's exit status was; a real
    # write problem will surface from to_csv below.
    for stale in ('file.csv', 'temp.csv'):
        try:
            os.remove(stale)
        except OSError:
            pass
    df.to_csv('temp.csv', index=False)
    df.to_csv('file.csv', index=False)
    print("file is written")

    for tweet, label in zip(tweet_list, labels):
        print(tweet)
        print("\n\n")
        print(label)
#fetch_tweets('modi',10)