-
Notifications
You must be signed in to change notification settings - Fork 0
/
readFile.py
115 lines (85 loc) · 4.17 KB
/
readFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import re
import mysql.connector
from datetime import datetime
config = {
'host': 'localhost',
'user': 'root',
'password': '12345',
'database': 'app',
'raise_on_warnings': True,
}
def makedatetime(timestamp):
date = datetime.fromtimestamp( int(timestamp))
return date.strftime('%Y-%m-%d %H:%M:%S')
def searchRegex(expression, chunk):
ex = re.compile(expression)
result = ex.search(chunk)[1]
return result
def insert_uniqueid_element(connection,cursor,query,values):
cursor.execute(query, values)
connection.commit()
def add_uniqueid_element(connection, addquery, searchquery, searchvalues, addvalues = None):
new = 0
if addvalues == None:
addvalues = searchvalues
searchcursor = connection.cursor(buffered=True)
searchcursor.execute(searchquery, searchvalues)
if searchcursor.rowcount ==0:
new = 1
insert_uniqueid_element(connection, connection.cursor(buffered=True), addquery, addvalues)
connection.commit()
searchcursor.execute(searchquery, searchvalues)
return searchcursor.fetchone()[0], new
def process_helpfulness(expression):
n1, n2 = expression.strip().split("/")
if n2 == '0' or n1 == '0':
return 0
return round(int(n1)/int(n2), 2)
with open('movies.txt','r', encoding="Latin-1") as f:
dic = {}
i = 0
chunk = ""
keys = ["productId","userId","profileName","helpfulness",
"score","time","summary","text"]
regexCodes = [r'product\/productId: (.{10})',r'review\/userId: (.{14})',r'review\/profileName: (.+) review\/h',r'review\/helpfulness: (.+) review\/sc',r'review\/score: (.+) review\/ti',r'review\/time: (.+) review\/su',r'review\/summary: (.+) review\/t',r'review\/text: (.+)']
current_product = ""
last_product = ""
new_entries = [0,0,0]
cnx = mysql.connector.connect(**config, auth_plugin = 'mysql_native_password')
for line in iter(f.readline,""):
if i < 8:
chunk += line.strip() + " "
i += 1
elif i == 8:
values = []
for j in range(len(keys)):
values.append(searchRegex(regexCodes[j], chunk))
check_product = ("SELECT productcol FROM products WHERE productId = %s")
check_user = ("SELECT userscol FROM users WHERE usersId = %s")
check_text = ("SELECT textid FROM texts WHERE summary = %s AND text = %s")
check_review = ("SELECT reviewId FROM reviews WHERE productcol = %s AND "
"usercol = %s AND timestamp = %s")
add_user = ("INSERT INTO users (usersId, profileName) VALUES (%s, %s)")
add_product = ("INSERT INTO products (productId) VALUES (%s)")
add_text = ("INSERT INTO texts (summary, text) VALUES (%s,%s)")
add_review = ("INSERT INTO reviews (productcol,usercol,helpfulness,score,timestamp, textId) VALUES (%s,%s,%s,%s,%s,%s)")
usercol,newuser = add_uniqueid_element(cnx,add_user,check_user,(values[1],),(values[1], values[2],))
productcol, newproduct = add_uniqueid_element(cnx,add_product,check_product,(values[0],))
textid, newtext = add_uniqueid_element(cnx,add_text,check_text,(values[6],values[7],))
helpfulness = process_helpfulness(values[3])
reviewid, newreview = add_uniqueid_element(cnx,add_review,check_review,(str(productcol), str(usercol), makedatetime(values[5])), (str(productcol), str(usercol), str(helpfulness), values[4], makedatetime(values[5]), str(textid)))
if current_product != values[0]:
last_product = current_product
current_product = values[0]
if last_product != "":
print("\tNew Reviews:",str(new_entries[2]))
print("\tNew Users:",str(new_entries[0]))
new_entries = [0,0,0]
print("\nINSERTING REVIEWS FOR PRODUCT #%s %s\n" %(productcol, current_product))
new_entries[0] += newuser
new_entries[1] += newproduct
new_entries[2] += newreview
i = 0
chunk = ""
cnx.close()
print("All reviews have been inserted in the database.")