-
Notifications
You must be signed in to change notification settings - Fork 2
/
mongodb.py
151 lines (129 loc) · 4.63 KB
/
mongodb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
from pymongo import MongoClient
import pandas as pd
import os, json
'''This script works with the BASTO Mongo database. First it opens the connection to the
database through localhost on the default port 27017.'''
# Module-level connection: local MongoDB server on port 27017, database "basto".
# Every function below reads its collections from `basto_db`.
client = MongoClient(host='localhost', port=27017)
basto_db = client['basto']
def pathing(paths: list):
    """Make sure every folder in ``paths`` exists, creating missing ones.

    Intermediate directories are created as needed. Folders that already
    exist are left untouched, and the same path may appear more than once.

    Args:
        paths: Folder paths to create.

    Raises:
        OSError: If a folder cannot be created (for example an empty path,
            or a path that already exists as a regular file).
    """
    for path in paths:
        # exist_ok avoids the check-then-create race of testing
        # os.path.exists() first: another process may create the folder
        # between the test and the makedirs call.
        os.makedirs(path, exist_ok=True)
'''The following aggregation function is used on the BASTO database
to filter the data to work with.'''
def aggregate(farm: str, device: str):
    """Run the datarows aggregation for one farm and one device type.

    The pipeline keeps datarows whose ``dataRowType`` equals ``device``,
    joins them with ``devices`` (``UUID`` -> ``deviceMACAddress``), then
    ``animals`` and ``settlements``, keeps the rows whose settlement name
    equals ``farm``, unwinds ``devices`` and finally projects only
    ``dataRowData``, ``UUID`` and ``createdAt``.

    Args:
        farm: Settlement name to match.
        device: Value of ``dataRowType`` to match.

    Returns:
        The result of ``basto_db.datarows.aggregate`` for that pipeline.
    """
    print('Extracting from Mongodb', end='\r')

    def join(collection, local_field, foreign_field):
        # Every join stores its result under the joined collection's name.
        return {
            "$lookup": {
                "from": collection,
                "localField": local_field,
                "foreignField": foreign_field,
                "as": collection,
            }
        }

    stages = [
        {"$match": {"dataRowType": f"{device}"}},
        join("devices", "UUID", "deviceMACAddress"),
        join("animals", "devices.deviceAnimalID", "_id"),
        join("settlements", "animals.animalSettlement", "_id"),
        {"$match": {"settlements.name": f"{farm}"}},
        {"$unwind": "$devices"},
        {"$project": {"_id": 0, "dataRowData": 1, "UUID": 1, "createdAt": 1}},
    ]
    return basto_db.datarows.aggregate(stages)
'''Once the data has been filtered, it is filtered again to take into account only the data
recorded by the GPS.'''
def GPS(farm: str, device: str, path: str):
    """Write one CSV of timestamped positions per device UUID.

    Documents come from ``aggregate(farm, device)``. For each one the
    ``createdAt`` value and the ``lat`` / ``lng`` entries of ``dataRowData``
    are appended to the series of the document's ``UUID``. Each series is
    then written to ``<path><UUID>.csv`` with the columns ``timestamp``,
    ``lat`` and ``lng``; rows with any missing value are dropped.

    Args:
        farm: Settlement name forwarded to ``aggregate``.
        device: Device type forwarded to ``aggregate``.
        path: Prefix of the output files. It is concatenated as-is with the
            UUID, so a folder must end with a path separator.
    """
    # One pass over the results. A dict keeps first-seen UUID order without
    # the quadratic "if u not in list" scan, and nothing is materialised twice.
    time_position = {}
    for doc in aggregate(farm, device):
        series = time_position.setdefault(
            doc['UUID'], {'timestamp': [], 'lat': [], 'lng': []}
        )
        row = doc['dataRowData']
        series['timestamp'].append(doc['createdAt'])
        # Read the coordinates by name rather than by key position, which
        # depended on the stored key order. A missing coordinate becomes
        # None so the three columns stay aligned and dropna() removes the row.
        series['lat'].append(row.get('lat'))
        series['lng'].append(row.get('lng'))

    # Create a csv for each GPS with its historical records of position.
    total = len(time_position)
    for ii, (uuid, series) in enumerate(time_position.items(), start=1):
        df = pd.DataFrame(series).dropna()
        df.to_csv(path + str(uuid) + '.csv', index=False)
        print(f'Creating csv {ii} of {total}', end='\r')
'''It also extracts the limits of each field contained in the farm; here the fields are called plots.'''
def plots(farm: str, plot_path: str):
    """Dump the boundary points of every plot of ``farm`` to a JSON file.

    The plot ids are read from the ``plots`` field of each settlement named
    ``farm``; the matching documents of the ``plots`` collection supply the
    ``geoPoints``. The output maps a running index (in the order the plot
    ids were listed) to ``{'lat': [...], 'lng': [...]}`` and is written to
    ``<plot_path><farm with spaces replaced by '_'>.json``. A referenced
    plot with no ``geoPoints``, or not found in the collection, keeps empty
    lists.

    Args:
        farm: Settlement name to look up.
        plot_path: Prefix of the output file. It is concatenated as-is, so
            a folder must end with a path separator.
    """
    # Ids of all plots referenced by the matching settlements. A settlement
    # without a "plots" field contributes nothing instead of raising.
    plot_ids = [
        plot_id
        for settlement in basto_db.settlements.find({'name': farm})
        for plot_id in settlement.get('plots') or []
    ]

    geo_points = {plot_id: {'lat': [], 'lng': []} for plot_id in plot_ids}
    for plot in basto_db.plots.find({'_id': {"$in": plot_ids}}):
        boundary = geo_points[plot['_id']]
        # Only geoPoints end up in the output. The virtual-fence points used
        # to be collected too but were never written anywhere, so they are
        # no longer read (a plot lacking that field no longer fails).
        for point in plot.get('geoPoints') or []:
            boundary['lat'].append(point['lat'])
            boundary['lng'].append(point['lng'])

    # Re-key by position: the JSON keys become "0", "1", ... instead of ids.
    plots_json = json.dumps(dict(enumerate(geo_points.values())))
    with open(plot_path + farm.replace(' ', '_') + '.json', 'w') as f:
        print('Saving plots json', end='\r')
        f.write(plots_json)
if __name__ == "__main__":
    # Settlement to export and the datarow type to extract.
    farm = 'MACSA'
    device = 'GPS'

    # Output locations; spaces in the farm name become underscores.
    gps_path = f"./basto_dataset/gps_{farm.replace(' ', '_')}/"
    plot_path = './basto_dataset/plots/'

    # Create the output folders first, then export the per-device GPS
    # tracks and the plot boundaries.
    pathing([gps_path, plot_path])
    GPS(farm, device, gps_path)
    plots(farm, plot_path)