-
Notifications
You must be signed in to change notification settings - Fork 0
/
gizoogle.py
315 lines (271 loc) · 11.5 KB
/
gizoogle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Giz00gle - Google API Testing and Evaluation. The primary libraries
# that are tested include the Google Speech API, VideoIntelligence API,
# Translation API, and Image API
#
#
# CAO: 1826 February 4th, 2017
#
#################################################################
from __future__ import print_function
from __future__ import absolute_import
import re
import os
from cmd import Cmd
from pyfiglet import Figlet
from werkzeug.datastructures import FileStorage
from modules.image import ImageIntelligence
from modules.video import VideoIntelligence
from modules.audio import AudioIntelligence
from modules.translate import TranslateIntelligence
from modules.config import Config
from modules.common import Common
# Main prompt for the application. Used instead of a while loop. Uses keywords
# to trigger functions.
class GooglePrompt(Cmd):
    '''
    Interactive prompt that exercises the Google API wrapper modules.

    Cmd dispatches a line such as "image <path>" to do_image(<path>) and
    "help image" to help_image(). Every command relies on the module-level
    ``common`` and ``config`` objects being defined before the loop starts.
    '''

    def preloop(self):
        '''
        Print the initial logo and version once, before the command loop.
        '''
        fig = Figlet(font='big')
        print(fig.renderText('GizOOgle'))
        print('Version 1.0a')

    # ------------------------------------------------------------------
    # Private helpers shared by the do_* commands. Names that do not start
    # with "do_" are never exposed as prompt commands by Cmd.
    # ------------------------------------------------------------------
    @staticmethod
    def _split_code_and_path(arg):
        '''
        Split "<lang> <path>" into a (code, path) tuple.

        Only the first run of whitespace separates the two fields, so a path
        containing spaces is kept intact. Returns None when either field is
        missing.
        '''
        parts = arg.split(None, 1)
        if len(parts) != 2:
            return None
        return parts[0], parts[1].strip()

    @staticmethod
    def _gs_uri(bucket, url):
        '''
        Build "gs://<bucket>/<name>" where <name> is the last "/" segment
        of the URL returned by the upload helper.
        '''
        return 'gs://' + bucket + '/' + url.split('/')[-1]

    @staticmethod
    def _upload(path, bucket):
        '''
        Upload the local file at ``path`` to ``bucket`` and return whatever
        common.upload_file returns (treated by callers as a URL string that
        is empty on failure).
        '''
        with open(path, 'rb') as fp:
            # The read must happen while the file is still open.
            blob = FileStorage(fp)
            return common.upload_file(blob.read(), blob.filename,
                                      blob.content_type, bucket)

    @staticmethod
    def _transcribe(ai, client, uri, code):
        '''
        Transcribe the audio at ``uri``; if the normal request fails, retry
        with the long-running variant (used for large files).
        '''
        try:
            ai.analyze_audio(uri, client, code)
        except Exception:
            # Deliberate best-effort fallback, but no longer a bare except:
            # KeyboardInterrupt / SystemExit must still reach the prompt.
            ai.try_long_run(uri, client, code)

    # ------------------------------------------------------------------
    # Commands
    # ------------------------------------------------------------------
    def do_image(self, path):
        '''
        Analyze an image from the Internet or from a local file.

        Usage:
            image gs://<bucket>/file.jpeg      : Analyze an image in your bucket
            image https://myfile.com/file.jpeg : Analyze an image on a webpage
            image /home/devnet/file.jpeg       : Upload and analyze
        '''
        common.opening_label('# IMAGE ANALYSIS')

        # Instantiate an image client
        ii = ImageIntelligence()
        client = ii.get_client()

        # Use a remote URI directly, else upload the local file first
        if path.startswith(('http', 'gs:')):
            uri = path
        else:
            if not common.check_file_exists(path):
                return
            uri = self._upload(path, config.IMAGE_STORAGE_BUCKET)
            # Truthiness test instead of `is not ''`, which compared identity
            if not uri:
                return

        # Run all ImageIntelligence modules and print results
        resp = ii.analyze_image(uri, client)
        if resp is not None:
            ii.run_all(resp)

    def help_image(self):
        '''
        Display information about the image command ("help image").
        '''
        print('image <url>\nimage <local_path>\nImage analysis using Google Vision')

    def do_audio(self, arg):
        '''
        Transcribe an audio file and attempt to auto-translate to english.

        Usage:
            audio <lang> gs://<bucket>/file.flac : Analyze a FLAC file in your bucket
            audio <lang> /home/devnet/file.mp3   : Convert, upload, and analyze
        '''
        parsed = self._split_code_and_path(arg)
        if parsed is None:
            # Missing language code or path: show usage instead of raising
            # IndexError, which would terminate the prompt.
            self.help_audio()
            return
        code, path = parsed

        common.opening_label('# AUDIO TRANSCRIPTION')

        # Instantiate a speech client
        ai = AudioIntelligence()
        client = ai.get_client()

        # A remote file must already be FLAC and live in a bucket:
        # gs://bucket_name/file.flac
        if path.startswith('gs:') and path.endswith('flac'):
            self._transcribe(ai, client, path, code)
            return

        if not common.check_file_exists(path):
            return

        # Convert the audio to FLAC next to the original file. Assuming the
        # file is not FLAC here.
        new_path = os.path.splitext(path)[0] + '.flac'
        common.convert_to_flac(path, new_path)

        # Upload the FLAC file, then analyze it through its gs:// URI
        url = self._upload(new_path, config.AUDIO_STORAGE_BUCKET)
        if not url:
            return
        # Uses the `config` instance like every other command (the original
        # read the bucket from the Config class here).
        self._transcribe(ai, client,
                         self._gs_uri(config.AUDIO_STORAGE_BUCKET, url), code)

    def help_audio(self):
        '''
        Display information about the audio command ("help audio").
        '''
        print('audio <lang_code> <gs://<bucket_name>/<file_name>>\naudio <lang_code> <local_path>\nSpeech analysis using Google Speech')

    def do_video(self, arg):
        '''
        Analyze a video file and, for local files, transcribe its audio.

        Usage:
            video <lang> gs://<bucket>/file.mp4 : Analyze a video in your bucket
            video <lang> /home/devnet/file.avi  : Convert, upload, and analyze
        '''
        parsed = self._split_code_and_path(arg)
        if parsed is None:
            # Missing language code or path: show usage instead of raising
            # IndexError, which would terminate the prompt.
            self.help_video()
            return
        code, path = parsed

        common.opening_label('# VIDEO ANALYSIS')

        # Instantiate a videointelligence client
        vi = VideoIntelligence()
        client = vi.get_client()

        # If the video is already uploaded, just process it.
        # NOTE(review): indentation was lost in the reviewed copy; the audio
        # transcription below is assumed to apply to local files only, since
        # it converts `path` on disk -- confirm.
        if path.startswith('gs:'):
            vi.analyze_video(path, client)
            return

        if not common.check_file_exists(path):
            return

        url = self._upload(path, config.VIDEO_STORAGE_BUCKET)
        if not url:
            return
        vi.analyze_video(self._gs_uri(config.VIDEO_STORAGE_BUCKET, url), client)

        # Some formats cannot be converted straight to FLAC, so go through
        # mp4 first unless the file already is one.
        # TODO: Fix alternative video formats to FLAC (avi -> mp4 -> FLAC
        # is known to misbehave).
        base = os.path.splitext(path)[0]
        if path.lower().endswith('.mp4'):
            mp4_path = path
        else:
            mp4_path = base + '.mp4'
            common.convert_to_mp4(path, mp4_path)

        # Convert the mp4 to FLAC
        flac_path = base + '.flac'
        common.convert_to_audio(mp4_path, flac_path)

        # Upload the FLAC and transcribe it
        url = self._upload(flac_path, config.AUDIO_STORAGE_BUCKET)
        if not url:
            return
        ai = AudioIntelligence()
        audio_client = ai.get_client()
        self._transcribe(ai, audio_client,
                         self._gs_uri(config.AUDIO_STORAGE_BUCKET, url), code)

    def help_video(self):
        '''
        Display information about the video command ("help video").
        '''
        print('video <lang_code> <gs://<bucket_name>/<file_name>>\nvideo <lang_code> <local_path>\nAnalyze video with Google Intelligence API')

    def do_translate(self, path):
        '''
        Detect the language of a local text document and translate it.

        Usage:
            translate /home/devnet/file.txt : Translate from local file

        Remote sources (http..., gs:...) are not implemented yet.
        '''
        common.opening_label('# DOCUMENT TRANSLATION')

        # Instantiate a translate client
        ti = TranslateIntelligence()
        client = ti.get_client()

        # Determine if remote or local file.
        if path.startswith(('http', 'gs:')):
            # TODO: Fetch URL, parse w/ beautiful soup
            # Bail out explicitly so a URL is never handed to open().
            print('Remote documents are not supported yet; use a local file.')
            return

        if not common.check_file_exists(path):
            return

        # Read the document once; the first line is enough for detection
        with open(path) as f:
            lines = f.readlines()
        first_line = lines[0] if lines else ''

        # First detect the language
        resp = ti.detect_language(client, first_line)
        # round(..., 2): the precision used to be passed to format() by
        # mistake because of a misplaced parenthesis.
        print('Language: {}\nConfidence: {}%'.format(
            resp['language'], round(resp['confidence'] * 100, 2)))

        # Translate the document and display the output to the user
        resp_trans = ti.translate(client, lines)
        print('\nTranslation:')
        for text in resp_trans:
            print(text['translatedText'])

    def help_translate(self):
        '''
        Display information about the translate command ("help translate").
        '''
        print('translate gs://<bucket>/file.txt')
        print('translate <text_file>\nTranslate a local or remote document')

    def do_lang(self, search_term):
        '''
        Look up language codes for translation / speech recognition.

        Usage:
            lang        : display a complete list of available languages
            lang arabic : search descriptions matching "arabic" (regex,
                          case-insensitive)
        '''
        if not search_term:
            # No argument: show every language code and its description
            print('\n{0:12} {1}'.format('CODE', 'DESCRIPTION'))
            for lang in config.LANGUAGE:
                print('{0:12} {1}'.format(lang, config.LANGUAGE[lang]))
            return

        try:
            pattern = re.compile(search_term, re.IGNORECASE)
        except re.error:
            # Not a valid regular expression (e.g. "("): search for the
            # literal text rather than crashing the prompt.
            pattern = re.compile(re.escape(search_term), re.IGNORECASE)

        print('{0:12} {1}'.format('CODE', 'DESCRIPTION'))
        # items() works on both Python 2 and 3, unlike iteritems()
        for key, value in config.LANGUAGE.items():
            if pattern.search(value):
                print('{0:12} {1}'.format(key, value))

    def help_lang(self):
        '''
        Display information about the lang command ("help lang").
        '''
        print('lang\nlang <search-term>\nLocate a language code for translation or speech')
'''
############ START SCRIPT ###############
'''
# Shared helper and settings objects. The GooglePrompt methods look these
# names up as module globals (common.*, config.*), so they must be defined
# before main() starts the command loop.
common = Common()
config = Config()
def main():
    '''
    Print a blank line, then run the interactive GooglePrompt loop with a
    "> " prompt and the intro banner.
    '''
    intro = '\nThey\'ve done studies you know. Sixty percent of the time it works every time....'
    print()
    shell = GooglePrompt()
    shell.prompt = '> '
    shell.cmdloop(intro)


# Only start the prompt when executed as a script, not when imported.
if __name__ == '__main__':
    main()