diff --git a/.gitignore b/.gitignore index 0c0236d..59f80f7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ DataProcessing/logs DataProcessing/source.py GUI/config temp +TODO.txt *.pyc *.idea diff --git a/DataProcessing/collectors.py b/DataProcessing/collectors.py index 9cde1a0..4a9afda 100644 --- a/DataProcessing/collectors.py +++ b/DataProcessing/collectors.py @@ -20,14 +20,13 @@ class UserCollector: - def __init__(self, url, header, verify): + def __init__(self, url, header): self.url = url self.header = header - self.verify = verify def get_associated_courses(self): api_target = r"{}/api/v1/courses?enrollment_state=active&per_page=50" - courses = requests.put(api_target.format(self.url), headers=self.header, verify=self.verify) + courses = requests.put(api_target.format(self.url), headers=self.header, verify=True) course_dict = {} for courses in courses.json(): @@ -41,7 +40,7 @@ def get_associated_courses(self): class Collector: """Collects data from the Canvas API """ - def __init__(self, url, header, class_id, verify=True): + def __init__(self, url, header, class_id): """ :param url: The URL used for Canvas ex. http://stanford.instructure.com :param header: The authorization information for the canvas API @@ -50,7 +49,6 @@ def __init__(self, url, header, class_id, verify=True): self.header = header self.url = url self.class_id = class_id - self.verify = verify def get_class_users(self, output_folder, output_file_name): """Gets all users in a specific class @@ -59,7 +57,7 @@ def get_class_users(self, output_folder, output_file_name): :return: A dictionary containing user ids linked to user names """ api_target = r'{}/api/v1/courses/{}/enrollments?per_page=50' - enrollment = requests.put(api_target.format(self.url, self.class_id), headers=self.header, verify=self.verify) + enrollment = requests.put(api_target.format(self.url, self.class_id), headers=self.header, verify=True) with open('{}/{}.json'.format(output_folder, output_file_name), 'w') as f: json.dump(enrollment.json(), f) @@ -74,7 +72,7 @@ def get_class_users(self, output_folder, output_file_name): def get_course_modules(self): api_target = "{}/api/v1/courses/{}/modules?per_page=50" - module = requests.put(api_target.format(self.url, self.class_id), headers=self.header, verify=self.verify) + module = requests.put(api_target.format(self.url, self.class_id), headers=self.header, verify=True) module_dict = {} for modules in module.json(): if modules['published']: @@ -88,9 +86,9 @@ def get_course_modules(self): class Module(Collector): """Represents and collects data from a single module """ - def __init__(self, module_id, url, header, class_id, verify=True): + def __init__(self, module_id, url, header, class_id): self.module_id = module_id - super(Module, self).__init__(url=url, header=header, class_id=class_id, verify=verify) + super(Module, self).__init__(url=url, header=header, class_id=class_id) @staticmethod def _get_module_quizzes(module_items): @@ -128,7 +126,7 @@ def get_module_items(self): """ api_target = r"{}/api/v1/courses/{}/modules/{}/items" module = requests.put(api_target.format(self.url, self.class_id, self.module_id), - headers=self.header, verify=self.verify) + headers=self.header, verify=True) notes = self._get_module_notes(module.json()) quizzes = self._get_module_quizzes(module.json()) module_dict = {'Subsections': {'Notes': notes, 'Quizzes': quizzes}, 'Overall': module.json()} @@ -141,16 +139,16 @@ def module_times(self): for students in self.get_class_users('temp', 'temp'): # url = r'{}/api/v1/courses/{}/analytics/users/{}/activity' api_target = r'{}/api/v1/users/{}/page_views' - response = requests.put(api_target.format(self.url, students), headers=self.header, verify=self.verify) + response = requests.put(api_target.format(self.url, students), headers=self.header, verify=True) # print(response.json()) class Quiz(Collector): """Represents a single quiz """ - def __init__(self, quiz_id, class_id, header, url, verify=True): + def __init__(self, quiz_id, class_id, header, url): self.quiz_id = quiz_id - super(Quiz, self).__init__(class_id=class_id, header=header, url=url, verify=verify) + super(Quiz, self).__init__(class_id=class_id, header=header, url=url) self.submissions = self._get_quiz_submissions() def get_quiz_question_ids(self): @@ -159,7 +157,7 @@ def get_quiz_question_ids(self): """ api_target = r'{}/api/v1/courses/{}/quizzes/{}/questions?per_page=100' quiz_response = requests.put(api_target.format(self.url, self.class_id, self.quiz_id), - headers=self.header, verify=self.verify) + headers=self.header, verify=True) quiz_question_id_dict = {} for questions in quiz_response.json(): @@ -178,7 +176,7 @@ def _get_quiz_submissions(self): """ api_target = r'{}/api/v1/courses/{}/quizzes/{}/submissions?per_page=100' quiz = requests.put(url=api_target.format(self.url, self.class_id, self.quiz_id), - headers=self.header, verify=self.verify) + headers=self.header, verify=True) submission_list = [] submission_dict = [] @@ -255,15 +253,26 @@ def get_correct_answers(self): """ event_dict = self.get_quiz_events() user_specific_correct_answer_dict = {} - cur_correct_answer_dict = {} + questions_answered = self.get_questions_answered() + for students, event_dict in event_dict.items(): + cur_correct_answer_dict = {} + student_dict = {} cur_quiz_data = list(event_dict.values())[0][0]['event_data']['quiz_data'] for questions in cur_quiz_data: cur_correct_answer_dict[questions["id"]] = [i["id"] for i in questions["answers"] if i["weight"] > 0] - user_specific_correct_answer_dict[students] = cur_correct_answer_dict + cur_questions_answered = questions_answered[students] + correct_questions = cur_correct_answer_dict.keys() + for questions in cur_questions_answered: + for ids in correct_questions: + if ids == int(questions['event_data'][0]['quiz_question_id']): + if cur_correct_answer_dict[ids][0] == int(questions['event_data'][0]['answer']): + student_dict[ids] = cur_correct_answer_dict[ids] + + user_specific_correct_answer_dict[students] = student_dict return user_specific_correct_answer_dict @@ -288,7 +297,7 @@ def get_page_leaves(self): current_list = [] for items in event_dict['quiz_submission_events']: - if items['event_type'] == 'page_blurred' or items['event_type'] == 'page_focused': + if items['event_type'] == 'page_blurred': current_list.append(items) page_leaves_dict[user_id] = current_list @@ -316,7 +325,7 @@ def get_quiz_events(self): quiz_events = {} for submission_id, user_id in self.submissions[0]: events = requests.put(api_target.format(self.url, self.class_id, self.quiz_id, submission_id), - headers=self.header, verify=self.verify) + headers=self.header, verify=True) quiz_events[user_id] = events.json() with open('{}/{}.json'.format(temp_dir, 'events_{}'.format(self.quiz_id)), 'w') as f: @@ -342,7 +351,7 @@ def get_discussion(self, output_folder='Discussions', output_file_name='discussi """ api_target = '{}/api/v1/courses/{}/discussion_topics/{}/view' r = requests.put(api_target.format(self.url, self.class_id, self.discussion_id), - headers=self.header, verify=self.verify) + headers=self.header, verify=True) with open('{}/{}.json'.format(output_folder, output_file_name), 'w') as f: json.dump(r.json(), f) diff --git a/DataProcessing/constructors.py b/DataProcessing/constructors.py index 01066be..80a92cf 100644 --- a/DataProcessing/constructors.py +++ b/DataProcessing/constructors.py @@ -17,22 +17,21 @@ class QuizEvents: """A builder of data sets for quiz event information""" - def __init__(self, quiz, questions_answered=None, anon=True, pre_flags=False): - print("Initializing Quiz") + def __init__(self, quiz, data_options, anon=True, pre_flags=False, controller=None): self.anon = anon # anon = anonymous (false: index = user id & true: index = user name) self.data_set = {} self.quiz = quiz - if not questions_answered: self._get_questions_answered() - else: self.questions_answered = questions_answered + self._get_questions_answered() self.pre_flags = pre_flags + self.controller = controller + self.data_options = data_options self._init_data_set() def _get_questions_answered(self): """Gets submission events and questions answered events """ - print("Building Questions Answered") self.submissions = self.quiz.submissions[1] self.questions_answered = self.quiz.get_questions_answered() @@ -50,12 +49,45 @@ def _init_data_set(self): self.data_set[user_id] = {} self.data_set['Overall'] = {} - # self._build_changed_questions(correct_only=True) - self._build_average_question_time() - # self._build_user_scores() - self._build_time_taken() - self._build_user_page_leaves() - if not self.anon: self._non_anon_data_set() + options = { + "Changed Questions": self._build_changed_questions, + "Average Question Time": self._build_average_question_time, + "User Scores": self._build_user_scores, + "Time Taken": self._build_time_taken, + "Page Leaves": self._build_user_page_leaves, + 'Difficulty Index': self._build_difficulty_index + } + + for keys in self.data_options: + if self.controller is not None: + self.controller.labelvar.set("Building {}".format(keys)) + self.controller.update() + options[keys]() + + if not self.anon: + if self.controller is not None: + self.controller.labelvar.set("Formatting With Names") + self.controller.update() + self._non_anon_data_set() + + def _build_difficulty_index(self): + + api_target = '{}/api/v1/courses/{}/quizzes/{}/statistics?per_page=10000' + question_stats = requests.put(api_target.format(self.quiz.url, self.quiz.class_id, self.quiz.quiz_id), + headers=self.quiz.header) + + correct_answers = self.quiz.get_correct_answers() + + for students, answers in self.questions_answered.items(): + self.data_set[str(students)]['difficulty_index'] = 0 + + current_correct_answers = correct_answers[students] + for correct_questions in current_correct_answers: + for question in question_stats.json()['quiz_statistics'][0]['question_statistics']: + if int(question['id']) == correct_questions: + self.data_set[str(students)]['difficulty_index'] += question['difficulty_index'] + + self.data_set['Overall']['difficulty_index'] = 0 def _build_changed_questions(self, correct_only=False): correct_answers = self.quiz.get_correct_answers() @@ -153,8 +185,8 @@ def _build_user_page_leaves(self): # Converts the dictionary keys into a list, takes the length to get the total number of questions # the multiplies by .5 to get 50 percent of the total number of questions if self.pre_flags: - questions_no_subdivision = len(list(self.quiz.get_quiz_question_ids().keys())) * .75 - if (cur_length / 2) >= questions_no_subdivision: + questions_no_subdivision = len(list(self.quiz.get_quiz_question_ids().keys())) * .70 + if cur_length >= questions_no_subdivision: self.data_set[user_id]['page_leaves'] = 'CA' else: self.data_set[user_id]['page_leaves'] = cur_length @@ -192,7 +224,6 @@ def _non_anon_data_set(self): if len(quiz_users['Overall']) != len(self.data_set['Overall']) or len(quiz_users) != len(self.data_set): rebuild = True else: - print("Getting Already Made User Name List") # Sets pandas options for displaying a much larger data set pd.set_option('display.max_columns', 100) pd.set_option('display.width', 100000) @@ -202,7 +233,6 @@ def _non_anon_data_set(self): else: rebuild = True if rebuild: - print("Rebuilding Name List") name_set = {} for ids, values in self.data_set.items(): profile = requests.put(r'{}/api/v1/users/{}/profile'.format(self.quiz.url, ids), headers=self.quiz.header) @@ -278,7 +308,3 @@ def build_dataframe(self): return pre_flags, data_frame else: return data_frame - - -# if risk > everything: -# program.work() \ No newline at end of file diff --git a/DataProcessing/source.py b/DataProcessing/source.py deleted file mode 100644 index 6c8a18a..0000000 --- a/DataProcessing/source.py +++ /dev/null @@ -1,134 +0,0 @@ -"""Data Analysis -""" -# Created on Tues. October 17 11:05:37 2017 -# Main file for data analytics -# @author: ByrdOfAFeather - -from collectors import * -from constructors import * -from secrets import keys, EuroDataSet, PsychDataSet -from sklearn.feature_selection import SelectKBest -from sklearn.feature_selection import f_classif -import numpy as np -from pandas.tools.plotting import scatter_matrix -import matplotlib.pyplot as plt -from ModelTraining import predictors - -nccs_token = keys[0] -nccs_url = 'http://nccs.instructure.com' -nccs_header = {'Authorization': 'Bearer {}'.format(nccs_token)} - - -def main(): - - EuroDataSet.dropna(axis=0, how='any', inplace=True) - print(EuroDataSet) - stds = [] - for items in EuroDataSet.columns.values: - stds.append(np.std(EuroDataSet[items])) - print(stds) - - PsychDataSet.dropna(axis=0, how='any', inplace=True) - print(PsychDataSet) - - # print(EuroDataSet) - # EuroDataSetFeatures = EuroDataSet.drop("Cheat", axis=1) - # result = SelectKBest(f_classif, k=4).fit_transform(EuroDataSetFeatures, EuroDataSet['Cheat']) - # print(result) - # print(EuroDataSet.corr()) - - # print(PsychDataSet) - # PsychDataSetFeatures = PsychDataSet.drop("Cheat", axis=1) - # result = SelectKBest(f_classif, k=4).fit_transform(PsychDataSetFeatures, PsychDataSet['Cheat']) - # print(result) - # print(PsychDataSet.corr()) - - # start = time.time() - # - # gatherer = Quiz(class_id=9713, quiz_id=10553, url=nccs_url, header=nccs_header) - # constructor = QuizEvents(gatherer, anon=False) - # dev_set = constructor.build_dataframe() - # - # print(dev_set) - # - # jack_walsh = predictors.AutoEncoder(dev_set, dev_set) - # jack_walsh.PCA() - # - # test_thresh = .1 - # n_init = 500000 - # learning_rate = .08 - # - # # Iterates for a obscene amount of times to produce results to analyze [IGNORABLE] - # for i in range(0, 1001): - # test = jack_walsh.separate(learning_rate=learning_rate, test_thresh=test_thresh) - # print(test) - # test_file = open(r"..\.\temp/model_info/classification/classification_{}_{}.txt".format( - # datetime.datetime.now().strftime("%Y-%m-%d T-%H-%M-%S"), - # str(jack_walsh.loss)), - # 'w', encoding='utf-8') - # - # omega = predictors.KMeansSeparator(test) - # results = omega.classify(clusters=2, n_init=n_init) - # - # class0 = results[results['Class'] == 0] - # class1 = results[results['Class'] == 1] - # class0_page_leaves = class0[2] - # class1_page_leaves = class1[2] - # - # class0_average_page_leaves = (sum(class0_page_leaves) / len(class0_page_leaves)) ** 2 - # class1_average_page_leaves = (sum(class1_page_leaves) / len(class1_page_leaves)) ** 2 - # - # results['Temp'] = None - # - # if class0_average_page_leaves > class1_average_page_leaves: - # results.loc[results['Class'] == 1, 'Temp'] = 0 - # results.loc[results['Class'] == 0, 'Temp'] = 1 - # - # results.loc[results['Temp'] == 1, 'Class'] = 1 - # results.loc[results['Temp'] == 0, 'Class'] = 0 - # - # results['Actual'] = EuroDataSet['Cheat'] - # results.Actual = results.Actual.astype(float) - # results['Result'] = None - # - # false_negative = len(results[(results['Class'] == 0) & (results['Actual'] == 1)]) - # print(results[(results['Class'] == 0) & (results['Actual'] == 1)]) - # results.loc[(results['Class'] == 0) & (results['Actual'] == 1), 'Result'] = u'❌' - # false_positive = len(results[(results['Class'] == 1) & (results['Actual'] == 0)]) - # results.loc[(results['Class'] == 1) & (results['Actual'] == 0), 'Result'] = u'❌' - # true_positive = len(results[(results['Class'] == 1) & (results['Actual'] == 1)]) - # results.loc[(results['Class'] == 1) & (results['Actual'] == 1), 'Result'] = u'✓' - # true_negative = len(results[(results['Class'] == 0) & (results['Actual'] == 0)]) - # results.loc[(results['Class'] == 0) & (results['Actual'] == 0), 'Result'] = u'✓' - # - # indexers = EuroDataSet.index.values - # missed_cheaters = 0; missed_non_cheaters = 0 - # for items in indexers: - # try: results.loc[items, 'Class'] - # except KeyError: - # did_cheat = EuroDataSet.loc[items, 'Cheat'] - # if did_cheat == 0: - # missed_non_cheaters += 1 - # true_negative += 1 - # elif did_cheat == 1: - # missed_cheaters += 1 - # false_negative += 1 - # - # test_file.write("THRESHOLD FOR TESTING : {}\n".format(test_thresh)) - # test_file.write("EPOCHS : {}\n".format(500000)) - # test_file.write("LEARNING RATE : {}\n".format(learning_rate)) - # test_file.write("N_INIT : {}\n".format(n_init)) - # test_file.write(u"{}".format(str(results))) - # test_file.write('\n\n\n') - # test_file.write("TRUE POSITIVES {} FALSE POSITIVES {}\n".format(true_positive, false_positive)) - # test_file.write("TRUE NEGATIVES {} FALSE NEGATIVES {}\n".format(true_negative, false_negative)) - # test_file.write("MISSED CHEATERS AS ANOMALIES {}\n".format(missed_cheaters)) - # test_file.write("MISSED NON-CHEATERS AS ANOMALIES {}\n".format(missed_non_cheaters)) - # - # end = time.time() - # test_file.close() - # - # print("TOTAL {}".format(end - start)) - - -if __name__ == "__main__": main() diff --git a/GUI/displayers.py b/GUI/displayers.py index 4d6e909..58a1031 100644 --- a/GUI/displayers.py +++ b/GUI/displayers.py @@ -1,5 +1,6 @@ import tkinter as tk import tkinter.ttk as ttk +from PIL import Image, ImageTk from DataProcessing import collectors import ml_displayers as tfd import requests @@ -53,17 +54,22 @@ def __init__(self, *args, **kwargs): self.url = None self.token = None self.headers = None - self.is_trusted_ssl = True + + # Sets up the default type of separation {AutoEncoder = Autoencoder and KMeans, Anomaly = OneClassSVM, + # No Exception = Always if page leaves > 1} + self.separation_options = ['Auto Encoder', 'Basic Anomaly', 'No Exceptions'] + self.separation_type = self.separation_options[0] # Sets up the container frame which will serve as a important piece of related classes container = tk.Frame(self) # Formats the container frame - container.grid(sticky='nsew') + container.pack(side='top', fill='both', expand=True) # Builds a dictionary linking {frame_name: frame_object_reference} self.frames = {} - for frames in (EULAMenu, TokenSelector, MainMenu, tfd.DevSettingsMenu, tfd.AutoEncoderSettingsMenu): + for frames in (EULAMenu, TokenSelector, MainMenu, tfd.DevSettingsMenu, tfd.AutoEncoderSettingsMenu, + SettingsMenu): cur_frame = frames(container, self) self.frames[frames.__name__] = cur_frame cur_frame.grid(sticky='nsew') @@ -95,145 +101,19 @@ def change_frame(self, frame): self.frames[frame].grid(sticky='nsew') self.frames[frame].init_gui() - def autoencoder_frame(self, **kwargs): + def autoencoder_frame(self, data_options=[], **kwargs): for frame_values in self.frames.values(): frame_values.grid_remove() self.frames['AutoEncoderSettingsMenu'].winfo_toplevel().geometry("") self.frames['AutoEncoderSettingsMenu'].tkraise() self.frames['AutoEncoderSettingsMenu'].grid(sticky='nsew') - self.frames['AutoEncoderSettingsMenu'].init_gui(**kwargs) + self.frames['AutoEncoderSettingsMenu'].init_gui(data_options, **kwargs) def _setup_token(self): token = json.load(open("{}/token.json".format(temp_dir))) self.token = token['token'] self.url = token['url'] self.headers = {'Authorization': 'Bearer {}'.format(self.token)} - self.is_trusted_ssl = token['trusted_ssl'] - - -class TokenSelector(tk.Frame): - """Allows the user to setup Canvas API information and store it in a .json format in the temp/data folder - """ - def __init__(self, parent, controller): - """Simple __init__ function to call the parent init and setup default values - - The values have to exist during the first call to these classes in the backend class values are unable to be - filled due to the uncertain condition of self.controller.token or self.controller.url - - :param parent: The parent containing the child frame, this is set to the MainBackEnd's self.container - :type: tk.Frame() - :param controller: The tkinter root that is able to add and refresh the GUI - :type: tk.Tk() - """ - tk.Frame.__init__(self, parent) - - self.parent = parent - self.controller = controller - - # Both of these are reclassified later as tk.Entry() objects to allow for users to input Canvas API information - self.token_input = None - self.url_input = None - - self.ssl_trusted_variable = None - self.ssl_trusted_drop_down = None - self.ssl_options = None - - # These are both placeholders to take on the value of input values from token and url input objects - self.token = None - self.url = None - - # Setups confirmation information that will be used to verify API information - self.confirmed_button = None - - self.error = None - self.default = [True, True] - - def delete_default(self, event): - """Changes the default text to nothing or resets it if nothing has been put in - :param event: Tkinter GUI event - :return: None - """ - if event.widget == self.url_input: - if self.default[0]: - event.widget.delete(0, "end") - self.default[0] = False - - if not self.default[0] and not self.token_input.get(): - self.token_input.insert(0, "Put your API key here!") - self.default[1] = True - - elif event.widget == self.token_input: - if self.default[1]: - event.widget.delete(0, "end") - self.default[1] = False - - if not self.default[1] and not self.url_input.get(): - self.url_input.insert(0, "Insert your url here! Ex. nccs, ncvps, stanford, etc") - self.default[0] = True - return None - - def init_gui(self): - """Re-assigns default values to GUI values - """ - url_default = "Insert your url here! Ex. nccs, ncvps, stanford, etc" - self.token_input = ttk.Entry(self, width=50) - self.url_input = ttk.Entry(self, width=50) - - self.token_input.delete(0, tk.END), self.url_input.delete(0, tk.END) - self.token_input.insert(0, "Put your API key here!"), self.url_input.insert(0, url_default) - self.token_input.bind("", self.delete_default), self.url_input.bind("", self.delete_default) - - self.token_input.grid(row=0, column=0, sticky='nsew'), self.url_input.grid(row=1, column=0, sticky='nsew') - - self.ssl_trusted_variable = tk.StringVar(self) - self.ssl_options = {'Regular SSL': True, 'Zscaler or Similar Network Security': False} - - self.ssl_trusted_drop_down = ttk.OptionMenu(self, self.ssl_trusted_variable, 'Regular SSL', *self.ssl_options) - self.ssl_trusted_drop_down.grid(sticky='nsew') - - self.confirmed_button = ttk.Button(self, command=self._confirm_token, width=20, text='Confirm Information!') - self.confirmed_button.grid(sticky='nsew') - - self.error = ttk.Label(self, text="Sorry, either your URL or API key is incorrect!") - - def set_ssl(self, _): - self.controller.is_trusted_ssl = self.ssl_options[self.ssl_trusted_variable.get()] - - def _confirm_token(self): - """Confirms the token and url match together and a single query to the Canvas API can be made successfully - """ - - # Gets the current tokens from the ttk.Entry menus - see self.init_gui() - cur_token = self.token_input.get() - cur_url = self.url_input.get() - - # Sets up the target url for the api and token information - api_target = "http://{}.instructure.com/api/v1/users/activity_stream".format(cur_url) - headers = {'Authorization': 'Bearer {}'.format(cur_token)} - try: - test = requests.put(api_target, headers=headers, verify=self.controller.is_trusted_ssl) - - if test.status_code == 200: - # If the response is positive the information is saved for storage in a token.json file - with open('{}/token.json'.format(temp_dir), 'w') as f: - json.dump({'token': cur_token, 'url': "http://" + cur_url + ".instructure.com", - 'trusted_ssl': self.controller.is_trusted_ssl}, f) - - # Sets up the information so that the program can continue to run without having to reload the data - self.controller.url = "http://" + cur_url + ".instructure.com" - self.controller.token = cur_token - self.controller.headers = {"Authorization": "Bearer {}".format(cur_token)} - self.controller.change_frame('MainMenu') - - else: - self.error.grid(sticky='nsew') - - # Assumes any error given as a connection error is probably a error with API or URL - except requests.exceptions.ConnectionError as e: - print("Something has gone wrong with setting up the url! Here's all the information I know: ") - print("{}".format(api_target), "{}".format(headers), "{}".format(cur_url), "{}".format(cur_token)) - print(e) - self.error.grid(sticky='nsew') class MainMenu(tk.Frame): @@ -255,6 +135,9 @@ def __init__(self, parent, controller): self.parent = parent self.controller = controller + # Placeholder variables for Settings Menu + self.settings_button = None + # Placeholder variables for Collector objects to be created when a section is chosen self.user_collector = None self.course_collector = None @@ -289,6 +172,9 @@ def __init__(self, parent, controller): self.module_error_label = None self.quiz_error_label = None + self.test_var = None + self.other_test_var = None + def init_gui(self): """Function to setup the menu with just a list of courses linked to the current API User's account """ @@ -308,13 +194,27 @@ def init_gui(self): # Gets a basic UserCollector Object to get course information self.user_collector = collectors.UserCollector(self.controller.url, - self.controller.headers, self.controller.is_trusted_ssl) + self.controller.headers) + + temp_label = ttk.Label(self, text='Getting Courses...') + temp_label.grid() + + self.controller.update() # Declares variables used for the course selection self.course_variable = tk.StringVar(self) self.courses = self.user_collector.get_associated_courses() course_names = list(self.courses.keys()) + temp_label.grid_forget() + temp_label.destroy() + + # self.test_var = Image.open(r'C:\Users\soult\OneDrive\Pictures\DCshHh3UQAEFkpf.png') + # self.other_test_var = ImageTk.PhotoImage(self.test_var) + self.settings_button = tk.Button(self, text='Settings', + command=lambda: self.controller.change_frame('SettingsMenu'), borderwidth=0) + self.settings_button.grid(row=0, column=1, sticky='n') + self.course_drop_down = ttk.OptionMenu(self, self.course_variable, "Select A Course!", *course_names) self.course_drop_down.grid(row=0, column=0, sticky='nsew') @@ -365,12 +265,19 @@ def update_module(self): # Gets the Module ID based on the already made dictionary from a collector class self.course_collector = collectors.Collector(url=self.controller.url, header=self.controller.headers, - class_id=self.cur_course_id, - verify=self.controller.is_trusted_ssl) + class_id=self.cur_course_id) + + temp_label = ttk.Label(self, text='Getting Modules...') + temp_label.grid() + self.controller.update() # Gets all the modules associated with a course self.modules = self.course_collector.get_course_modules() + temp_label.grid_forget() + temp_label.destroy() + self.controller.update() + # Sets up the StringVar to re-query every time the option is changed. module_names = list(self.modules.keys()) self.module_variable = tk.StringVar(self) @@ -378,7 +285,8 @@ def update_module(self): # Sets up the module drop down based on the StringVar and declares it's fixed GUI position self.module_drop_down = ttk.OptionMenu(self, self.module_variable, module_names[0], *module_names) - self.module_drop_down.grid(row=1, column=0, sticky='nsew') + self.module_drop_down.grid(row=2, column=0, sticky='nsew') + self.settings_button.grid(column=1, row=0) def update_quiz(self, *_): """Updates the quiz menu @@ -401,16 +309,23 @@ def update_quiz(self, *_): # Creates a module collector and sets up a quiz dictionary self.module_collector = collectors.Module(module_id=self.cur_module_id, class_id=self.cur_course_id, - url=self.controller.url, header=self.controller.headers, - verify=self.controller.is_trusted_ssl) + url=self.controller.url, header=self.controller.headers) + + temp_label = ttk.Label(self, text='Getting Quizzes...') + temp_label.grid() + self.controller.update() # Sets up a list of quizzes and their respective names self.quizzes = self.module_collector.get_module_items()['Subsections']['Quizzes'] quiz_names = list(self.quizzes.keys()) + temp_label.grid_forget() + temp_label.destroy() + self.controller.update() + # Creates a button that links to starting the auto_encoder GUI with confirmed settings self.select_quiz = ttk.Button(self, text='Select Quiz!', command=self.start_autoencoder_gui) - self.select_quiz.grid(row=1, column=1, sticky='nsew') + self.select_quiz.grid(row=2, column=1, sticky='nsew') # Labels a default value if no quizzes are found if len(quiz_names) == 0: @@ -424,7 +339,9 @@ def update_quiz(self, *_): self.quiz_variable = tk.StringVar(self) self.quiz_drop_down = ttk.OptionMenu(self, self.quiz_variable, default, *quiz_names) - self.quiz_drop_down.grid(row=2, column=0, sticky='nsew') + self.quiz_drop_down.grid(row=3, column=0, sticky='nsew') + + self.settings_button.grid(column=1, row=0) def start_autoencoder_gui(self): """Starts the GUI for the autoencoder @@ -432,64 +349,68 @@ def start_autoencoder_gui(self): current_quiz_key = self.quiz_variable.get() self.cur_quiz_id = self.quizzes[current_quiz_key] self.quiz_collector = collectors.Quiz(quiz_id=self.cur_quiz_id, class_id=self.cur_course_id, - url=self.controller.url, header=self.controller.headers, - verify=self.controller.is_trusted_ssl) + url=self.controller.url, header=self.controller.headers) self.controller.cur_quiz = self.quiz_collector self.controller.autoencoder_frame() -class SettingsMenu(tk.Frame): - """ - TODO: Retool as a child of TokenSelector/Make General Settings Menu - TODO: [Settings Icon] https://www.reddit.com/r/learnpython/comments/4kjie3/how_to_include_gui_images_with_pyinstaller/ - """ +class GeneralSettings(tk.Frame): def __init__(self, parent, controller): + """Simple __init__ function to call the parent init and setup default values + + The values have to exist during the first call to these classes in the backend class values are unable to be + filled due to the uncertain condition of self.controller.token or self.controller.url + + :param parent: The parent containing the child frame, this is set to the MainBackEnd's self.container + :type: tk.Frame() + :param controller: The tkinter root that is able to add and refresh the GUI + :type: tk.Tk() + """ tk.Frame.__init__(self, parent) + self.parent = parent self.controller = controller - self.token_change_entry = None - self.url_change_entry = None - self.CA_change_entry = None - self.confirm_button = None - self.error = None + # Both of these are reclassified later as tk.Entry() objects to allow for users to input Canvas API information + self.token_input = None + self.url_input = None - def init_gui(self): - self.error = ttk.Entry(self, text='This Information Does Not Appear To Work!') - self.token_change_entry = ttk.Entry(self, text=self.controller.token) - self.url_change_entry = ttk.Entry(self, text=self.controller.url) - self.CA_change_entry = ttk.Entry(self, text=self.controller.is_trusted_ssl) - self.confirm_button = ttk.Button(self, text="Confirm Information!", command=self._confirm_token) + # These are both placeholders to take on the value of input values from token and url input objects + self.token = None + self.url = None + + # Setups confirmation information that will be used to verify API information + self.confirmed_button = None - self.token_change_entry.grid(sticky='nsew') - self.url_change_entry.grid(sticky='nsew') - self.CA_change_entry.grid(sticky='nsew') - self.confirm_button.grid(sticky='nsew') + self.error = None + self.default = [True, True] + + # Sets up default value values that will fill in the blank when nothing is typed + self.default_values = None def _confirm_token(self): """Confirms the token and url match together and a single query to the Canvas API can be made successfully """ - # Gets the current tokens from the tk.Entry menus - see self.init_gui() - cur_token = self.token_change_entry.get() - cur_url = self.url_change_entry.get() + # Gets the current tokens from the ttk.Entry menus - see self.init_gui() + cur_token = self.token_input.get() + cur_url = self.url_input.get() # Sets up the target url for the api and token information api_target = "http://{}.instructure.com/api/v1/users/activity_stream".format(cur_url) headers = {'Authorization': 'Bearer {}'.format(cur_token)} try: - test = requests.put(api_target, headers=headers, verify=self.CA_change_entry.get()) + test = requests.put(api_target, headers=headers, verify=True) + if test.status_code == 200: # If the response is positive the information is saved for storage in a token.json file with open('{}/token.json'.format(temp_dir), 'w') as f: - json.dump({'token': cur_token, 'url': "http://" + cur_url + ".instructure.com", - 'trusted_ssl': self.controller.is_trusted_ssl}, f) + json.dump({'token': cur_token, 'url': "http://" + cur_url + ".instructure.com"}, f) # Sets up the information so that the program can continue to run without having to reload the data self.controller.url = "http://" + cur_url + ".instructure.com" self.controller.token = cur_token self.controller.headers = {"Authorization": "Bearer {}".format(cur_token)} - self.controller.change_frame('MainMenu') else: self.error.grid(sticky='nsew') @@ -501,6 +422,130 @@ def _confirm_token(self): print(e) self.error.grid(sticky='nsew') + def delete_default(self, event): + """Changes the default text to nothing or resets it if nothing has been put in + :param event: Tkinter GUI event + :return: None + """ + if event.widget == self.url_input: + if self.default[0]: + event.widget.delete(0, "end") + self.default[0] = False + + if not self.default[0] and not self.token_input.get(): + self.token_input.insert(0, self.default_values[0]) + self.default[1] = True + + elif event.widget == self.token_input: + if self.default[1]: + event.widget.delete(0, "end") + self.default[1] = False + + if not self.default[1] and not self.url_input.get(): + self.url_input.insert(0, self.default_values[1]) + self.default[0] = True + return None + + +class TokenSelector(GeneralSettings): + """Allows the user to setup Canvas API information and store it in a .json format in the temp/data folder + """ + def __init__(self, parent, controller): + GeneralSettings.__init__(self, parent, controller) + + def init_gui(self): + """Re-assigns default values to GUI values + """ + self.default_values = [ + 'Put your API key here', + 'Insert your url here! Ex. nccs, ncvps, stanford, etc' + ] + + url_default = "Insert your url here! Ex. nccs, ncvps, stanford, etc" + self.token_input = ttk.Entry(self, width=50) + self.url_input = ttk.Entry(self, width=50) + + self.token_input.delete(0, tk.END), self.url_input.delete(0, tk.END) + self.token_input.insert(0, "Put your API key here!"), self.url_input.insert(0, url_default) + self.token_input.bind("", self.delete_default), self.url_input.bind("", self.delete_default) + + self.token_input.grid(row=0, column=0, sticky='nsew'), self.url_input.grid(row=1, column=0, sticky='nsew') + + self.confirmed_button = ttk.Button(self, command=self.confirm_settings, width=20, text='Confirm Information!') + self.confirmed_button.grid(sticky='nsew') + + self.error = ttk.Label(self, text="Sorry, either your URL or API key is incorrect!") + + def confirm_settings(self): + self._confirm_token() + self.controller.change_frame('MainMenu') + + +class SettingsMenu(GeneralSettings): + """ + TODO: Retool as a child of TokenSelector/Make General Settings Menu + TODO: [Settings Icon] https://www.reddit.com/r/learnpython/comments/4kjie3/how_to_include_gui_images_with_pyinstaller/ + """ + def __init__(self, parent, controller): + GeneralSettings.__init__(self, parent, controller) + self.type_of_separation = None + self.type_of_separation_var = None + + self.separation_label = None + self.url_label = None + self.token_label = None + + def init_gui(self): + """Initializes GUI values + """ + for widget in self.winfo_children(): + widget.pack_forget() + widget.destroy() + + self.default_values = [ + self.controller.token, + self.controller.url.split('/')[2].split('.')[0] + ] + + self.token_label = ttk.Label(self, text="API KEY:") + self.token_label.grid(sticky='n') + + # Sets up Token Input + self.token_input = ttk.Entry(self, width=60) + self.token_input.grid() + + self.url_label = ttk.Label(self, text="URL:") + self.url_label.grid(sticky='n') + + # Sets up URL Input Button + self.url_input = ttk.Entry(self, width=60) + self.url_input.grid() + + self.token_input.delete(0, tk.END), self.url_input.delete(0, tk.END) + self.token_input.insert(0, self.default_values[0]), self.url_input.insert(0, self.default_values[1]) + self.token_input.bind("", self.delete_default), self.url_input.bind("", self.delete_default) + + self.separation_label = ttk.Label(self, text='Choose The Type Of Separation You want To Use:') + self.separation_label.grid(row=0, column=2) + + # Sets up the type of separation drop down + separation_type_style = ttk.Style() + separation_type_style.configure("OP.TMenubutton", background='#dedede') + self.type_of_separation_var = tk.StringVar() + self.type_of_separation_var.set(self.controller.separation_type) + self.type_of_separation = ttk.OptionMenu(self, self.type_of_separation_var, + self.controller.separation_type, *self.controller.separation_options, style='OP.TMenubutton') + self.type_of_separation.grid(sticky='ew', row=1, column=2) + + # Sets up confirm button + self.confirmed_button = ttk.Button(self, text='Confirm Settings!', command=self.confirm_settings) + self.confirmed_button.grid(column=1, sticky='n') + + def confirm_settings(self): + self._confirm_token() + self.controller.separation_type = self.type_of_separation_var.get() + self.controller.change_frame('MainMenu') + class EULAMenu(tk.Frame): """Simple class to gain acceptance of EULA diff --git a/GUI/ml_displayers.py b/GUI/ml_displayers.py index 4cc4ba4..0d70486 100644 --- a/GUI/ml_displayers.py +++ b/GUI/ml_displayers.py @@ -35,6 +35,8 @@ def __init__(self, parent, controller): self.threshold_input = None self.epochs_input = None + self.options_buttons = None + def init_gui(self): """Sets up input for hyper parameters and their default values, along with a warning """ @@ -46,7 +48,7 @@ def init_gui(self): style = ttk.Style() style.configure('R.TLabel', foreground='red') - overall_label = ttk.Label(self, text="WARNING THESE ARE DEVELOPER FEATURES, IF YOU DON'T KNOW WHAT THESE DO, " + overall_label = ttk.Label(self, text="WARNING THESE ARE DEVELOPER OPTIONS, IF YOU DON'T KNOW WHAT THESE DO, " "YOU MAY NOT WANT TO EDIT THESE!", style='R.TLabel') overall_label.grid(row=0, column=0) @@ -75,6 +77,20 @@ def init_gui(self): confirm_button = ttk.Button(self, text='Confirm!', command=self.params_config) confirm_button.grid() + self.options_buttons = { + 'Changed Questions': tk.IntVar(), + 'Average Question Time': tk.IntVar(), + 'User Scores': tk.IntVar(), + 'Time Taken': tk.IntVar(), + 'Page Leaves': tk.IntVar(), + 'Difficulty Index': tk.IntVar() + } + + check_list = [ttk.Checkbutton(self, text=text, var=var) for text, var in self.options_buttons.items()] + + for col_i, buttons in enumerate(check_list): + buttons.grid(row=col_i + 1, column=2, columnspan=2, sticky='w') + def params_config(self): """Sets up parameters to pass to the AutoEncoder This function finds all the values that can be converted to float (except epochs that has to be converted to @@ -123,7 +139,11 @@ def params_config(self): else: kwargs[index] = float(values) - self.controller.autoencoder_frame(**kwargs) + options = [] + for index in self.options_buttons.keys(): + if self.options_buttons[index].get(): + options.append(index) + self.controller.autoencoder_frame(options, **kwargs) class AutoEncoderSettingsMenu(tk.Frame): @@ -139,29 +159,48 @@ def __init__(self, parent, controller): self.data_set = None self.pre_flags = None self.labelvar = None + self.start_button = None + self.view_data_button = None + self.data_window = None self.params = None + self.data_options = None self.built_data_set = False + self.previous_options = None + self.previous_course = None def build_data_set(self): """Builds data set Sets up the pre_flags which are cheaters pre separated based on unreasonable values in page_leaves """ + self.previous_course = self.controller.cur_quiz.class_id self.quiz = self.controller.cur_quiz - self.quiz_constructor = constructors.QuizEvents(self.quiz, anon=False, pre_flags=True) + self.quiz_constructor = constructors.QuizEvents(self.quiz, self.data_options, False, True, self) data_sets = self.quiz_constructor.build_dataframe() self.pre_flags = data_sets[0] self.data_set = data_sets[1] + self.labelvar.set("Data Set Built.") self.built_data_set = True - def init_gui(self, **kwargs): + def init_gui(self, data_options, **kwargs): """Initializes GUI for Tensorflow start menu :param kwargs: Arguments passed to override the default values of the AutoEncoder. These come from the DevSettingsMenu """ + self.previous_options = self.data_options + + if len(data_options): + self.data_options = data_options + else: + if self.controller.separation_type == 'No Exceptions': + self.data_options = ['Page Leaves'] + else: + self.data_options = ['Average Question Time', 'Time Taken', 'Page Leaves'] self.params = kwargs + self.built_data_set = 1 if self.previous_options == self.data_options else 0 + # Resets the menu for widget in self.winfo_children(): widget.pack_forget() @@ -169,26 +208,68 @@ def init_gui(self, **kwargs): self.labelvar = tk.StringVar(self) label = ttk.Label(self, textvar=self.labelvar) - label.grid(sticky='nsew') + label.grid(sticky='nsew', columnspan=10) - # TODO: Settings for data set - if not self.built_data_set: + # Checks if the data set is built or if the quiz has changed since the last build + if not self.built_data_set or self.previous_course != self.controller.cur_quiz.class_id: self.labelvar.set("Building Data Set!") self.controller.update() self.build_data_set() + else: self.labelvar.set("Data Set Built!") - print(self.data_set) - print(self.pre_flags) # Opens dev settings menu if Ctrl + F12 is pressed self.controller.bind('', lambda _: self.controller.change_frame('DevSettingsMenu')) - self.start_button = ttk.Button(self, text="Start Separation Process!", command=self.start_autoencoder) + self.start_button = ttk.Button(self, text="Start Separation Process!", command=self.start_separator) self.start_button.grid(sticky='nsew') + def start_separator(self): + """Simple function to check for what type of separation the user wants to do + """ + if self.controller.separation_type == 'Auto Encoder': + self.start_autoencoder() + + # Sets up the view data button + self.view_data_button = ttk.Button(self, text="View Data!", command=self.open_data_window) + self.view_data_button.grid(sticky='nsew') + + elif self.controller.separation_type == 'Basic Anomaly': + self.start_basic_anomaly() + + # Sets up the view data button + self.view_data_button = ttk.Button(self, text="View Data!", command=self.open_data_window) + self.view_data_button.grid(sticky='nsew') + + elif self.controller.separation_type == 'No Exceptions': + self.start_no_exception() + + def open_data_window(self): + """Displays the current data set in a new window, with data summaries""" + self.view_data_button.grid_forget() + self.view_data_button.destroy() + + self.data_window = tk.Toplevel() + c = 0 + for cols in self.data_set.columns.values: + c += 1 + col_label = ttk.Label(self.data_window, text="{} ".format(cols)) + col_label.grid(row=0, column=c) + + i, j = 0, 0 + for index in self.data_set.index.values: + i += 1 + j = 0 + cur_index_label = ttk.Label(self.data_window, text="{}".format(index)) + cur_index_label.grid(row=i, column=0) + for values in self.data_set.loc[index]: + j += 1 + cur_value_label = ttk.Label(self.data_window, text='{}'.format(values)) + cur_value_label.grid(row=i, column=j) + def start_autoencoder(self): """Starts the process of separating the data through a autoencoder. @@ -198,7 +279,6 @@ def start_autoencoder(self): as cheaters. They are found by comparing the index of the original data_set that is fed into the autoencoder and the index of the pre_flag. If a item is in pre_flag but not in the original data_set, then it is labeled as a positive for cheating. - """ self.built_data_set = False @@ -221,11 +301,6 @@ def start_autoencoder(self): # otherwise, use the default function output = jack_walsh.separate(labelvar=self.labelvar, controller=self.controller) - # Reset the temp_back_button - if temp_back_button is not None: - temp_back_button.pack_forget() - temp_back_button.destroy() - # Starts the clustering algorithm (won't start until the separation finishes) self.labelvar.set("Starting Clustering Algorithm!") self.controller.update() @@ -234,12 +309,15 @@ def start_autoencoder(self): # outputs the cheaters self.labelvar.set("DO NOT TAKE AT FACE VALUE") - self.display_outputs(results) + self.display_autoencoder_outputs(results) - def display_outputs(self, results): # Gets the index values of specific classes + def display_autoencoder_outputs(self, results): """Sets up the display with outputs from the KMeansSeparator function :param results: return value of KMeansSeparator.classify() function """ + + temp_back_button = None + # Gets a series where the class is equal to 1 or 0 class0 = results[results['Class'] == 0] class1 = results[results['Class'] == 1] @@ -285,17 +363,16 @@ def display_outputs(self, results): # Gets the index values of specific classes list_of_labels = [] # Builds labels for the participants, if they don't appear in the results index, they are labeled as # non-anomalous students, therefore, they are classified as non-cheaters. - print(results) for items in iterable_index: if items not in results.index.values or str(results.loc[items, 'Cheat']) == 'nan': - list_of_labels.append(tk.Label(self, text=items + '\n' + u'❌')) + list_of_labels.append(tk.Label(self, text='{}\n{}'.format(items, u'❌'))) else: if str(results.loc[items, 'Opposite Distance']) == 'nan': list_of_labels.append(tk.Label(self, text="{}\n{}".format( items, results.loc[items, 'Cheat']))) else: - list_of_labels.append(tk.Label(self, text="{}\n{}\n{}".format( + list_of_labels.append(tk.Label(self, text="{}\n{}\n{}" .format( items, results.loc[items, 'Cheat'], "OD: {}\nADL {}".format( round(results.loc[items, 'Opposite Distance'], 2), @@ -304,13 +381,74 @@ def display_outputs(self, results): # Gets the index values of specific classes ))) # Grids labels - for items in list_of_labels: - items.grid() + i = -1 + row_index = 0 + for labels in list_of_labels: + i += 1 + + if i % 5 == 0: + row_index += 1 + i = 0 + + labels.config(font=("TkDefaultFont", 9)) + labels.grid(row=row_index + 2, column=i, sticky='n', padx=10, pady=5) + + # Reset the temp_back_button + if temp_back_button is not None: + temp_back_button.pack_forget() + temp_back_button.destroy() # Sets a back button to return and select another quiz temp_back_button = ttk.Button(self, text="Select Another Quiz", command=lambda: self.controller.change_frame('MainMenu')) - temp_back_button.grid() + temp_back_button.grid(row=0, column=4) + + def start_basic_anomaly(self): + self.built_data_set = False + jack_walsh = predictors.OneClassSVMSeperator(self.data_set) + jack_walsh.run() + + def start_no_exception(self): + # Sets a default value for the temp_back_button + temp_back_button = None + + self.built_data_set = False + + self.labelvar.set("{} Indicates a cheat, {} Indicates a not cheater".format(u'✓', u'❌')) + + # Resets the start button if it already exists (Returning from DevSettings Menu or by Selecting Another Quiz) + self.start_button.grid_forget() + self.start_button.destroy() + + label_list = [] + jack_walsh = self.data_set.loc[self.data_set['page_leaves'] >= 1] + self.data_set.drop(jack_walsh.index.values, inplace=True) + for items in self.data_set.index.values: + label_list.append(ttk.Label(self, text="{}\n{}".format(items, u'❌'))) + for items in jack_walsh.index.values: + label_list.append(ttk.Label(self, text="{}\n{}\nPage Leaves: {}".format(items, u'✓', + jack_walsh.loc[items, 'page_leaves']))) + + # Grids labels + i = -1 + row_index = 0 + for labels in label_list: + i += 1 + if i % 5 == 0: + row_index += 1 + i = 0 + + labels.config(font=("TkDefaultFont", 9)) + labels.grid(row=row_index, column=i, sticky='w', padx=10, pady=5) + + # Reset the temp_back_button + if temp_back_button is not None: + temp_back_button.pack_forget() + temp_back_button.destroy() + + temp_back_button = ttk.Button(self, text="Select Another Quiz", + command=lambda: self.controller.change_frame('MainMenu')) + temp_back_button.grid(row=0, column=4) diff --git a/ModelTraining/predictors.py b/ModelTraining/predictors.py index 19a1960..0d54da6 100644 --- a/ModelTraining/predictors.py +++ b/ModelTraining/predictors.py @@ -4,6 +4,7 @@ import pandas as pd from sklearn.preprocessing import StandardScaler from sklearn.cluster import KMeans +from sklearn.svm import OneClassSVM from datetime import datetime os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' @@ -25,9 +26,9 @@ # For easy access from GUI modules AUTOENCODER_DEFAULTS = dict( learning_rate=.08, - layer_1_f=10, - layer_2_f=5, - layer_3_f=2, + layer_1_f=10, # 10 + layer_2_f=5, # 5 + layer_3_f=2, # 2 epochs=500000, test_thresh=.1, test=True, @@ -210,7 +211,6 @@ def train(x, log=False): return anomaly_list -# noinspection PyUnresolvedReferences class KMeansSeparator: """Main class representing a implementation of SKLearn's KMeans algorithm """ @@ -250,6 +250,7 @@ def classify(self, n_init=1000, clusters=2): classifier.fit_transform(data_dict) indexs = data_dict.index.values + op_distance = [] for index in range(0, len(data_dict)): # TODO: Replace Euclidean distance in case of better metrics cur_index = indexs[index] @@ -269,6 +270,8 @@ def classify(self, n_init=1000, clusters=2): distances.append(distance) cur_distance = sum(distances) ** (1/2) + op_distance.append(cur_distance) + results_dict.at[cur_index, 'Opposite Distance'] = cur_distance cur_class = 1 if predict else 0 @@ -281,7 +284,50 @@ def classify(self, n_init=1000, clusters=2): distances.append(distance) cur_distance = sum(distances) ** (1/2) - # noinspection PyUnresolvedReferences results_dict.at[cur_index, 'Assigned Distance'] = cur_distance + if sum(op_distance) <= 1: + results_dict['Class'] = 0 + return results_dict + + +class OneClassSVMSeperator(OneClassSVM): + """Class representing a implementation of SKLearn's one class support vector machine + """ + def __init__(self, data_set, **kwargs): + """Initializes the object with a scaled data set and fit_transform functions + :param data_set: The data set to find anomalies from + :param kwargs: Normal arguments from sklearn's OneClassSVM + *see http://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html + """ + OneClassSVM.__init__(self, **kwargs) + self.original_data_set = data_set + self.scaler = StandardScaler().fit(self.original_data_set) + self.scaled_data_set = self.scaler.transform(self.original_data_set) + + self.initial_anomaly_labels = None + self.anomaly_user_list = [] + + def run(self): + """Starts the separation process""" + self.fit(self.scaled_data_set) + self._initial_separation() + self._secondary_separation() + + def _initial_separation(self): + """Initially finds the anomalies""" + self.initial_anomaly_labels = self.predict(self.scaled_data_set) + for index, labels in enumerate(self.initial_anomaly_labels): + if labels == -1: + self.anomaly_user_list.append(self.original_data_set.index.values[index]) + + def _secondary_separation(self): + org_anomaly_data_set = self.original_data_set.loc[self.anomaly_user_list] + print(org_anomaly_data_set.index.values) + scaled_anomaly_data_set = self.scaler.transform(org_anomaly_data_set) + self.fit(scaled_anomaly_data_set) + anomaly_predictions = self.predict(scaled_anomaly_data_set) + for index, pred in enumerate(anomaly_predictions): + if pred == 1: + print(org_anomaly_data_set.index.values[index]) diff --git a/README.md b/README.md index 87e18a0..e28ad22 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- +

Product

@@ -123,13 +123,6 @@ ID as the index or the user name. The data will contain the original testing dat sci-kit learn standard scalar applied to it and the predicted cluster (by default there will either be 1 or 0). -

Notes

- -The inclusion of the ability in the GUI to turn off SSL Certificate checks with the requests module is done -so that invalid configurations of popular network security software, such as Zscaler, can still -use this software. This is to be done at the user’s own risk. This is a temporary solution -while a more secure one is being developed. For more information about security -risks and the vulnerability to "man-in-the-middle" attacks when using this mode, see [here](http://docs.python-requests.org/en/master/user/advanced/)

Related Works

@@ -139,6 +132,3 @@ risks and the vulnerability to "man-in-the-middle" attacks when using this mode,

License

MIT License, See [License.md](https://github.com/ByrdOfAFeather/NARC/blob/master/LICENSE) for details - - -