-
Notifications
You must be signed in to change notification settings - Fork 5
/
sensitive_filter.py
83 lines (70 loc) · 2.52 KB
/
sensitive_filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# vim: ts=4:sw=4:sts=4:et
# -*- coding:utf-8 -*-
from .sensitive_tree import SensitiveTree
class SensitiveFilter(object):
    """Detect, count and mask sensitive words using a nested-dict prefix tree.

    As accessed by this class, every tree node is a mapping from one
    character to its child node, and a node whose ``"is_end"`` entry is truthy
    terminates a sensitive word.  Matching is shortest-first: a scan stops at
    the first ``"is_end"`` node it reaches, and matches never overlap.
    """

    def __init__(self, sensitive_tree=None, excludes=None):
        """Create a filter.

        :param sensitive_tree: root node of the prefix tree.  When falsy, the
            tree is obtained from ``SensitiveTree().fetch_sensitive_tree()``.
        :param excludes: iterable of substrings that are stripped from every
            text before it is scanned.  Defaults to an empty list.
        """
        if not sensitive_tree:
            sensitive_tree = SensitiveTree().fetch_sensitive_tree()
        self.sensitive_tree = sensitive_tree
        # A fresh list per instance: a mutable default in the signature would
        # be shared (and mutated) across every filter built without excludes.
        self.excludes = [] if excludes is None else excludes

    def fetch_node(self, node, keys_queue):
        """Follow ``keys_queue`` down from ``node`` and return the node reached.

        :param node: node to start from.
        :param keys_queue: sequence of keys to follow, in order.
        :returns: the node at the end of the path, ``node`` itself when
            ``keys_queue`` is empty, or ``None`` as soon as the path leaves
            the tree.
        """
        if not keys_queue:
            return node
        for key in keys_queue:
            # Stop instead of calling ``.get`` on a missing intermediate node.
            if node is None:
                return None
            node = node.get(key)
        return node

    def _iter_matches(self, txt):
        """Yield each sensitive word found in ``txt``, scanning left to right.

        Every position is tried as a possible word start, so a failed partial
        match never hides a word that begins inside it (``"bbad"`` still
        yields ``"bad"``).  ``txt`` must already be cleaned by the caller.
        """
        length = len(txt)
        start = 0
        while start < length:
            node = self.sensitive_tree
            end = start
            matched = False
            while end < length:
                node = node.get(txt[end])
                if not node:
                    break
                end += 1
                if node.get("is_end"):
                    matched = True
                    break
            if matched:
                yield txt[start:end]
                # Resume after the match so that matches do not overlap.
                start = end
            else:
                start += 1

    def sensitive_words_count(self, txt):
        """Return the number of non-overlapping sensitive words in ``txt``.

        The text is first passed through :meth:`clear_words`.
        """
        return sum(1 for _ in self._iter_matches(self.clear_words(txt)))

    def find_sensitive_words(self, txt):
        """Return ``True`` if ``txt`` contains at least one sensitive word.

        The text is first passed through :meth:`clear_words`; scanning stops
        at the first match.
        """
        for _ in self._iter_matches(self.clear_words(txt)):
            return True
        return False

    def replace_sensitive_words(self, txt, replace="*"):
        """Return ``txt`` with every detected sensitive word masked.

        The text is first passed through :meth:`clear_words`, so excluded
        substrings are absent from the result.  Each detected word is then
        replaced everywhere in the text by ``replace`` repeated once per
        character of the word.

        :param txt: text to mask.
        :param replace: non-empty string used as the mask unit.
        :raises ValueError: if ``replace`` is not a string or is empty.
        """
        if not isinstance(replace, str) or replace == "":
            raise ValueError("value error: the param replace "
                             "only support string type and not blank")
        txt = self.clear_words(txt)
        # Collect all matches before rewriting, so detection runs on the
        # cleaned text rather than on partially masked output.
        for sensitive_word in list(self._iter_matches(txt)):
            txt = txt.replace(sensitive_word, replace * len(sensitive_word))
        return txt

    def clear_words(self, txt):
        """Return ``txt`` with every entry of ``self.excludes`` removed."""
        for letter in self.excludes:
            txt = txt.replace(letter, "")
        return txt