-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
155 lines (121 loc) · 4.48 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os, sys, logging
from library.lib import parse_args, get_filenames_and_count_of_documents, \
WORD_TYPES, DynamicFields
from library.annotations import convert_to_objects
from library.lib import statistic_of_corpus
from library.lib import base_line_model
import numpy as np
import os
# Configure the root logger once at import time: records at INFO level and
# above go to the file "app.log", each line carrying the timestamp, logger
# name, level name and message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    filename="app.log",
)

# Logger used by this script; its records are tagged with the name 'main.py'.
logger = logging.getLogger('main.py')
class App(DynamicFields):
    """Application object that drives the processing steps of this script.

    The steps are exposed as ``first`` (load documents and corpus
    statistics), ``second`` (build the baseline pipeline) and ``third``
    (entry point for the extra-feature methods).

    Attributes that are not found through normal lookup resolve to ``None``
    instead of raising ``AttributeError`` (see ``__getattr__``).
    """

    document_count = 0

    # Default properties
    text_encoding = "utf-8"
    word_type = WORD_TYPES[0]
    fetures = False
    laplace = False

    # Path of project
    BASE_PATH = os.path.dirname(__file__)
    # unknown_word_freq = 0.5

    def __init__(self, *args, **kwargs):
        """Initialise the app from an argument object.

        The first positional argument must expose ``__dict__`` (the script
        passes the result of ``parse_args()``). Its attributes replace any
        keyword arguments supplied by the caller and are forwarded, together
        with the positional arguments, to the parent constructor.
        """
        kwargs = args[0].__dict__
        super(App, self).__init__(*args, **kwargs)

    def first(self):
        """Load the training documents and compute corpus statistics.

        Stores the document count and the two path collections returned by
        ``get_filenames_and_count_of_documents``, converts the annotations
        into document objects (``self.documents``) and then calls
        ``statistic_of_corpus`` on this instance.
        """
        self.document_count, self.d_paths, self.a_paths = \
            get_filenames_and_count_of_documents(self.src_train_texts)
        self.documents = convert_to_objects(
            self.a_paths,
            self.src_train_texts,
            self.text_encoding,
            self.train_size,
        )
        statistic_of_corpus(self)

    def second(self, data):
        """Build the baseline model and save it as the current pipeline.

        The ``data`` param should be zipped from 2 lists of entities.

        Example:
            data = zip(('ledocaine', 'word1'), ('anesthesia', 'word2'))
            ent0[0] = ledocaine; ent0[1] = anesthesia
            ent1[0] = word1;     ent1[1] = word2
        """
        self.pipeline = base_line_model(self, data)

    def third(self):
        """Return ``self`` so the caller can choose the required feature type."""
        return self

    def relation_in_one_sentence(self):
        """Add the features for relations located in one sentence.

        Features:
            Relation in sentence = [
                CPOS(part of speech in relation),
                WVNULL(when no verb in between),
                WVFL(when only verb in between),
                WBNULL(no words in between),
                WBFL(when only one word in between),
            ]

        For every language except ``'rus'`` the dependency-based features
        (DPR2C, DPR2D) are added as well.
        """
        # A
        self.pipeline.ref_in_one_cpos()
        self.pipeline.ref_in_one_wvnull()
        self.pipeline.ref_in_one_wvfl()
        self.pipeline.ref_in_one_wbnull()
        self.pipeline.ref_in_one_wbfl()
        # B
        if self.language != 'rus':
            self.pipeline.init_stanford_dependency_searching()
            try:
                self.pipeline.ref_in_one_dpr2c()
                self.pipeline.ref_in_one_dpr2d()
            except Exception as e:
                # Best effort: a failure here is reported but does not stop
                # the run.
                print(e)
            finally:
                # Always close the dependency core once it was initialised.
                self.pipeline.dependency_core.close()

    def relation_in_different_sentence(self):
        """Add the features for relations that span different sentences."""
        self.pipeline.ref_in_diff_sdist()
        self.pipeline.entity_freq_in_doc()
        self.pipeline.whether_type_of_entity_is_unique_in_doc()

    def __getattr__(self, attr):
        """Resolve attributes missing from normal lookup to ``None``.

        Python calls this hook only after regular attribute lookup failed.
        The lookup is delegated to the parent class; when the parent raises
        ``AttributeError`` (or defines no ``__getattr__`` at all) ``None`` is
        returned instead of propagating the error.
        """
        try:
            # The attribute name must be forwarded; calling the parent hook
            # without it raises TypeError, which is not caught below.
            return super(App, self).__getattr__(attr)
        except AttributeError:
            return None

    # Class-level default; ``first`` assigns the real value on the instance.
    a_paths = ()

    @property
    def annotation_paths(self):
        """Return ``a_paths``."""
        return self.a_paths

    # Class-level default; ``first`` assigns the real value on the instance.
    d_paths = ()

    @property
    def document_paths(self):
        """Return ``d_paths``."""
        return self.d_paths

    def get_references_from_documents(self):
        """Return one flat list with the ``references`` of every document."""
        # Built eagerly: a bare ``map`` call is lazy on Python 3 and would
        # never run its function, leaving the result empty.
        return [
            reference
            for doc in self.documents
            for reference in doc.references
        ]

    def set_refs_in_out(self, ref_in, ref_out):
        """Setter for in || out references.

        Currently only stores all references of the loaded documents in
        ``self.all_references``; ``ref_in`` and ``ref_out`` are not used.
        """
        self.all_references = self.get_references_from_documents()
if __name__ == '__main__':
    # Build the application from the parsed command-line arguments.
    cli_args = parse_args()
    application = App(cli_args)

    # ---- Task 1 -----------------------------------------------------------
    logger.info("First task started : Find relations")
    application.first()

    # ---- Task 2 -----------------------------------------------------------
    logger.info("Second task started : Baseline model")
    # Baseline test_data: one hard-coded pair of entities.
    first_entities = ('ledocaine',)
    second_entities = ('anesthesia', )
    application.second(zip(first_entities, second_entities))

    # ---- Task 3 -----------------------------------------------------------
    logger.info("Third task started : Add extra features for data")
    logger.info("Third task started : Extra features for relation in one sentence")
    # Groups A and B
    application.third().relation_in_one_sentence()
    # Groups C and D
    application.third().relation_in_different_sentence()