-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.py
70 lines (63 loc) · 1.98 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# -*- coding: utf-8 -*-
"""Config file to specify setup variables.
This file contains setup variables the models and features rely on.
"""
import os
# Literals that occur more than once in the registry below.
_CE_ACL_2014 = "IBM_Debater_(R)_CE-ACL-2014.v0"
_CLAIM_SEARCH_2018 = "IBM_Debater_(R)_claim_sentences_search"
_CDC_WORKBOOK = "2014_7_18_ibm_CDCdata.xls"

# Registry of datasets, keyed by a short identifier.
#
# Every entry carries:
#   "base_path" - directory component (relative, rooted at ``data``)
#   "name"      - dataset name string
#   "data"      - CSV file name
# "dataset_2014" additionally carries "claim_file" and "articles_file",
# which both name the same ``.xls`` workbook.
DATASETS = {
    # These two entries use a sub-directory of ``data`` named after the corpus.
    "dataset_2014": {
        "base_path": os.path.join("data", _CE_ACL_2014),
        "claim_file": _CDC_WORKBOOK,
        "articles_file": _CDC_WORKBOOK,
        "name": _CE_ACL_2014,
        "data": "CE-ACL_processed.csv",
    },
    "dataset_2018": {
        "base_path": os.path.join("data", _CLAIM_SEARCH_2018),
        "name": _CLAIM_SEARCH_2018,
        "data": "claim_sentence_search.csv",
    },
}

# The remaining entries all use ``data`` itself as "base_path"; they are
# appended in a fixed order so iteration over DATASETS stays stable.
DATASETS.update(
    {
        key: {"base_path": "data", "name": name, "data": csv_file}
        for key, name, csv_file in (
            (
                "dataset_2014_de",
                "IBM_Debater_(R)_CE-ACL-2014.v0_translated",
                "CE-ACL_processed_de_g.csv",
            ),
            (
                "dataset_2018_de",
                "IBM_Debater_(R)_claim_sentences_search_translated",
                "claim_sentence_search_de_g.csv",
            ),
            ("SMC_2000", "SMC_2000", "SMC_2000.csv"),
            ("SMC_1000", "SMC_1000", "SMC_1000.csv"),
            ("SMC_Full", "SMC_Full", "SMC_Full.csv"),
            ("dataset_1418", "dataset_1418", "dataset_1418.csv"),
        )
    }
)
# ---------------------------------------------------------------------------
# Tool-specific locations. All paths are relative and rooted at ``data``.
# ---------------------------------------------------------------------------

# NLTK: directory configured for NLTK data.
NLTK_DATA_PATH = os.path.join("data", "nltk_data")

# spaCy: model identifier, and the path built from the spaCy data directory
# plus that identifier.
SPACY_MODEL_NAME = "en_core_web_sm"
SPACY_DATA_PATH = os.path.join(
    os.path.join("data", "spacy_data"),
    SPACY_MODEL_NAME,
)

# Pyserini: working directory and the ``index`` location nested inside it.
PYSERINI_PATH = os.path.join("data", "pyserini")
INDEX_PATH = os.path.join(PYSERINI_PATH, "index")

# Claim lexicon text file, located directly under ``data``.
CLAIM_LEXICON_PATH = os.path.join("data", "claim_lexicon.txt")

# FastText: directory and the ``ce.bin`` file inside it.
FASTTEXT_PATH = os.path.join("data", "fasttext")
FASTTEXT_BIN_MODEL_PATH = os.path.join(FASTTEXT_PATH, "ce.bin")

# WandB: project name.
PROJECT_NAME = "Claim detection models"