-
Notifications
You must be signed in to change notification settings - Fork 1
/
TXT_find-keywords-single-file.py
28 lines (18 loc) · 1.1 KB
/
TXT_find-keywords-single-file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import csv
# Input text file (OCR transcription) that is searched line by line.
IN_FILE = 'C:\\Users\\mobarget\\Documents\\Zedlers_Reallexikon_IMG&OCR_Einleitung und Inhalt\\Zedler_bsb10765064.txt'
# CSV file that receives one row per matching line.
OUT_FILE = 'C:\\Users\\mobarget\\Documents\\Zedlers_Reallexikon_IMG&OCR_Einleitung und Inhalt\\ResultTable.csv'
# Substrings to look for; matching is a plain, case-sensitive `in` test.
searchterms = ['Inſul', 'inſul', 'Inſula', 'Inſulae', 'Inſuln', 'Inſulen', 'Inſel', 'Inſeln']


def find_matching_lines(lines, terms):
    """Return the lines that contain at least one of *terms*.

    Args:
        lines: Iterable of strings to search.
        terms: Iterable of substrings; a line matches when any of them
            occurs in it (case-sensitive substring test).

    Returns:
        A list of the matching lines in their original order. Each line
        appears at most once, even when several terms occur in it. This
        matters because the search terms overlap: any line containing
        'Inſula' also contains 'Inſul'.
    """
    return [line for line in lines if any(term in line for term in terms)]


def write_results(rows, out_path):
    """Write each string in *rows* as a single-column row of a CSV file.

    Args:
        rows: Iterable of strings, one per output row.
        out_path: Path of the CSV file to create or overwrite (UTF-8).
    """
    # newline='' lets the csv module control line endings itself; without
    # it, Windows text mode turns '\r\n' into '\r\r\n' (blank rows).
    with open(out_path, 'w', encoding='utf-8', newline='') as g:
        writer = csv.writer(g)
        # Wrap each line in a list: a bare string would be iterated
        # character by character, producing one cell per character.
        writer.writerows([row] for row in rows)


if __name__ == '__main__':
    with open(IN_FILE, 'r', encoding='utf-8') as f:
        lines = f.read().split("\n")  # split the file into individual lines
    print(len(lines))  # overall number of lines in the TXT file

    result_list = find_matching_lines(lines, searchterms)
    print(len(result_list))  # number of matching lines

    write_results(result_list, OUT_FILE)