-
Notifications
You must be signed in to change notification settings - Fork 0
/
GeneratePolyglot.py
executable file
·130 lines (107 loc) · 5.17 KB
/
GeneratePolyglot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
"""
Create a polyglot LaTeX document from bible passage txt files.
Get Bible passages from txt files (e.g. created by BibleScraper) and produce a
polyglot document for processing by LaTeX.
"""
# Author: Andy Holt
# Date: Tue 22 Dec 2015 13:54
import re
import datetime
import os
import codecs
import jinja2
import click
@click.command()
@click.option(
    '-p', '--passage',
    default='Genesis 1:1',
    help='Bible passage to use.',
)
@click.option(
    '-v', '--version',
    default='ESVUK',
    help='Translation(s) to use.',
)
@click.option(
    '--verbose',
    is_flag=True,
    help='Output status information.',
)
def generate_polyglot_cli(passage, version, verbose):
    """Generate a polyglot LaTeX file of PASSAGE in VERSIONS.
    Requires files of correct name with content to be placed in the
    directory. These should be generated using BibleScraper.py.
    generate_polyglot_cli provides a command line interface to generate_polyglot"""
    # NOTE: the docstring above is what click prints for ``--help``, so its
    # wording is user-facing output rather than internal documentation.
    #
    # This wrapper only parses the command-line options; all of the real work
    # is delegated, unchanged, to generate_polyglot.
    generate_polyglot(passage=passage, version=version, verbose=verbose)
def generate_polyglot(passage, version, verbose,
                      template_dir='/Users/adh/Projects/Erasmus'):
    """Generate a polyglot LaTeX file of PASSAGE in VERSIONS.

    Requires files of correct name with content to be placed in the
    current directory. These should be generated using BibleScraper.py.

    For a passage whose abbreviated name is ``<stem>`` (see
    ``_passage_file_stem``) and versions ``A;B`` the following files are
    read from the current working directory:

    * ``<stem>A.txt`` and ``<stem>B.txt`` -- the UTF-8 passage texts;
    * ``<stem>A-B.aln`` -- one alignment per line; each line is inserted
      (as a regular-expression fragment) into ``\\vn{...}`` to find the
      verse markers at which every translation is cut into paragraphs.

    The result is written to ``<stem>Polyglot.tex`` (UTF-8) by rendering
    the jinja2 template ``polyglot_template.tex``.

    Args:
        passage: Passage reference, e.g. ``'Genesis 1:1'``.
        version: Translation name(s), separated by ``';'``.
        verbose: When true, print progress messages to stdout.
        template_dir: Directory searched for ``polyglot_template.tex``.
            Defaults to the location that used to be hard-coded.

    Raises:
        IOError/OSError: if the alignment file or a passage file cannot be
            opened, or the output file cannot be written.
    """
    if verbose:
        print('Constructing polyglot...')

    version_list = version.split(';')
    passage_name = _passage_file_stem(passage)

    # get metadata based on inputs
    meta = {'filename': passage_name + 'Polyglot.tex',
            'passagename': passage,
            'datestamp': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'no_of_translations': len(version_list),
            'translations': version_list}
    input_files = [passage_name + v + '.txt' for v in version_list]

    # get alignments from file, one per line; blank lines are skipped so a
    # trailing empty line does not add a bogus '\vn{}' alternative
    alignments_file_name = passage_name + "-".join(version_list) + '.aln'
    with open(alignments_file_name, 'r') as f:
        alignments = [line.rstrip('\n') for line in f]
    alignments = [aln for aln in alignments if aln]

    # One capturing group per alignment so that re.split keeps the matched
    # verse marker.  With no alignments there is nothing to split on; an
    # empty pattern must not be used because on Python 3.7+ it would split
    # the text between every character.
    if alignments:
        splitter = re.compile(
            '|'.join(r'(\\vn\{' + aln + r'\})' for aln in alignments))
    else:
        splitter = None

    # get text from input files and cut each one into aligned paragraphs
    translations = []
    for file_name in input_files:
        with codecs.open(file_name, mode='r', encoding='utf-8') as f:
            translations.append(_split_into_paragraphs(f.read(), splitter))

    # Materialise the rows: on Python 3 zip() is a one-shot iterator, while
    # the template may need to traverse (or measure) the rows more than once.
    # Rows in which every translation is empty carry no content and are
    # dropped (this can only happen when all texts start with a marker).
    aligned_paragraphs = [row for row in zip(*translations) if any(row)]

    # create LaTeX document; the delimiters are chosen so that template
    # directives start with '%' (a LaTeX comment character)
    polyglot_renderer = jinja2.Environment(
        block_start_string='%{',
        block_end_string='%}',
        variable_start_string='%{{',
        variable_end_string='%}}',
        trim_blocks=True,
        lstrip_blocks=True,
        keep_trailing_newline=True,
        loader=jinja2.FileSystemLoader(template_dir))
    template = polyglot_renderer.get_template('polyglot_template.tex')

    if verbose:
        print('Writing to ' + meta['filename'] + '...')
    with codecs.open(meta['filename'], mode='w', encoding='utf-8') as f:
        f.write(template.render(meta=meta,
                                aligned_paragraphs=aligned_paragraphs))
    if verbose:
        print('Done.')


# Matches a '\begin{verse}' line sitting at the very end of a paragraph.
_TRAILING_BEGIN_VERSE = re.compile(r'\\begin\{verse\}\n$')


def _passage_file_stem(passage):
    """Return the abbreviated, filename-safe form of a passage reference.

    Each word is cut to its first three letters (an optional leading book
    number 1-3 is kept), ':' becomes '_' and all whitespace is removed,
    e.g. 'Genesis 1:1' -> 'Gen1_1'.
    """
    stem = re.sub(r'([1-3]*\s*[A-Za-z]{1,3})[A-Za-z]*\s*',
                  r'\1',
                  passage, flags=re.UNICODE)
    stem = re.sub(r':', r'_', stem, flags=re.UNICODE)
    return re.sub(r'\s', r'', stem, flags=re.UNICODE)


def _split_into_paragraphs(text, splitter):
    """Cut one translation into paragraphs at the aligned verse markers.

    ``splitter`` is the compiled alignment pattern (one capturing group per
    alignment) or None when there are no alignments, in which case the whole
    text is returned as a single paragraph.  The first paragraph is the text
    before the first marker; every following paragraph starts with its
    verse marker.
    """
    if splitter is None:
        return [text]

    # re.split yields one slot per capturing group for every match; only the
    # group that actually matched is not None.  Dropping just the Nones (and
    # keeping empty strings) leaves a strictly alternating list:
    # [text, marker, text, marker, text, ...]
    chunks = [c for c in splitter.split(text) if c is not None]

    # reattach verse markers to the paragraphs they introduce
    paragraphs = [chunks[0]]
    for marker, body in zip(chunks[1::2], chunks[2::2]):
        paragraphs.append(marker + body)

    # If a paragraph ends with '\begin{verse}', that line belongs to the
    # verse opening the next paragraph: move it there.  The last paragraph
    # has no successor and is left untouched.
    for i in range(len(paragraphs) - 1):
        trimmed, moved = _TRAILING_BEGIN_VERSE.subn('', paragraphs[i])
        if moved:
            paragraphs[i] = trimmed
            paragraphs[i + 1] = '\\begin{verse}\n' + paragraphs[i + 1]
    return paragraphs
# Script entry point: when this file is executed directly (rather than
# imported), hand control to the click command, which parses the
# command-line options itself.
if __name__ == '__main__':
    generate_polyglot_cli()