-
Notifications
You must be signed in to change notification settings - Fork 2
/
render_words_on_memrise.py
496 lines (482 loc) · 19.7 KB
/
render_words_on_memrise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
"""This module provides a script with a GUI that can access a course edit page
on Memrise (memrise.com) through a browser, render the words of the course's
levels (using either Pango or XeLaTeX), and add those rendered images to the
words in a (pre-existing) image column.
"""
import os
import time
# interaction with browser
from selenium import webdriver
# parsing HTML
from bs4 import BeautifulSoup
# running commands
import subprocess
# GUI elements
import tkinter as tk
from tkinter import ttk as ttk, font as tkf
def debug(s, indent=0):
"""Debugging log function."""
if __debug__:
print((' ' * indent) + s)
class PngTextRenderer:
"""Interface for a class that can render text to a PNG image file."""
def __init__(self):
"""Constructor."""
raise NotImplementedError('Abstract class PngTextRenderer')
#
def render_text(self, filename, text, font='Arial'):
"""Render the text with the given font to the given image filename."""
raise NotImplementedError('Abstract class PngTextRenderer')
class XelatexImageMagickPngTextRenderer(PngTextRenderer):
"""Class to render text with (Xe)LaTeX and convert to PNG
with ImageMagick.
"""
#
def __init__(self, hebrew_rtl=False):
"""Constructor."""
# set up the template for the Pango string
if hebrew_rtl:
self._xelatex_format = r'''
%!TEX TS-program = xelatex
%!TEX encoding = UTF-8 Unicode
\documentclass[border=10mm]{{standalone}}
\nofiles
\usepackage{{polyglossia}}
\setmainlanguage{{hebrew}}
\usepackage{{fontspec}}
\defaultfontfeatures{{Mapping=tex-text,Scale=MatchLowercase}}
\setmainfont{{{1}}}
\begin{{document}}
\fontsize{{30pt}}{{30pt}}
\selectfont
{0}
\end{{document}}
'''[1:].replace(' ', '')
else:
self._xelatex_format = r'''
%!TEX TS-program = xelatex
%!TEX encoding = UTF-8 Unicode
\documentclass[border=10mm]{{standalone}}
\nofiles
\usepackage{{fixltx2e}}
\usepackage{{fontspec}}
\usepackage{{xunicode,xltxtra}}
\defaultfontfeatures{{Mapping=tex-text,Scale=MatchLowercase}}
\setmainfont{{{1}}}
\begin{{document}}
\fontsize{{30pt}}{{30pt}}
\selectfont
{0}
\end{{document}}
'''[1:].replace(' ', '')
# create a translation table for unsafe characters in a XeLaTeX string
self._trans = str.maketrans({
'#': r'\#',
'&': r'\&',
'%': r'\%',
'$': r'\$',
'_': r'\_',
'{': r'\{',
'}': r'\}',
'~': r'\textasciitilde{}',
'^': r'\textasciicircum{}',
'\\': r'\textbackslash{}',
'"': r'\char"22'
})
#
def make_string_xelatex_safe(self, unsafe_string):
"""Escape unsafe characters in a text to go into a XeLaTeX string."""
return unsafe_string.translate(self._trans)
#
def render_text(self, filename, text, font='Arial'):
"""Render the text with the given font to the given image filename."""
debug('<XeLaTeX.render_text>', indent=4)
# set up the XeLaTeX document
xelatex_string = self._xelatex_format.format(
self.make_string_xelatex_safe(text),
font
)
# render the text (to a PDF file) using XeLaTeX
with subprocess.Popen(
['xelatex', '-output-directory={0}'.format(os.getcwd())],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=False
) as xelatex:
xelatex.communicate(input=xelatex_string.encode('utf-8'))
# convert the PDF to PNG using ImageMagick
filename_tex = 'texput.pdf'
filename_texlog = 'texput.log'
subprocess.run([
'magick', '-antialias', '-density', '1200',
filename_tex,
'-trim',
filename
])
# check the existence of the PNG
if os.path.isfile(filename):
debug('file "{}" produced'.format(filename), indent=4)
# cleanup XeLaTeX files
for file in [filename_tex, filename_texlog]:
if os.path.isfile(file):
os.remove(file)
debug('temporary file "{}" removed'.format(file), indent=4)
class ImageMagickPangoPngTextRenderer(PngTextRenderer):
"""Class to render text to an image using ImageMagick with Pango."""
#
def __init__(self):
"""Constructor."""
# set up the template for the Pango string
self._pango_format = (
'pango:<markup>'
'<span font_family="{1}" size="192000"> \n {0} \n </span>'
'</markup>'
)
# create a translation table for unsafe characters in a Pango string
self._trans = str.maketrans({
'&': r'&\;',
'<': r'<\;',
'>': r'>\;',
'"': r'"\;',
'\'': r'&apos\;',
'\\': r'\\;',
'%': r'%\;'
})
#
def make_string_pango_safe(self, unsafe_string):
"""Escape unsafe characters in a text to go into a Pango string."""
return unsafe_string.translate(self._trans)
#
def render_text(self, filename, text, font='Arial'):
"""Render the text with the given font to the given image filename."""
debug('<Pango.render_text>', indent=4)
# set up the Pango string
pango_string = self._pango_format.format(
self.make_string_pango_safe(text),
font
)
# render the Pango string to PNG using ImageMagick
subprocess.run([
'magick', '-background', 'white', '-density', '600',
pango_string,
'-transparent', 'white', '-antialias', '-resize', '25%', '-trim',
filename
])
class MemriseImageAdder:
"""Class to sign in to Memrise using Chrome and render and upload
images for the words in a given column of an open level in a course
edit page."""
#
def __init__(self):
"""Constructor."""
self._driver = None
#
def __enter__(self):
"""Enter runtime context."""
return self
#
def __exit__(self, exc_type, exc_value, traceback):
"""Exit runtime context."""
return True
#
def __del__(self):
""""Destructor."""
# close any currently active webdriver session
self._quit_driver()
#
def _quit_driver(self):
"""Close current webdriver session, if any."""
if self._driver:
self._driver.quit()
self._driver = None
#
def _get_renderer(self, engine='pango'):
"""Instantiate a text to PNG renderer."""
if engine.lower() == 'xelatex':
return XelatexImageMagickPngTextRenderer(hebrew_rtl=True)
else:
return ImageMagickPangoPngTextRenderer()
#
def render_and_upload(self, column, engine, font, skip_existing_images,
hebrew_rtl=False):
"""Render and upload images for the words in the unfolded levels of
the currently opened Memrise course edit page.
"""
debug(
'<MIA.render_and_upload>(column={}, engine="{}", font="{}", '
'skip_existing_images={}, hebrew_rtl={})'.format(
column, engine, font, skip_existing_images, hebrew_rtl
)
)
# make the column zero-indexed
column -= 1
# instantiate renderer
renderer = self._get_renderer(engine)
debug('renderer: {}'.format(renderer))
# parse the currently opened page's HTML source
html = self._driver.page_source
soup = BeautifulSoup(html, 'lxml')
# find all the unfolded class levels and process them
i = 0
for level in soup.select('div[class="level"]'):
i += 1
debug('open lesson number {}'.format(i), indent=1)
# go through each table row (entry)
j = 0
for entry in level.find_all('tr', {'class': 'thing'}):
j += 1
# get the entry's ID
tr_id = entry.get('data-thing-id')
debug('entry number {} with id {}'.format(j, tr_id), indent=2)
# extract the word (first text column)
cells = entry.find_all('td', {'class': 'cell'})
debug('column count is {}'.format(len(cells)), indent=2)
if column < len(cells):
word_cell = cells[column]
word = word_cell.find('div', {'class': 'text'}).string
# check if it has an image
image = entry.select_one('td.cell.image button')
has_image = 'disabled' not in image.get('class')
debug('word = "{}"'.format(word.encode('utf-8')), indent=3)
debug('has_image = "{}"'.format(has_image), indent=3)
# generate and upload image if it doesn't have one
if not (skip_existing_images and has_image):
# find the input field in Chrome
input_field = (
self._driver.find_element_by_css_selector(
('tr[data-thing-id=\'{}\'] '
'td[class~=\'image\'] input').format(tr_id)
)
)
debug('input field = {}'.format(
input_field.get_attribute('outerHTML')),
indent=3
)
# render the image
image_name = 'render{}.png'.format(tr_id)
try:
renderer.render_text(image_name, word, font)
except:
debug('"Failed to generate image.', indent=3)
# if rendered successfully, upload and delete image
if os.path.isfile(image_name):
debug('sending image to input', indent=3)
input_field.send_keys(
os.path.join(os.getcwd(), image_name)
)
time.sleep(1)
debug('removing file "{}"'.format(image_name), indent=3)
os.remove(image_name)
debug('lesson {} done, processed {} entries'.format(i, j), indent=1)
debug('Done, processed {} lessons'.format(i))
#
def start_chrome(self):
"""Start a Chrome session."""
debug('<MIA.start_chrome>')
# close any inadvertently active webdriver session
self._quit_driver()
# start a new session
self._driver = webdriver.Chrome()
self._driver.get('https://www.memrise.com/login/')
#
def start_firefox(self):
"""Start a Firefox session."""
debug('<MIA.start_firefox>')
# close any inadvertently active webdriver session
self._quit_driver()
# start a new session
self._driver = webdriver.Firefox()
self._driver.get('https://www.memrise.com/login/')
class GUI:
_msg_readmefirst = '''
In order to use this script that interacts with a browser, you need
to have either ChromeDriver (for Chrome) or geckodriver (for Firefox)
and have its executable included in your operating system's PATH
variable).
ChromeDriver can be found at http://chromedriver.chromium.org .
geckodriver can be found at https://github.com/mozilla/geckodriver .
Furthermore you need ImageMagick to be installed (and its executables
need to be included in your operating system's PATH variable).
ImageMagick can be found at https://www.imagemagick.org .
Additionally, if you wish to render text using the XeLaTeX engine,
a TeX distribution needs to be installed on your system and the
xelatex command needs to be included in your operating system's PATH
variable).
TeX distributions can be found at
https://www.latex-project.org/get/#tex-distributions .
For conversion from XeLaTeX's PDF output to a PNG image, ImageMagick
also needs Ghostscript to be installed. Ghostscript can be found at
https://www.ghostscript.com .
Make sure the above requirements are met before proceeding to launch
a browser window.
'''.replace(' ', '')[1:-1]
_msg_navigate = '''
Please log in to Memrise in the browser window that was launched.
Then, navigate to the course you wish to add images to.
Go to the course's edit page and unfold the levels that you wish to be
processed.
You may then press the continue button.
(In the next step, you will be able to specify which column's text you
want to render, which rendering engine to use (Pango or XeLaTeX),
which font you want to use, and if you wish to skip items that already
have an image.
Clicking the Start button will then prompt the script to process all
unfolded levels.)
'''.replace(' ', '')[1:-1]
#
def _show_warning_dialog(self, master, title, text, ok_object_function):
"""Make a modal dialog window that displays a warning
on prerequisites.
"""
p = 20
cancel_action = self.close_main_window
# create a new window
self._dialog = tk.Toplevel(master)
dlg = self._dialog
dlg.title(title)
# add warning text
tk.Message(dlg, text=text, borderwidth=p).pack()
# add buttons
frame_buttons = tk.Frame(dlg)
frame_buttons.pack()
ok_object_function(frame_buttons).pack(side=tk.LEFT, padx=p, pady=p//2)
tk.Button(
frame_buttons, text='Cancel', command=cancel_action
).pack(side=tk.RIGHT, padx=p, pady=p//2)
# only one window in the task bar
dlg.transient(self._tk)
# make sure a close is treated as a cancel
dlg.protocol('WM_DELETE_WINDOW', cancel_action)
# modal behaviour (keep focus)
dlg.grab_set()
master.wait_window(dlg)
#
def _build_main_window(self, master):
"""Build the main application window."""
p = 10
# set window title
master.title('Memrise image renderer')
# use a frame for the comboboxes
frame_options = tk.Frame(master)
frame_options.pack()
# add spinbox to select the text source column
tk.Label(
frame_options, text='Text column'
).grid(row=1, column=1, padx=p, pady=p)
self._spinbox_column = tk.Spinbox(frame_options, from_=1, to=99)
self._spinbox_column.grid(row=1, column=2, padx=p, pady=p)
# add combobox to choose Pango/XeLaTeX engine
tk.Label(
frame_options, text='Rendering engine'
).grid(row=2, column=1, padx=p, pady=p)
engines = ['Pango', 'XeLaTeX']
self._combobox_engine = ttk.Combobox(
frame_options, values=engines, state='readonly'
)
self._combobox_engine.grid(row=2, column=2, padx=p, pady=p)
self._combobox_engine.set('Pango')
# add combobox to choose an available font
tk.Label(
frame_options, text='Select font'
).grid(row=3, column=1, padx=p, pady=p)
fonts = sorted((x for x in tkf.families() if not x.startswith('@')))
self._combobox_font = ttk.Combobox(
frame_options, values=fonts, state='readonly'
)
self._combobox_font.grid(row=3, column=2, padx=p, pady=p)
self._combobox_font.set('Arial' if 'Arial' in fonts else fonts[0])
# add checkboxes for additional options
# - skip items that already have an image
self._checkbox_skip_images_status = tk.IntVar()
self._checkbox_skip_images = tk.Checkbutton(
frame_options, text='Skip items that already have an image',
variable=self._checkbox_skip_images_status,
)
self._checkbox_skip_images.grid(
row=4, column=1, columnspan=2, padx=p, pady=p
)
self._checkbox_skip_images.select()
# - Hebrew right-to-left
self._checkbox_hebrew_rtl_status = tk.IntVar()
self._checkbox_hebrew_rtl = tk.Checkbutton(
frame_options, text='Hebrew right-to-left',
variable=self._checkbox_hebrew_rtl_status
)
self._checkbox_hebrew_rtl.grid(
row=5, column=1, columnspan=2, padx=p, pady=p // 2
)
# add execution button
tk.Button(
master, text='Start', command=self._button_start_click
).pack(padx=p, pady=p)
#
def __init__(self, memrise_image_adder):
"""Build a Tk window with a combobox to select column/engine/font
and a button to execute.
"""
self._memrise_image_adder = memrise_image_adder
self._tk = tk.Tk()
self._build_main_window(self._tk)
self._show_warning_dialog(
self._tk, 'Step 1: read me first!',
self._msg_readmefirst, self._make_browser_buttons_frame
)
if self._tk:
self._show_warning_dialog(
self._tk, 'Step 2: navigate to course',
self._msg_navigate, lambda x: tk.Button(
x, text='Continue', command=self._btn_ok2_click
)
)
if self._tk:
self._tk.mainloop()
#
def close_main_window(self):
"""Close the GUI's main window, ending the main loop."""
self._tk.destroy()
#
def _make_browser_buttons_frame(self, master):
"""Make a frame with buttons for launching browsers."""
frame_browsers = tk.Frame(master)
tk.Button(
frame_browsers, text='Chrome', command=self._btn_chrome_click
).pack()
tk.Button(
frame_browsers, text='Firefox', command=self._btn_firefox_click
).pack(pady=10)
return frame_browsers
#
def _btn_chrome_click(self):
"""Handle a click on the OK button in the (1st) warning dialog."""
self._memrise_image_adder.start_chrome()
self._dialog.destroy()
#
def _btn_firefox_click(self):
"""Handle a click on the OK button in the (1st) warning dialog."""
self._memrise_image_adder.start_firefox()
self._dialog.destroy()
#
def _btn_ok2_click(self):
"""Handle a click on the OK button in the (2nd) warning dialog."""
self._dialog.destroy()
#
def _button_start_click(self):
"""Handle a click on the Start button."""
# gather the chosen settings
column = int(self._spinbox_column.get())
engine = self._combobox_engine.get()
font = self._combobox_font.get()
skip_existing_images = self._checkbox_skip_images_status.get() != 0
hebrew_rtl = self._checkbox_hebrew_rtl_status.get() != 0
# get to rendering and uploading
self._memrise_image_adder.render_and_upload(
column, engine, font, skip_existing_images,
hebrew_rtl=hebrew_rtl
)
def main():
"""Main method."""
with MemriseImageAdder() as mia:
GUI(mia)
if __name__ == '__main__':
main()