diff --git a/README.md b/README.md index b76d594a..798f67cf 100644 --- a/README.md +++ b/README.md @@ -37,15 +37,29 @@ and accompanying [blog post](https://massimmersionapproach.com/table-of-contents See the [MorphMan wiki](https://github.com/kaegi/MorphMan/wiki) for more information. # Development +## Linux - Set up local environment: - - The best is to use a Python virtual environment and install prebuilt Anki wheels: ``` - python -m virtualenv pyenv - source pyenv/bin/activate - python -m pip install aqt==2.1.54 anki==2.1.54 pyqt6-webengine pylint - export PYTHONPATH=./ + python -m virtualenv venv + source venv/bin/activate + python -m pip install aqt[qt6] anki pylint mypy types-setuptools ``` -- Run tests: `python test.py` -- Build Qt Developer UI with `python scripts/build_ui.py` - Install git commit hook to run tests and pylint ` scripts/setup_dev.sh` +- Run tests: `python test.py` +- If ui files have been changed: + - Build Qt Developer UI with `python scripts/build_ui.py` + +## Windows: +- Set up local environment: + ``` + python -m virtualenv venv + .\venv\Scripts\activate + python -m pip install aqt[qt6] anki pylint mypy types-setuptools + ``` +- Run type checking: mypy filename.py +- Run lint checking: pylint filename.py +- Run tests: `python test.py` +- If ui files have been changed: + - Build Qt Developer UI with `python scripts/build_ui.py` + diff --git a/__init__.py b/__init__.py index e43fb62d..a793c806 100644 --- a/__init__.py +++ b/__init__.py @@ -1,84 +1,40 @@ -from .morph.util import * from PyQt6.QtWidgets import * -import anki.stats -from anki.hooks import wrap +from aqt.reviewer import Reviewer +from aqt.utils import tooltip +from aqt import gui_hooks +# TODO: importlib is seemingly used to patch over and disguise veeeeery bad bugs... 
remove its usages and fix the bugs import importlib -try: - from anki.lang import _ -except: - pass - - -def onMorphManRecalc(): - from .morph import main - importlib.reload(main) - main.main() - - -def onMorphManManager(): - mw.toolbar.draw() - from .morph import manager - importlib.reload(manager) - manager.main() +import anki.stats +from anki import hooks +from anki.collection import Collection +from anki.lang import _ # TODO: deprecated? +from .morph.util import * # TODO: replace this star +from .morph import morph_stats +from .morph import reviewing_utils +from .morph import main as main_module # TODO: change the file name 'main' to something more fitting like 'recalc' +from .morph import manager +from .morph import readability +from .morph import preferencesDialog +from .morph import graphs +from .morph import preferences -def onMorphManReadability(): - mw.toolbar.draw() - from .morph import readability - importlib.reload(readability) - readability.main() +morphman_sub_menu = None +morphman_sub_menu_creation_action = None -def onMorphManPreferences(): - from .morph import preferencesDialog - importlib.reload(preferencesDialog) - preferencesDialog.main() +def main(): + # Support anki version 2.1.50 and above + # Hooks should be in the order they are executed! 
-def morphGraphsWrapper(*args, **kwargs): - from .morph import graphs - importlib.reload(graphs) - return graphs.morphGraphs(args, kwargs) + gui_hooks.profile_did_open.append(preferences.init_preferences) + # Adds morphman to menu multiples times when profiles are changed + gui_hooks.profile_did_open.append(init_actions_and_submenu) -def main(): - # Add MorphMan submenu - morphmanSubMenu = QMenu("MorphMan", mw) - mw.form.menuTools.addMenu(morphmanSubMenu) - - # Add recalculate menu button - a = QAction('&Recalc', mw) - a.setStatusTip(_("Recalculate all.db, note fields, and new card ordering")) - a.setShortcut(_("Ctrl+M")) - a.triggered.connect(onMorphManRecalc) - morphmanSubMenu.addAction(a) - - # Add gui preferences menu button - a = QAction('&Preferences', mw) - a.setStatusTip(_("Change inspected cards, fields and tags")) - a.setShortcut(_("Ctrl+O")) - a.triggered.connect(onMorphManPreferences) - morphmanSubMenu.addAction(a) - - # Add gui manager menu button - a = QAction('&Database Manager', mw) - a.setStatusTip( - _("Open gui manager to inspect, compare, and analyze MorphMan DBs")) - a.setShortcut(_("Ctrl+D")) - a.triggered.connect(onMorphManManager) - morphmanSubMenu.addAction(a) - - # Add readability tool menu button - a = QAction('Readability &Analyzer', mw) - a.setStatusTip(_("Check readability and build frequency lists")) - a.setShortcut(_("Ctrl+A")) - a.triggered.connect(onMorphManReadability) - morphmanSubMenu.addAction(a) - - # ToDo: remove this pylint disable. These imports are here because they have Anki - # addHooks to initialize the UI. It would be better to initialize all Anki UI - # in one single place with explicit call to reveal true intention. 
+ # TODO: Extract all hooks from the imports below and remove the pylint disable # pylint: disable=W0611 from .morph.browser import viewMorphemes from .morph.browser import extractMorphemes @@ -88,12 +44,136 @@ def main(): from .morph.browser import boldUnknowns from .morph.browser import browseMorph from .morph.browser import alreadyKnownTagger - from .morph import newMorphHelper - from .morph import stats + gui_hooks.collection_did_load.append(replace_reviewer_functions) + + # This stores the focus morphs seen today, necessary for the respective skipping option to work + gui_hooks.reviewer_did_answer_card.append(mark_morph_seen) + + # Adds the 'K: V:' to the toolbar + gui_hooks.top_toolbar_did_init_links.append(add_morph_stats_to_toolbar) + + # See more detailed morph stats by holding 'Shift'-key while pressing 'Stats' in toolbar + # TODO: maybe move it somewhere less hidden if possible? E.g.a separate toolbar button + gui_hooks.stats_dialog_will_show(add_morph_stats_to_ease_graph) + + gui_hooks.profile_will_close.append(tear_down_actions_and_submenu) + + +def init_actions_and_submenu(): + global morphman_sub_menu + + recalc_action = create_recalc_action() + preferences_action = create_preferences_action() + database_manager_action = create_database_manager_action() + readability_analyzer_action = create_readability_analyzer_action() + + morphman_sub_menu = create_morphman_submenu() + morphman_sub_menu.addAction(recalc_action) + morphman_sub_menu.addAction(preferences_action) + morphman_sub_menu.addAction(database_manager_action) + morphman_sub_menu.addAction(readability_analyzer_action) + + # test_action = create_test_action() + # morphman_sub_menu.addAction(test_action) + + +def mark_morph_seen(reviewer: Reviewer, card, ease): + # Hook gives extra input parameters, hence this seemingly redundant function + reviewing_utils.mark_morph_seen(card.note()) + + +def replace_reviewer_functions(collection: Collection) -> None: + # This skips the cards the user specified 
in preferences GUI + Reviewer.nextCard = hooks.wrap(Reviewer.nextCard, reviewing_utils.my_next_card, "around") + + # Automatically highlights morphs on cards if the respective note stylings are present + hooks.field_filter.append(reviewing_utils.highlight) + + +def add_morph_stats_to_toolbar(links, toolbar): + name, details = morph_stats.get_stats() + links.append( + toolbar.create_link( + "morph", name, morph_stats.on_morph_stats_clicked, tip=details, id="morph" + ) + ) + + +def add_morph_stats_to_ease_graph(): + anki.stats.CollectionStats.easeGraph = hooks.wrap(anki.stats.CollectionStats.easeGraph, morph_graphs_wrapper, + "around") + + +def create_morphman_submenu() -> QMenu: + global morphman_sub_menu_creation_action + + morphman_sub_menu = QMenu("MorphMan", mw) + morphman_sub_menu_creation_action = mw.form.menuTools.addMenu(morphman_sub_menu) + + return morphman_sub_menu + + +def create_test_action() -> QAction: + action = QAction('&Test', mw) + action.setStatusTip(_("Recalculate all.db, note fields, and new card ordering")) + action.setShortcut(_("Ctrl+T")) + action.triggered.connect(test_function) + return action + + +def create_recalc_action() -> QAction: + action = QAction('&Recalc', mw) + action.setStatusTip(_("Recalculate all.db, note fields, and new card ordering")) + action.setShortcut(_("Ctrl+M")) + action.triggered.connect(main_module.main) + return action + + +def create_preferences_action() -> QAction: + action = QAction('&Preferences', mw) + action.setStatusTip(_("Change inspected cards, fields and tags")) + action.setShortcut(_("Ctrl+O")) + action.triggered.connect(preferencesDialog.main) + return action + + +def create_database_manager_action() -> QAction: + action = QAction('&Database Manager', mw) + action.setStatusTip( + _("Open gui manager to inspect, compare, and analyze MorphMan DBs")) + action.setShortcut(_("Ctrl+D")) + action.triggered.connect(manager.main) + return action + + +def create_readability_analyzer_action() -> QAction: + action 
= QAction('Readability &Analyzer', mw) + action.setStatusTip(_("Check readability and build frequency lists")) + action.setShortcut(_("Ctrl+A")) + action.triggered.connect(readability.main) + return action + + +def morph_graphs_wrapper(*args, **kwargs): + importlib.reload(graphs) + return graphs.morphGraphs(args, kwargs) + + +def tear_down_actions_and_submenu(): + if morphman_sub_menu is not None: + morphman_sub_menu.clear() + mw.form.menuTools.removeAction(morphman_sub_menu_creation_action) + + +def test_function(): + skipped_cards = reviewing_utils.SkippedCards() + + skipped_cards.skipped_cards['comprehension'] += 10 + skipped_cards.skipped_cards['fresh'] += 1 + skipped_cards.skipped_cards['today'] += 1 - anki.stats.CollectionStats.easeGraph = \ - wrap(anki.stats.CollectionStats.easeGraph, morphGraphsWrapper, pos="") + skipped_cards.show_tooltip_of_skipped_cards() main() diff --git a/morph/browser/alreadyKnownTagger.py b/morph/browser/alreadyKnownTagger.py index 50b76401..214dcf22 100644 --- a/morph/browser/alreadyKnownTagger.py +++ b/morph/browser/alreadyKnownTagger.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from aqt.utils import tooltip from anki.hooks import addHook -from ..util import addBrowserNoteSelectionCmd, getFilter, runOnce +from ..util import addBrowserNoteSelectionCmd, get_filter, runOnce from ..preferences import get_preference from anki.lang import _ @@ -12,7 +12,7 @@ def pre(b): # :: Browser -> State def per(st, n): # :: State -> Note -> State - if getFilter(n) is None: + if get_filter(n) is None: return st n.addTag(st['tag']) diff --git a/morph/browser/browseMorph.py b/morph/browser/browseMorph.py index e41feb34..ce949610 100644 --- a/morph/browser/browseMorph.py +++ b/morph/browser/browseMorph.py @@ -3,7 +3,7 @@ from anki.lang import _ from aqt.utils import tooltip -from ..newMorphHelper import focus, focusName, focusQuery +from ..reviewing_utils import try_to_get_focus_morphs, focus_query from ..util import addBrowserNoteSelectionCmd, runOnce 
from ..preferences import get_preference as cfg @@ -15,17 +15,17 @@ def per(st, n): if n is None: return st - for focusMorph in focus(n): + for focusMorph in try_to_get_focus_morphs(n): # TODO: is this safe?? st['focusMorphs'].add(focusMorph) return st def post(st): search = '' - focusField = focusName() + focusField = cfg('Field_FocusMorph') focusMorphs = st['focusMorphs'] - q = focusQuery(focusField, focusMorphs) + q = focus_query(focusField, focusMorphs) if q != '': st['b'].form.searchEdit.lineEdit().setText(q) st['b'].onSearchActivated() diff --git a/morph/browser/extractMorphemes.py b/morph/browser/extractMorphemes.py index 7120f2d4..34f8f435 100644 --- a/morph/browser/extractMorphemes.py +++ b/morph/browser/extractMorphemes.py @@ -4,7 +4,7 @@ from anki.utils import strip_html from ..morphemes import AnkiDeck, MorphDb, getMorphemes from ..morphemizer import getMorphemizerByName -from ..util import addBrowserNoteSelectionCmd, mw, getFilter, infoMsg, QFileDialog, runOnce +from ..util import addBrowserNoteSelectionCmd, mw, get_filter, infoMsg, QFileDialog, runOnce from ..preferences import get_preference as cfg @@ -17,7 +17,7 @@ def pre(b): def per(st, n): mats = mw.col.db.list('select ivl from cards where nid = :nid', nid=n.id) - note_cfg = getFilter(n) + note_cfg = get_filter(n) if note_cfg is None: return st diff --git a/morph/browser/massTagger.py b/morph/browser/massTagger.py index 276995f1..8ed40def 100644 --- a/morph/browser/massTagger.py +++ b/morph/browser/massTagger.py @@ -4,7 +4,7 @@ from anki.utils import strip_html from ..morphemes import getMorphemes, MorphDb from ..morphemizer import getMorphemizerByName -from ..util import addBrowserNoteSelectionCmd, getFilter, infoMsg, QInputDialog, QFileDialog, QLineEdit, runOnce +from ..util import addBrowserNoteSelectionCmd, get_filter, infoMsg, QInputDialog, QFileDialog, QLineEdit, runOnce from ..preferences import get_preference as cfg from anki.lang import _ @@ -28,7 +28,7 @@ def pre(b): # :: Browser -> 
State def per(st, n): # :: State -> Note -> State - note_cfg = getFilter(n) + note_cfg = get_filter(n) if note_cfg is None: return st morphemizer = getMorphemizerByName(note_cfg['Morphemizer']) diff --git a/morph/browser/viewMorphemes.py b/morph/browser/viewMorphemes.py index 9e332299..f514a416 100644 --- a/morph/browser/viewMorphemes.py +++ b/morph/browser/viewMorphemes.py @@ -3,7 +3,7 @@ from anki.utils import strip_html from ..morphemes import getMorphemes, ms2str from ..morphemizer import getMorphemizerByName -from ..util import addBrowserNoteSelectionCmd, getFilter, infoMsg, runOnce +from ..util import addBrowserNoteSelectionCmd, get_filter, infoMsg, runOnce from ..preferences import get_preference as cfg @@ -11,7 +11,7 @@ def pre(b): return {'morphemes': []} def per(st, n): - notecfg = getFilter(n) + notecfg = get_filter(n) if notecfg is None: return st diff --git a/morph/graphs.py b/morph/graphs.py index 6a16a4fc..1542298b 100644 --- a/morph/graphs.py +++ b/morph/graphs.py @@ -221,7 +221,7 @@ def get_stats(self, db_table, bucket_size_days, day_cutoff_seconds, num_buckets= if not all_reviews_for_bucket: return stats_by_name - all_db = util.allDb() + all_db = util.get_all_db() nid_to_morphs = defaultdict(set) for m, ls in all_db.db.items(): diff --git a/morph/main.py b/morph/main.py index 9148d715..8471599a 100644 --- a/morph/main.py +++ b/morph/main.py @@ -5,37 +5,26 @@ import time import itertools -from anki.tags import TagManager - from functools import partial +from typing import Dict, Set import aqt.main +from aqt.utils import tooltip +from aqt.operations import QueryOp + +from anki.tags import TagManager from anki.utils import split_fields, join_fields, strip_html, int_time, field_checksum +from anki.collection import Collection from .morphemes import Location, Morpheme -from . import stats -from . 
import util from .morphemes import MorphDb, AnkiDeck, getMorphemes from .morphemizer import getMorphemizerByName from .util import printf, mw, errorMsg, getFilterByMidAndTags, getReadEnabledModels, getModifyEnabledModels from .preferences import get_preference as cfg, get_preferences from .util_external import memoize -# hack: typing is compile time anyway, so, nothing bad happens if it fails, the try is to support anki < 2.1.16 -try: - from aqt.pinnedmodules import typing # pylint: disable=W0611 # See above hack comment - from typing import Dict, Set -except ImportError: - pass - -# not all anki verions have profiling features -doProfile = False -try: - import cProfile, pstats - from pstats import SortKey -except: - pass - +from . import morph_stats as stats +from . import util # only for jedi-auto-completion assert isinstance(mw, aqt.main.AnkiQt) @@ -81,6 +70,7 @@ def setField(mid, fs, k, v): # nop if field DNE if idx: fs[idx] = v + def notesToUpdate(last_updated, included_mids): # returns list of (nid, mid, flds, guid, tags, maxmat) of # cards to analyze @@ -118,16 +108,16 @@ def notesToUpdate(last_updated, included_mids): return mw.col.db.execute(query) -def mkAllDb(all_db=None): + +def make_all_db(all_db=None): from . import config importlib.reload(config) t_0, db, TAG = time.time(), mw.col.db, mw.col.tags - mw.progress.start(label='Prep work for all.db creation', - immediate=True) + + mw.taskman.run_on_main(partial(mw.progress.start, label='Prep work for all.db creation', immediate=True)) # for providing an error message if there is no note that is used for processing N_enabled_notes = 0 - if not all_db: all_db = MorphDb() @@ -135,7 +125,7 @@ def mkAllDb(all_db=None): last_preferences = all_db.meta.get('last_preferences', {}) if not last_preferences == get_preferences(): print("Preferences changed. 
Recomputing all_db...") - all_db = MorphDb() # Clear all db + all_db = MorphDb() # Clear all db last_updated = 0 else: last_updated = all_db.meta.get('last_updated', 0) @@ -149,15 +139,15 @@ def mkAllDb(all_db=None): notes = notesToUpdate(last_updated, included_mids) N_notes = len(notes) - mw.progress.finish() - mw.progress.start(label='Generating all.db data', - max=N_notes, - immediate=True) + print("notes to update:", N_notes) + + mw.taskman.run_on_main(mw.progress.finish) + mw.taskman.run_on_main(partial(mw.progress.start, label='Generating all.db data', max=N_notes, immediate=True)) for i, (nid, mid, flds, guid, tags, maxmat) in enumerate(notes): - if i % 500 == 0: - mw.progress.update(value=i) + # if i % 500 == 0: + mw.taskman.run_on_main(partial(mw.progress.update, value=i)) ts = TAG.split(tags) mid_cfg = getFilterByMidAndTags(mid, ts) @@ -189,9 +179,9 @@ def mkAllDb(all_db=None): mname = mw.col.models.get(mid)['name'] errorMsg('Failed to get field "{field}" from a note of model "{model}". 
Please fix your Note Filters ' 'under MorphMan > Preferences to match your collection appropriately.'.format( - model=mname, field=fieldName)) + model=mname, field=fieldName)) return - assert maxmat!=None, "Maxmat should not be None" + assert maxmat != None, "Maxmat should not be None" loc = fidDb.get((nid, guid, fieldName), None) if not loc: @@ -213,14 +203,15 @@ def mkAllDb(all_db=None): printf('Processed %d notes in %f sec' % (N_notes, time.time() - t_0)) - mw.progress.update(label='Creating all.db objects') + mw.taskman.run_on_main(partial(mw.progress.update, label='Creating all.db objects')) old_meta = all_db.meta all_db.clear() all_db.addFromLocDb(locDb) all_db.meta = old_meta - mw.progress.finish() + mw.taskman.run_on_main(mw.progress.finish) return all_db + def filterDbByMat(db, mat): """Assumes safe to use cached locDb""" newDb = MorphDb() @@ -234,8 +225,10 @@ def updateNotes(allDb): t_0, now, db = time.time(), int_time(), mw.col.db TAG = mw.col.tags # type: TagManager - ds, nid2mmi = [], {} - mw.progress.start(label='Updating data', immediate=True) + ds, note_id_morphman_index = [], {} + + mw.taskman.run_on_main(partial(mw.progress.start, label='Updating data', immediate=True)) + fidDb = allDb.fidDb(recalc=True) loc_db = allDb.locDb(recalc=False) # type: Dict[Location, Set[Morpheme]] @@ -247,17 +240,20 @@ def updateNotes(allDb): badLengthTag = cfg('Tag_BadLength') # handle secondary databases - mw.progress.update(label='Creating seen/known/mature from all.db') + mw.taskman.run_on_main(partial(mw.progress.update, label='Creating seen/known/mature from all.db')) + seenDb = filterDbByMat(allDb, cfg('threshold_seen')) knownDb = filterDbByMat(allDb, cfg('threshold_known')) matureDb = filterDbByMat(allDb, cfg('threshold_mature')) - mw.progress.update(label='Loading priority.db') + + mw.taskman.run_on_main(partial(mw.progress.update, label='Loading priority.db')) priorityDb = MorphDb(cfg('path_priority'), ignoreErrors=True) - mw.progress.update(label='Loading 
frequency.txt') + mw.taskman.run_on_main(partial(mw.progress.update, label='Loading frequency.txt')) + frequencyListPath = cfg('path_frequency') frequency_map = {} - frequency_has_morphemes = False + frequency_list_exists = False try: with io.open(frequencyListPath, encoding='utf-8-sig') as csvfile: @@ -265,7 +261,7 @@ def updateNotes(allDb): rows = [row for row in csvreader] if rows[0][0] == "#study_plan_frequency": - frequency_has_morphemes = True + frequency_list_exists = True frequency_map = dict( zip([Morpheme(row[0], row[1], row[2], row[3], row[4], row[5]) for row in rows[1:]], itertools.count(0))) @@ -289,7 +285,7 @@ def updateNotes(allDb): skip_comprehension_cards = cfg('Option_SkipComprehensionCards') skip_fresh_cards = cfg('Option_SkipFreshVocabCards') - + # Find all morphs that changed maturity and the notes that refer to them. last_maturities = allDb.meta.get('last_maturities', {}) new_maturities = {} @@ -336,22 +332,32 @@ def updateNotes(allDb): query_results = db.execute(query) N_notes = len(query_results) - mw.progress.finish() - mw.progress.start(label='Updating notes', - max=N_notes, - immediate=True) for i, (nid, mid, flds, guid, tags, maxtype) in enumerate(query_results): ts = TAG.split(tags) - if i % 500 == 0: - mw.progress.update(value=i) - C = partial(cfg, model_id=mid) + if i % 1000 == 0: + mw.taskman.run_on_main(partial(mw.progress.update, + label=f"Recalculated {i} of {N_notes} cards ", + value=i, + max=N_notes)) notecfg = getFilterByMidAndTags(mid, ts) if notecfg is None or not notecfg['Modify']: continue + # add bonus for morphs in priority.db and frequency.txt + C = partial(cfg, model_id=mid) + + frequencyBonus = C('frequency.txt bonus') + if C('Option_AlwaysPrioritizeFrequencyMorphs'): + noPriorityPenalty = C('no priority penalty') + else: + noPriorityPenalty = 0 + reinforceNewVocabWeight = C('reinforce new vocab weight') + priorityDbWeight = C('priority.db weight') + proper_nouns_known = cfg('Option_ProperNounsAlreadyKnown') + # Get 
all morphemes for note morphemes = set() for fieldName in notecfg['Fields']: @@ -361,8 +367,6 @@ def updateNotes(allDb): except KeyError: continue - proper_nouns_known = cfg('Option_ProperNounsAlreadyKnown') - # Determine un-seen/known/mature and i+N unseens, unknowns, unmatures, new_knowns = set(), set(), set(), set() for morpheme in morphemes: @@ -379,48 +383,50 @@ def updateNotes(allDb): new_knowns.add(morpheme) # Determine MMI - Morph Man Index - N, N_s, N_k, N_m = len(morphemes), len( - unseens), len(unknowns), len(unmatures) - - # Bail early for lite update - if N_k > 2 and C('only update k+2 and below'): + morphemes_amount = len(morphemes) + unseens_amount = len(unseens) + unknows_amount = len(unknowns) + unmatures_amount = len(unmatures) + + # Set the mmi (due) on all cards to max by default to prevent buggy cards to showing up first + # if a card already has this mmi (due) it won't update, so this will not have a negative impact on syncing. + # card.due is converted to a signed 32-bit integer on the backend, so max value is 2147483647 before overflow + morphman_index = 2147483647 + note_id_morphman_index[nid] = morphman_index + + # Bail early if card has more than 3 unknown morphs for lite update + # TODO: Add to preferences GUI to make it adjustable + if unknows_amount > 3: + continue + elif skip_comprehension_cards and unknows_amount == 0: continue - # add bonus for morphs in priority.db and frequency.txt - frequencyBonus = C('frequency.txt bonus') - if C('Option_AlwaysPrioritizeFrequencyMorphs'): - noPriorityPenalty = C('no priority penalty') - else: - noPriorityPenalty = 0 - reinforceNewVocabWeight = C('reinforce new vocab weight') - priorityDbWeight = C('priority.db weight') - isPriority = False - isFrequency = False - - focusMorph = None - - F_k = 0 + is_priority = False + is_frequency = False + unknown_morph = None + morph_frequency = 0 usefulness = 0 - for focusMorph in unknowns: - F_k += allDb.frequency(focusMorph) - if 
priorityDb.frequency(focusMorph) > 0: - isPriority = True + for unknown_morph in unknowns: + morph_frequency += allDb.frequency(unknown_morph) + + if priorityDb.frequency(unknown_morph) > 0: + is_priority = True usefulness += priorityDbWeight - - if frequency_has_morphemes: - focusMorphIndex = frequency_map.get(focusMorph, -1) + + if frequency_list_exists: + focus_morph_index = frequency_map.get(unknown_morph, -1) else: - focusMorphIndex = frequency_map.get(focusMorph.base, -1) + focus_morph_index = frequency_map.get(unknown_morph.base, -1) - if focusMorphIndex >= 0: - isFrequency = True + if focus_morph_index >= 0: + is_frequency = True # The bigger this number, the lower mmi becomes - usefulness += int(round( frequencyBonus * (1 - focusMorphIndex / frequencyListLength) )) + usefulness += int(round(frequencyBonus * (1 - focus_morph_index / frequencyListLength))) # average frequency of unknowns (ie. how common the word is within your collection) - F_k_avg = F_k // N_k if N_k > 0 else F_k + F_k_avg = morph_frequency // unknows_amount if unknows_amount > 0 else morph_frequency usefulness += F_k_avg # add bonus for studying recent learned knowns (reinforce) @@ -437,8 +443,8 @@ def updateNotes(allDb): usefulness = 99999 - min(99999, usefulness) # difference from optimal length range (too little context vs long sentence) - lenDiffRaw = min(N - C('min good sentence length'), - max(0, N - C('max good sentence length'))) + lenDiffRaw = min(morphemes_amount - C('min good sentence length'), + max(0, morphemes_amount - C('max good sentence length'))) lenDiff = min(9, abs(lenDiffRaw)) # Fill in various fields/tags on the note based on cfg @@ -449,41 +455,41 @@ def updateNotes(allDb): notReadyTag, compTag, vocabTag, freshTag)] # apply penalty for cards that aren't prioritized for learning - if not (isPriority or isFrequency): + if not (is_priority or is_frequency): usefulness += noPriorityPenalty # determine card type - if N_m == 0: # sentence comprehension card, m+0 + if 
unmatures_amount == 0: # sentence comprehension card, m+0 ts.append(compTag) if skip_comprehension_cards: - usefulness += 1000000 # Add a penalty to put these cards at the end of the queue - elif N_k == 1: # new vocab card, k+1 + usefulness += 1000000 # Add a penalty to put these cards at the end of the queue + elif unknows_amount == 1: # new vocab card, k+1 ts.append(vocabTag) - if maxtype == 0: # Only update focus fields on 'new' card types. - setField(mid, fs, field_focus_morph, focusMorph.base) - setField(mid, fs, field_focus_morph_pos, focusMorph.pos) - elif N_k > 1: # M+1+ and K+2+ + if maxtype == 0: # Only update focus fields on 'new' card types. + setField(mid, fs, field_focus_morph, unknown_morph.base) + setField(mid, fs, field_focus_morph_pos, unknown_morph.pos) + elif unknows_amount > 1: # M+1+ and K+2+ ts.append(notReadyTag) - if maxtype == 0: # Only update focus fields on 'new' card types. + if maxtype == 0: # Only update focus fields on 'new' card types. setField(mid, fs, field_focus_morph, ', '.join([u.base for u in unknowns])) setField(mid, fs, field_focus_morph_pos, ', '.join([u.pos for u in unknowns])) else: # only case left: we have k+0, but m+1 or higher, so this card does not introduce a new vocabulary -> card for newly learned morpheme ts.append(freshTag) if skip_fresh_cards: - usefulness += 1000000 # Add a penalty to put these cards at the end of the queue - if maxtype == 0: # Only update focus fields on 'new' card types. + usefulness += 1000000 # Add a penalty to put these cards at the end of the queue + if maxtype == 0: # Only update focus fields on 'new' card types. 
setField(mid, fs, field_focus_morph, ', '.join([u.base for u in unmatures])) setField(mid, fs, field_focus_morph_pos, ', '.join([u.pos for u in unmatures])) # calculate mmi - mmi = 100000 * N_k + 1000 * lenDiff + int(round(usefulness)) + morphman_index = 100000 * unknows_amount + 1000 * lenDiff + int(round(usefulness)) if C('set due based on mmi'): - nid2mmi[nid] = mmi - + note_id_morphman_index[nid] = morphman_index + # set type agnostic fields - setField(mid, fs, field_unknown_count, '%d' % N_k) - setField(mid, fs, field_unmature_count, '%d' % N_m) - setField(mid, fs, field_morph_man_index, '%d' % mmi) + setField(mid, fs, field_unknown_count, '%d' % unknows_amount) + setField(mid, fs, field_unmature_count, '%d' % unmatures_amount) + setField(mid, fs, field_morph_man_index, '%d' % morphman_index) setField(mid, fs, field_unknowns, ', '.join(u.base for u in unknowns)) setField(mid, fs, field_unmatures, ', '.join(u.base for u in unmatures)) @@ -496,12 +502,12 @@ def updateNotes(allDb): # other tags if priorityTag in ts: ts.remove(priorityTag) - if isPriority: + if is_priority: ts.append(priorityTag) if frequencyTag in ts: ts.remove(frequencyTag) - if isFrequency: + if is_frequency: ts.append(frequencyTag) if tooShortTag in ts: @@ -520,7 +526,7 @@ def updateNotes(allDb): ts = [tag for tag in ts if tag not in unnecessary] # update sql db - tags_ = TAG.join(TAG.canonify(ts)) + tags_ = TAG.join(ts) flds_ = join_fields(fs) if flds != flds_ or tags != tags_: # only update notes that have changed csum = field_checksum(fs[0]) @@ -528,20 +534,20 @@ def updateNotes(allDb): ds.append( (tags_, flds_, sfld, csum, now, mw.col.usn(), nid)) - mw.progress.update(label='Updating anki database...') + mw.taskman.run_on_main(partial(mw.progress.update, label='Updating anki database...')) mw.col.db.executemany( 'update notes set tags=?, flds=?, sfld=?, csum=?, mod=?, usn=? 
where id=?', ds) # Now reorder new cards based on MMI - mw.progress.update(label='Updating new card ordering...') + mw.taskman.run_on_main(partial(mw.progress.update, label='Updating new card ordering...')) ds = [] # "type = 0": new cards # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type] # "type = 2": review cards for (cid, nid, due) in db.execute('select id, nid, due from cards where type = 0'): - if nid in nid2mmi: # owise it was disabled - due_ = nid2mmi[nid] + if nid in note_id_morphman_index: # owise it was disabled + due_ = note_id_morphman_index[nid] if due != due_: # only update cards that have changed ds.append((due_, now, mw.col.usn(), cid)) @@ -549,7 +555,7 @@ def updateNotes(allDb): mw.col.db.executemany( 'update cards set due=?, mod=?, usn=? where id=?', ds) - mw.reset() + mw.taskman.run_on_main(mw.reset) allDb.meta['last_preferences'] = get_preferences() allDb.meta['last_maturities'] = new_maturities @@ -558,58 +564,57 @@ def updateNotes(allDb): printf('Updated %d notes in %f sec' % (N_notes, time.time() - t_0)) if cfg('saveDbs'): - mw.progress.update(label='Saving all/seen/known/mature dbs') + mw.taskman.run_on_main(partial(mw.progress.update, label='Saving all/seen/known/mature dbs')) allDb.save(cfg('path_all')) seenDb.save(cfg('path_seen')) knownDb.save(cfg('path_known')) matureDb.save(cfg('path_mature')) printf('Updated %d notes + saved dbs in %f sec' % (N_notes, time.time() - t_0)) - mw.progress.finish() + mw.taskman.run_on_main(mw.progress.finish) return knownDb def main(): - # begin------------------- - if doProfile: - pr = cProfile.Profile() - pr.enable() - - # load existing all.db - mw.progress.start(label='Loading existing all.db', immediate=True) - t_0 = time.time() - cur = util.allDb() if cfg('loadAllDb') else None - printf('Loaded all.db in %f sec' % (time.time() - t_0)) - mw.progress.finish() + op = QueryOp( + parent=mw, + op=main_background_op, + success=lambda t: tooltip("Finished 
Recalc"), # t = return value of the op + ) + + # if with_progress() is not called, no progress window will be shown. + # note: QueryOp.with_progress() was broken until Anki 2.1.50 + op.with_progress().run_in_background() + + +def main_background_op(collection: Collection): + assert mw is not None + + mw.taskman.run_on_main(partial(mw.progress.start, label='Loading existing all.db', immediate=True)) + current_all_db = util.get_all_db() if cfg('loadAllDb') else None + mw.taskman.run_on_main(mw.progress.finish) # update all.db - allDb = mkAllDb(cur) - # there was an (non-critical-/non-"exception"-)error but error message was already displayed + allDb = make_all_db(current_all_db) + + # there was an (non-critical-/non-"exception"-)error but error message was already displayed # TODO WTF? if not allDb: - mw.progress.finish() + mw.taskman.run_on_main(mw.progress.finish) return # merge in external.db - mw.progress.start(label='Merging ext.db', immediate=True) + mw.taskman.run_on_main(partial(mw.progress.start, label='Merging ext.db', immediate=True)) ext = MorphDb(cfg('path_ext'), ignoreErrors=True) allDb.merge(ext) - mw.progress.finish() + mw.taskman.run_on_main(mw.progress.finish) # update notes knownDb = updateNotes(allDb) # update stats and refresh display - stats.updateStats(knownDb) - mw.toolbar.draw() + stats.update_stats(knownDb) + + mw.taskman.run_on_main(mw.toolbar.draw) # set global allDb util._allDb = allDb - - # finish------------------- - if doProfile: - pr.disable() - s = io.StringIO() - sortby = SortKey.CUMULATIVE - ps = pstats.Stats(pr, stream=s).sort_stats(sortby) - ps.print_stats() - print(s.getvalue()) diff --git a/morph/manager.py b/morph/manager.py index ca2777dd..f96a94fa 100644 --- a/morph/manager.py +++ b/morph/manager.py @@ -86,7 +86,7 @@ def onGo(self): val += 1 bar.setValue(val) mw.app.processEvents() - mw.progress.finish() + mw.taskman.run_on_main(mw.progress.finish) mw.reset() infoMsg("Completed successfully") diff --git 
a/morph/morph_stats.py b/morph/morph_stats.py new file mode 100644 index 00000000..bd19bf7a --- /dev/null +++ b/morph/morph_stats.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +import gzip +import pickle as pickle +from functools import partial + +from aqt.utils import tooltip + +from .util import mw +from .preferences import get_preference as cfg +from .morphemes import MorphDb + +from .errors.profileNotYetLoadedException import ProfileNotYetLoadedException + + +def get_stat_path(): return cfg('path_stats') + + +def load_stats(): + try: + f = gzip.open(get_stat_path()) + d = pickle.load(f) + f.close() + return d + except IOError: # file DNE => create it + return update_stats() + except ProfileNotYetLoadedException: # profile not loaded yet, can't do anything but wait + return None + + +def save_stats(d): + f = gzip.open(get_stat_path(), 'wb') + pickle.dump(d, f, -1) + f.close() + + +def update_stats(known_db=None): + mw.taskman.run_on_main(partial(mw.progress.start, label='Updating stats', immediate=True)) + + # Load known.db and get total morphemes known + if known_db is None: + known_db = MorphDb(cfg('path_known'), ignoreErrors=True) + + d = {'totalVariations': len(known_db.db), 'totalKnown': len(known_db.groups)} + + save_stats(d) + mw.taskman.run_on_main(mw.progress.finish) + return d + + +def get_stats(): + d = load_stats() + if not d: + return 'K ???', '????' 
+ + total_known = d.get('totalKnown', 0) + total_variations = d.get('totalVariations', total_known) + + name = 'K %d V %d' % (total_known, total_variations) + details = 'Total known morphs' + return name, details + + +def on_morph_stats_clicked(): + tooltip("Total known morphs") diff --git a/morph/newMorphHelper.py b/morph/newMorphHelper.py deleted file mode 100644 index 51b75750..00000000 --- a/morph/newMorphHelper.py +++ /dev/null @@ -1,320 +0,0 @@ -# -*- coding: utf-8 -*- -import codecs -import importlib - -import aqt.main - -if importlib.util.find_spec('anki.scheduler.v1'): - from anki.scheduler import v1 as sched -else: - from anki import sched - -if importlib.util.find_spec('anki.scheduler.v2'): - from anki.scheduler import v2 as schedv2 -else: - from anki import schedv2 - -from anki import hooks -from anki.hooks import wrap -from anki.lang import _ -from aqt import reviewer, dialogs -import re - -from aqt.utils import tooltip - -from . import main -from .util import mw, allDb -from .preferences import get_preference as cfg - -assert isinstance(mw, aqt.main.AnkiQt) - - -# 1 after answering -> skip all cards with same focus as one just answered -# 2 hotkey -> set card as already known, skip it, and all others with same focus -# 3 hotkey -> search for all cards with same focus (in browser) -# 4 in browser -> immediately learn selected cards -# 5 on show -> highlight morphemes within expression according to how well known -# 6 on fill -> pull new cards from all child decks at once instead of sequentially - -# config aliases -def CN(note, key): return cfg(key, note.mid) - - -def focusName(): return cfg('Field_FocusMorph') - - -def focus(n): - focusValue = n[focusName()].strip() - if focusValue == '': - return [] - return [f.strip() for f in focusValue.split(',')] - - -def focusQuery(fieldName, focusMorphs): - q = ' or '.join([r'"%s:re:(^|,|\s)%s($|,|\s)"' % (fieldName, re.escape(f)) for f in focusMorphs]) - if len(focusMorphs) > 0: - q = '(%s)' % q - return q - - 
-########## 6 parent deck pulls new cards from all children instead of sequentially (ie. mostly first) -def my_fillNew(self, recursing=False, _old=None): - """If 'new card merged fill' is enabled for the current deck, when we refill we - pull from all child decks, sort combined pool of cards, then limit. - If disabled, do the standard sequential fill method""" - def C(key): return cfg(key, None, self.col.decks.active()[0]) - - if not C('new card merged fill'): - return _old(self, recursing) - - if self._newQueue: - return True - if not self.newCount: - return False - - self._newQueue = self.col.db.list( - '''select id from cards where did in %s and queue = 0 and due >= ? order by due limit ?''' % self._deckLimit(), - C('new card merged fill min due'), self.queueLimit ) - if self._newQueue: - self._newQueue.reverse() - return True - - -sched.Scheduler._fillNew = wrap(sched.Scheduler._fillNew, my_fillNew, 'around') -schedv2.Scheduler._fillNew = wrap(schedv2.Scheduler._fillNew, my_fillNew, 'around') - -########## handle skipping for 1-2 -seenMorphs = set() - - -def markFocusSeen(self, n): - """Mark a focusMorph as already seen so future new cards with the same focus - will be skipped. Also prints number of cards to be skipped if enabled""" - try: - focusMorphs = focus(n) - except KeyError: - return - - if len(focusMorphs) == 0: - return - - seenMorphs.update(focusMorphs) - q = focusQuery(focusName(), focusMorphs) - num_skipped = len(self.mw.col.findNotes(q)) - 1 - if num_skipped and cfg('print number of alternatives skipped'): - tooltip(_('%d alternatives will be skipped' % num_skipped)) - - -def my_getNewCard(self, _old): - """Continually call _getNewCard until we get one with a focusMorph we haven't - seen before. Also skip bad vocab cards. 
- - :type self: anki.sched.Scheduler | anki.schedv2.Scheduler - :type _old: Callable - """ - - while True: - def C(key): return cfg(key, None, self.col.decks.active()[0]) - if not C('next new card feature'): - return _old(self) - if not C('new card merged fill'): - card = _old(self) # type: anki.cards.Card - else: - if not self._fillNew(): - return - card_id = self._newQueue.pop() - card = self.col.getCard(card_id) - self.newCount -= 1 - - if not card: - return # no more cards - note = card.note() - - # find the right morphemizer for this note, so we can apply model-dependent options (modify off == disable - # skip feature) - from .util import getFilter - note_filter = getFilter(note) - - # this note is not configured in any filter -> proceed like normal without MorphMan-plugin - # the deck should not be modified -> the user probably doesn't want the 'skip mature' feature - if note_filter is None or not note_filter['Modify']: - return card - - # get the focus morph - try: - focus_morphs = focus(note) # field contains either the focusMorph or is empty - except KeyError: - tooltip(_('Encountered card without the \'focus morph\' field configured in the preferences. 
Please check ' - 'your MorphMan settings and note models.')) - return card # card has no focusMorph field -> undefined behavior -> just proceed like normal - - # evaluate all conditions, on which this card might be skipped/buried - is_comprehension_card = note.hasTag(cfg('Tag_Comprehension')) - is_fresh_vocab = note.hasTag(cfg('Tag_Fresh')) - is_already_known = note.hasTag(cfg('Tag_AlreadyKnown')) - - skip_comprehension = cfg('Option_SkipComprehensionCards') - skip_fresh = cfg('Option_SkipFreshVocabCards') - skip_focus_morph_seen_today = cfg('Option_SkipFocusMorphSeenToday') - - skip_conditions = [ - is_comprehension_card and skip_comprehension, - is_fresh_vocab and skip_fresh, - is_already_known, # the user requested that the vocabulary does not have to be shown - skip_focus_morph_seen_today and any([focus in seenMorphs for focus in focus_morphs]) , # we already learned that/saw that today - ] - - if not any(skip_conditions): - break - - # skip/bury card if any skip condition is true - self.buryCards([card.id]) - - # the card was quarried from the "new queue" so we have to increase the "new counter" back to its original value - self.newCount += 1 - return card - - -sched.Scheduler._getNewCard = wrap(sched.Scheduler._getNewCard, my_getNewCard, 'around') -schedv2.Scheduler._getNewCard = wrap(schedv2.Scheduler._getNewCard, my_getNewCard, 'around') - - -########## 1 - after learning a new focus morph, don't learn new cards with the same focus -def my_reviewer_answerCard(self, ease): # 1 - # type: (reviewer.Reviewer, int) -> None - if self.mw.state != "review" or self.state != "answer" or self.mw.col.sched.answerButtons(self.card) < ease: - return - - if CN(self.card.note(), 'auto skip alternatives'): - markFocusSeen(self, self.card.note()) - - -reviewer.Reviewer._answerCard = wrap(reviewer.Reviewer._answerCard, my_reviewer_answerCard, "before") - - -########## 2 - set current card's focus morph as already known and skip alternatives -def setKnownAndSkip(self): # 2 - # 
type: (reviewer.Reviewer) -> None - """Set card as alreadyKnown and skip along with all other cards with same focusMorph. - Useful if you see a focusMorph you already know from external knowledge - """ - - self.mw.checkpoint(_("Set already known focus morph")) - n = self.card.note() - n.addTag(cfg('Tag_AlreadyKnown')) - n.flush() - markFocusSeen(self, n) - - # "new counter" might have been decreased (but "new card" was not answered - # so it shouldn't) -> this function recomputes "new counter" - self.mw.col.reset() - - # skip card - self.nextCard() - - -########## 3 - search in browser for cards with same focus -def browseSameFocus(self): # 3 - """Opens browser and displays all notes with the same focus morph. - Useful to quickly find alternative notes to learn focus from""" - try: - n = self.card.note() - focusMorphs = focus(n) - if len(focusMorphs) == 0: - return - - q = focusQuery(focusName(), focusMorphs) - b = dialogs.open('Browser', self.mw) - b.form.searchEdit.lineEdit().setText(q) - b.onSearchActivated() - except KeyError: - pass - - -########## set keybindings for 2-3 -def my_reviewer_shortcutKeys(self): - key_browse, key_skip = cfg('browse same focus key'), cfg('set known and skip key') - keys = original_shortcutKeys(self) - keys.extend([ - (key_browse, lambda: browseSameFocus(self)), - (key_skip, lambda: setKnownAndSkip(self)) - ]) - return keys - - -original_shortcutKeys = reviewer.Reviewer._shortcutKeys -reviewer.Reviewer._shortcutKeys = my_reviewer_shortcutKeys - - -########## 4 - highlight morphemes using morphHighlight - -def highlight(txt: str, field, filter: str, ctx) -> str: - """When a field is marked with the 'focusMorph' command, we format it by - wrapping all the morphemes in s with attributes set to its maturity""" - - print("morphHighlight filter %s" % filter) - if filter != "morphHighlight": - return txt - - from .util import getFilter - from .morphemizer import getMorphemizerByName - from .morphemes import getMorphemes - - # must avoid 
formatting a smaller morph that is contained in a bigger morph - # => do largest subs first and don't sub anything already in - def nonSpanSub(sub, repl, string): - return ''.join(re.sub(sub, repl, s, flags=re.IGNORECASE) if not s.startswith(')', string)) - - frequency_list_path = cfg('path_frequency') - try: - with codecs.open(frequency_list_path, encoding='utf-8') as f: - frequency_list = [line.strip().split('\t')[0] for line in f.readlines()] - except: - frequency_list = [] - - priority_db = main.MorphDb(cfg('path_priority'), ignoreErrors=True).db - - note = ctx.note() - tags = note.stringTags() - filter = getFilter(note) - if filter is None: - return txt - morphemizer = getMorphemizerByName(filter['Morphemizer']) - if morphemizer is None: - return txt - - ms = getMorphemes(morphemizer, txt, tags) - - proper_nouns_known = cfg('Option_ProperNounsAlreadyKnown') - - for m in sorted(ms, key=lambda x: len(x.inflected), reverse=True): # largest subs first - locs = allDb().getMatchingLocs(m) - mat = max(loc.maturity for loc in locs) if locs else 0 - - if proper_nouns_known and m.isProperNoun(): - mtype = 'mature' - elif mat >= cfg('threshold_mature'): - mtype = 'mature' - elif mat >= cfg('threshold_known'): - mtype = 'known' - elif mat >= cfg('threshold_seen'): - mtype = 'seen' - else: - mtype = 'unknown' - - priority = 'true' if m in priority_db else 'false' - - focus_morph_string = m.show().split()[0] - frequency = 'true' if focus_morph_string in frequency_list else 'false' - - repl = '\\1'.format( - mtype=mtype, - priority=priority, - frequency=frequency, - mat=mat - ) - txt = nonSpanSub('(%s)' % m.inflected, repl, txt) - return txt - -hooks.field_filter.append(highlight) diff --git a/morph/preferences.py b/morph/preferences.py index d89e19d7..96a8adff 100644 --- a/morph/preferences.py +++ b/morph/preferences.py @@ -4,8 +4,8 @@ from .errors.profileNotYetLoadedException import ProfileNotYetLoadedException -# retrieving the configuration using get_config is very 
expensive operation -# instead, save it +# retrieving the configuration using get_config is very expensive operation instead, save it +# TODO: These aren't actually used properly? edit get_preferences? config_data = None config_py = None diff --git a/morph/readability.py b/morph/readability.py index 1c8f587c..e293f6c7 100644 --- a/morph/readability.py +++ b/morph/readability.py @@ -15,6 +15,7 @@ import sqlite3 from collections import namedtuple from contextlib import redirect_stdout, redirect_stderr +from functools import partial from PyQt6.QtCore import * from PyQt6.QtGui import * @@ -39,7 +40,8 @@ importlib.reload(readability_ui) importlib.reload(readability_settings_ui) -def kaner(to_translate, hiraganer = False): + +def kaner(to_translate, hiraganer=False): hiragana = "がぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽ" \ "あいうえおかきくけこさしすせそたちつてと" \ "なにぬねのはひふへほまみむめもやゆよらりるれろ" \ @@ -55,15 +57,18 @@ def kaner(to_translate, hiraganer = False): else: hiragana = [ord(char) for char in hiragana] translate_table = dict(zip(hiragana, katakana)) - return to_translate.translate(translate_table) + return to_translate.translate(translate_table) + def adjustReading(reading): return kaner(reading) + PROFILE_PARSING = False if PROFILE_PARSING: import cProfile + def atoi(text): return int(text) if text.isdigit() else text @@ -76,12 +81,14 @@ def natural_keys(text): """ return [atoi(c) for c in re.split(r'(\d+)', text)] + class NaturalKeysTableWidgetItem(QTableWidgetItem): def __lt__(self, other): lvalue = self.text() rvalue = other.text() return natural_keys(lvalue) < natural_keys(rvalue) + class Source: def __init__(self, name, morphs, line_morphs, unknown_db): self.name = name @@ -90,12 +97,11 @@ def __init__(self, name, morphs, line_morphs, unknown_db): self.unknown_db = unknown_db # Get filled when generating a study plan - self.missing_morphs = None + self.missing_morphs = None self.seen_i = 0 self.known_i = 0 - def getPath(le, caption, open_directory=False): # LineEdit -> GUI () start_path = 
os.path.dirname(le.text()) if start_path == '': @@ -143,12 +149,13 @@ def getFuzzyCount(self, m, exclude_db): for alt, c in ms.items(): if exclude_db != None and c[1]: # Skip marked morphs continue - if exclude_db != None and exclude_db.matches(alt): # Skip excluded morphs + if exclude_db != None and exclude_db.matches(alt): # Skip excluded morphs continue if altIncludesMorpheme(alt, m): # pylint: disable=W1114 #ToDo: verify if pylint is right count += c[0] return count + class LocationCorpus: def __init__(self, db, save_lines=False): self.version = 1.0 @@ -200,6 +207,7 @@ def line_iter(self): else: yield self.morph_count_iter() + class CorpusDBUnpickler(pickle.Unpickler): def find_class(self, cmodule, cname): @@ -210,6 +218,7 @@ def find_class(self, cmodule, cname): cmodule = cmodule.replace('900801631.', curr_module_name) return pickle.Unpickler.find_class(self, cmodule, cname) + class LocationCorpusDB: def __init__(self): self.version = 1.0 @@ -268,6 +277,7 @@ def load(self, path, save_lines=False): del other_db + class TableInteger(QTableWidgetItem): def __init__(self, value): super(TableInteger, self).__init__(str(int(value))) @@ -278,6 +288,7 @@ def __lt__(self, other): rvalue = other.text() return natural_keys(lvalue) < natural_keys(rvalue) + class TableFloat(QTableWidgetItem): def __init__(self, value): super(TableFloat, self).__init__('%0.03f' % value) @@ -288,6 +299,7 @@ def __lt__(self, other): rvalue = other.text() return natural_keys(lvalue) < natural_keys(rvalue) + class TablePercent(QTableWidgetItem): def __init__(self, value): super(TablePercent, self).__init__('%0.02f' % value) @@ -298,6 +310,7 @@ def __lt__(self, other): rvalue = other.text() return natural_keys(lvalue) < natural_keys(rvalue) + def migakuDictDbPath(): if importlib.util.find_spec('Migaku Dictionary'): try: @@ -308,8 +321,13 @@ def migakuDictDbPath(): return None return None -SourceStudyResult = namedtuple('SourceStudyResult', ['old_readability', 'new_readability', 'learned_m', 
'total_freq', 'avg_freq', 'avg_rate']) -SchedulingMorph = namedtuple('SchedulingMorph', ['score', 'master_count', 'unknown_count', 'source_unknown_count', 'count', 'morph']) + +SourceStudyResult = namedtuple('SourceStudyResult', + ['old_readability', 'new_readability', 'learned_m', 'total_freq', 'avg_freq', + 'avg_rate']) +SchedulingMorph = namedtuple('SchedulingMorph', + ['score', 'master_count', 'unknown_count', 'source_unknown_count', 'count', 'morph']) + class SettingsDialog(QDialog): def __init__(self, parent=None): @@ -375,6 +393,7 @@ def onAccept(self): def onReject(self): pass + class AnalyzerDialog(QDialog): def __init__(self, parent=None): super(AnalyzerDialog, self).__init__(parent) @@ -405,7 +424,7 @@ def __init__(self, parent=None): self.ui.studyPlanCheckBox.setChecked(cfg('Option_SaveStudyPlan')) self.ui.frequencyListCheckBox.setChecked(cfg('Option_SaveFrequencyList')) self.ui.advancedSettingsButton.clicked.connect(lambda le: SettingsDialog(self).show()) - + self.ui.wordReportCheckBox.setChecked(cfg('Option_SaveWordReport')) self.ui.groupByDirCheckBox.setChecked(cfg('Option_GroupByDir')) self.ui.processLinesCheckBox.setChecked(cfg('Option_ProcessLines')) @@ -438,23 +457,23 @@ def __init__(self, parent=None): self.write('Web Service Created: '+self.server.serverName()+' : '+self.server.serverAddress().toString()+':'+str(self.server.serverPort()) + '\n') else: self.write("Could't create Web Service\n") - + self.server.newConnection.connect(self.onNewConnection) self.write('Web Service Listening: ' + str(self.server.isListening()) + '\n') def onNewConnection(self): print(self.sender()) - + client = self.server.nextPendingConnection() self.write('Client %s Connected\n' % str(client)) self.clients.add(client) client.textMessageReceived.connect(self.processTextMessage) client.disconnected.connect(self.clientDisconnected) - def processTextMessage(self, message): + def processTextMessage(self, message): client = self.sender() - #self.write('websocket message: 
%s\n' % message) + # self.write('websocket message: %s\n' % message) if self.orig_known_db is None: self.write("Analysis isn't ready\n") @@ -463,13 +482,13 @@ def processTextMessage(self, message): ########### msg = json.loads(message) result = {'idx': msg['idx']} - + try: if msg['type'] == "get-word-tags": parts = msg['word'].split('◴') term, pronunciation = tuple(parts) - #self.write(" input %s %s\n" % (term, pronunciation)) + # self.write(" input %s %s\n" % (term, pronunciation)) pron = adjustReading(pronunciation) is_freq = False @@ -478,7 +497,7 @@ def processTextMessage(self, message): morphemizer = self.morphemizer() morphs = getMorphemes(morphemizer, term) - #if len(morphs) == 1 and morphs[0].read != pron: + # if len(morphs) == 1 and morphs[0].read != pron: # m = morphs[0] # morphs = [Morpheme(m.norm, m.base, m.inflected, pron, m.pos, m.subPos)] @@ -525,13 +544,13 @@ def processTextMessage(self, message): except Exception as e: self.write('exception: %s\n' % str(e)) result['result'] = 'failed' - + client.sendTextMessage(json.dumps(result)) - + def clientDisconnected(self): client = self.sender() self.write('Client %s Disconnected\n' % str(client)) - #self.clients.remove(client) + # self.clients.remove(client) def closeEvent(self, *args, **kwargs): print('closing analyzer') @@ -553,9 +572,8 @@ def buildTotalStudyCount(self): def onStudyPlanSortIndicatorChanged(self, index, order): # Apply the sort - self.ui.studyPlanTable.sortItems(index, order) + self.ui.studyPlanTable.sortItems(index, order) self.buildTotalStudyCount() - def morphemizer(self): return self.ui.morphemizerComboBox.getCurrent() @@ -583,7 +601,7 @@ def isatty(self): def saveWordReport(self, known_db, morphs, path): all_db = CountingMorphDB() - for m,c in morphs.items(): + for m, c in morphs.items(): all_db.addMorph(Morpheme(m.norm, m.base, m.base, m.read, m.pos, m.subPos), c) master_morphs = {} @@ -661,10 +679,12 @@ def sourceStudyPlan(self, f, source, known_db, unknown_db): source_count = 
source_unknown_count + unknown_count score = pow(source_count, self.source_score_power) * self.source_score_multiplier + master_count - to_learn_morphs.append(SchedulingMorph(morph=morph, count=count, source_unknown_count=source_unknown_count, unknown_count=unknown_count, master_count=master_count, score=score)) - - if self.debug_output: f.write(' missing: ' + m[0].show() + '\t[score %d ep_freq %d all_freq %d master_freq %d]\n' % (score, source_unknown_count, unknown_count, master_count)) + to_learn_morphs.append(SchedulingMorph(morph=morph, count=count, source_unknown_count=source_unknown_count, + unknown_count=unknown_count, master_count=master_count, score=score)) + if self.debug_output: f.write( + ' missing: ' + m[0].show() + '\t[score %d ep_freq %d all_freq %d master_freq %d]\n' % ( + score, source_unknown_count, unknown_count, master_count)) known_i = source.known_i learned_this_source = [] @@ -680,7 +700,8 @@ def sourceStudyPlan(self, f, source, known_db, unknown_db): for iteration in range(0, iterations): for m in sorted(to_learn_morphs, key=lambda x: x.score, reverse=True): - if readability >= self.readability_target and not (iteration == 0 and self.take_all_minimum_frequency_morphs): + if readability >= self.readability_target and not ( + iteration == 0 and self.take_all_minimum_frequency_morphs): if self.debug_output: f.write(' readability target reached\n') break @@ -689,7 +710,9 @@ def sourceStudyPlan(self, f, source, known_db, unknown_db): continue if (iteration == 0) and (m.master_count < self.minimum_master_frequency): - if self.debug_output: f.write(' low score: %s [score %d ep_freq %d all_freq %d master_freq %d]\n' % (m.morph.show(), m.score, m.source_unknown_count, m.unknown_count, m.master_count)) + if self.debug_output: f.write( + ' low score: %s [score %d ep_freq %d all_freq %d master_freq %d]\n' % ( + m.morph.show(), m.score, m.source_unknown_count, m.unknown_count, m.master_count)) continue learned_morphs.append(m) @@ -703,8 +726,9 @@ def 
sourceStudyPlan(self, f, source, known_db, unknown_db): avg_freq = float(total_freq) / learned_m if learned_m > 0 else 0 avg_rate = float(learned_m) / source.i_count if source.i_count > 0 else 0 - return learned_this_source, SourceStudyResult(old_readability=old_readability, new_readability=readability, learned_m=learned_m, total_freq=total_freq, avg_freq=avg_freq, avg_rate=avg_rate) - + return learned_this_source, SourceStudyResult(old_readability=old_readability, new_readability=readability, + learned_m=learned_m, total_freq=total_freq, avg_freq=avg_freq, + avg_rate=avg_rate) def onAnalyze(self): self.clearOutput() @@ -725,7 +749,7 @@ def onAnalyze(self): save_frequency_list = self.ui.frequencyListCheckBox.isChecked() group_by_dir = self.ui.groupByDirCheckBox.isChecked() process_lines = self.ui.processLinesCheckBox.isChecked() - + # Save updated preferences pref = {} pref['Option_AnalysisInputPath'] = input_path @@ -744,8 +768,10 @@ def onAnalyze(self): self.proper_nouns_known = cfg('Option_ProperNounsAlreadyKnown') # Study plan settings - self.take_all_minimum_frequency_morphs = (self.minimum_master_frequency > 0) and cfg('Option_AlwaysAddMinFreqMorphs') - self.always_meet_readability_target = (self.minimum_master_frequency > 0) and cfg('Option_AlwaysMeetReadabilityTarget') + self.take_all_minimum_frequency_morphs = (self.minimum_master_frequency > 0) and cfg( + 'Option_AlwaysAddMinFreqMorphs') + self.always_meet_readability_target = (self.minimum_master_frequency > 0) and cfg( + 'Option_AlwaysMeetReadabilityTarget') self.reset_known_for_each_show = cfg('Option_ResetLearnedAfterEachInput') self.optimal_master_target = cfg('Option_OptimalMasterTarget') @@ -888,7 +914,7 @@ def proc_file_result(full_name, corpuses): if morph_state is None: morph_state = 0 if m.isProperNoun(): - morph_state |= 1 # Proper noun bit + morph_state |= 1 # Proper noun bit is_proper_noun = True else: is_proper_noun = False @@ -914,7 +940,7 @@ def proc_file_result(full_name, corpuses): if 
morph_state & 4: mature_count += count - + if process_lines: line_morphs.append(line_morphs_set) line_count += 1 @@ -932,8 +958,8 @@ def proc_file_result(full_name, corpuses): iplus1_percent = 0.0 if line_count == 0 else 100.0 * iplus1_line_count / line_count log_text = '%s\t%d\t%d\t%0.2f\t%d\t%d\t%0.2f\t%0.2f\t%0.2f\t%0.2f\t%0.2f\t%0.2f\n' % ( - full_name, len(seen_morphs), len(known_morphs), known_percent, i_count, known_count, - learning_percent, mature_percent, readability, proper_noun_percent, line_percent, iplus1_percent) + full_name, len(seen_morphs), len(known_morphs), known_percent, i_count, known_count, + learning_percent, mature_percent, readability, proper_noun_percent, line_percent, iplus1_percent) log_fp.write(log_text) self.writeOutput(log_text) row = self.ui.readabilityTable.rowCount() @@ -959,7 +985,7 @@ def proc_file_result(full_name, corpuses): if save_study_plan: source = Source(full_name, seen_morphs, line_morphs, source_unknown_db) - source.i_count = i_count # TODO: Move to class + source.i_count = i_count # TODO: Move to class sources.append(source) def measure_readability(file_path, corp_name): @@ -1007,14 +1033,14 @@ def parse_text(loc_corpus, text): srt_count = 0 else: should_flush = False - + if t != '': filtered_text += t + '\n' - + # Todo: This will flush every line so we can compute per-line readability, which is slower than batching lines. # Figure out how to get per-line analysis with batched lines. 
if should_flush or len(filtered_text) >= 2048: - #if len(filtered_text) >= 2048: + # if len(filtered_text) >= 2048: parse_text(loc_corpus, filtered_text) filtered_text = '' @@ -1059,21 +1085,23 @@ def accepted_filetype(filename): self.ui.readabilityTable.setColumnCount(12) self.ui.readabilityTable.setHorizontalHeaderLabels([ "Input", "Total\nMorphs", "Known\nMorphs", "Known\nMorphs %", "Total\nInstances", "Known\nInstances", - "Young\nInstances %", "Mature\nInstances %", "Known\nInstances %", "Proper\nNoun %", "Line\nReadability %", "i+1\nLines %"]) + "Young\nInstances %", "Mature\nInstances %", "Known\nInstances %", "Proper\nNoun %", "Line\nReadability %", + "i+1\nLines %"]) if len(list_of_files) > 0: if PROFILE_PARSING: pr = cProfile.Profile() pr.enable() - + self.writeOutput('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % ( "Input", "Total Morphs", "Known Morphs", "% Known Morphs", "Total Instances", "Known Instances", "% Young", "% Mature", "% Known", "% Proper Nouns", "% Known Lines", "% i+1 Lines")) - mw.progress.start( label='Measuring readability', max=len(list_of_files), immediate=True ) + mw.taskman.run_on_main( + lambda: mw.progress.start(label='Measuring readability', max=len(list_of_files), immediate=True)) for n, file_path in enumerate(sorted(list_of_files, key=natural_keys)): - mw.progress.update(value=n, label='Parsing (%d/%d) %s' % ( - n + 1, len(list_of_files), os.path.basename(file_path))) + mw.taskman.run_on_main(partial(mw.progress.update, value=n, label='Parsing (%d/%d) %s' % ( + n + 1, len(list_of_files), os.path.basename(file_path)))) if os.path.isfile(file_path): corp_name = os.path.relpath(file_path, Path(input_path).parent) measure_readability(file_path, corp_name) @@ -1087,9 +1115,9 @@ def get_loc_str(loc): i_loc_str = get_loc_str(corpus_db.ordered_locs[i][0]) j = i + 1 - mw.progress.update(value=i, label='Updating (%d/%d) %s' % ( - i + 1, total_locs, i_loc_str)) - + mw.taskman.run_on_main(partial(mw.progress.update, value=i, 
label='Updating (%d/%d) %s' % ( + i + 1, total_locs, i_loc_str))) + while j < total_locs: j_loc = corpus_db.ordered_locs[j] if i_loc_str != get_loc_str(corpus_db.ordered_locs[j][0]): @@ -1098,15 +1126,15 @@ def get_loc_str(loc): proc_file_result(i_loc_str, [x[1] for x in corpus_db.ordered_locs[i:j]]) i = j - - #total_locs = len(corpus_db.ordered_locs) - #for i, (loc, loc_corpus) in enumerate(corpus_db.ordered_locs): + + # total_locs = len(corpus_db.ordered_locs) + # for i, (loc, loc_corpus) in enumerate(corpus_db.ordered_locs): # mw.progress.update(value=n, label='Updating (%d/%d) %s' % ( # i + 1, total_locs, str(loc[0]))) # proc_file_result(loc, loc_corpus) self.writeOutput('\nUsed morphemizer: %s\n' % morphemizer.getDescription()) - mw.progress.finish() + mw.taskman.run_on_main(mw.progress.finish) if PROFILE_PARSING: pr.disable() @@ -1116,7 +1144,7 @@ def get_loc_str(loc): return self.ui.readabilityTable.setSortingEnabled(True) - self.ui.readabilityTable.resizeColumnsToContents() + self.ui.readabilityTable.resizeColumnsToContents() if save_word_report: self.writeOutput("\n[Saving word instance report to '%s'...]\n" % self.instance_freq_report_path) @@ -1127,7 +1155,7 @@ def get_loc_str(loc): if self.save_missing_word_report: self.writeOutput("\n[Saving missing word report to '%s'...]\n" % self.missing_master_path) - + master_morphs = {} for ms in self.master_db.db.values(): for m, c in ms.items(): @@ -1188,17 +1216,22 @@ def get_line_readability(show, known_db): self.ui.studyPlanTable.setSortingEnabled(False) self.ui.studyPlanTable.setColumnCount(10) self.ui.studyPlanTable.setHorizontalHeaderLabels([ - "Input", "To Study\nMorphs ", "Cummulative\nMorphs", "Old Morph\nReadability %", "New Morph\nReadability %", - "Old Line\nReadability %", "New Line\nReadability %", "Avg Study\nFreq", "Avg Study\nPer Instance", "New Master\nFreq %"]) + "Input", "To Study\nMorphs ", "Cummulative\nMorphs", "Old Morph\nReadability %", + "New Morph\nReadability %", + "Old 
Line\nReadability %", "New Line\nReadability %", "Avg Study\nFreq", "Avg Study\nPer Instance", + "New Master\nFreq %"]) - mw.progress.start( label='Building study plan', max=len(sources), immediate=True ) + mw.taskman.run_on_main( + lambda: mw.progress.start(label='Building study plan', max=len(sources), immediate=True)) - def output_study_result(source, study_result, old_line_readability, new_line_readability, new_master_freq): + def output_study_result(source, study_result, old_line_readability, new_line_readability, + new_master_freq): nonlocal learned_tot learned_tot += study_result.learned_m source_str = "'%s' study goal: (%3d/%4d) morph readability: %0.2f -> %0.2f line readability: %0.2f -> %0.2f\n" % ( - source.name, study_result.learned_m, learned_tot, study_result.old_readability, study_result.new_readability, old_line_readability, new_line_readability) + source.name, study_result.learned_m, learned_tot, study_result.old_readability, + study_result.new_readability, old_line_readability, new_line_readability) self.writeOutput(source_str) f.write(source_str) @@ -1216,7 +1249,8 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea self.ui.studyPlanTable.setItem(row, 9, TablePercent(new_master_freq)) for m in learned_this_source: - f.write('\t' + m.morph.show() + '\t[score %d ep_freq %d all_freq %d master_freq %d]\n' % (m.score, m.source_unknown_count, m.unknown_count, m.master_count)) + f.write('\t' + m.morph.show() + '\t[score %d ep_freq %d all_freq %d master_freq %d]\n' % ( + m.score, m.source_unknown_count, m.unknown_count, m.master_count)) matched_sources = set() @@ -1225,14 +1259,16 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea while len(matched_sources) < len(sources): find_optimal_source = (optmize_master_freq and cumulative_master_freq < self.optimal_master_target) - self.writeOutput("{} {} {} {}\n".format(find_optimal_source, optmize_master_freq, cumulative_master_freq, 
self.optimal_master_target)) + self.writeOutput( + "{} {} {} {}\n".format(find_optimal_source, optmize_master_freq, cumulative_master_freq, + self.optimal_master_target)) source_results = [] if optmize_master_freq and not find_optimal_source: - break # Stop searching here + break # Stop searching here for n, source in enumerate(sources): - mw.progress.update( value=n, label='Processing (%d/%d) %s' % (n+1, len(sources), source.name) ) + mw.taskman.run_on_main(partial(mw.progress.update, value=n, label='Processing (%d/%d) %s' % (n + 1, len(sources), source.name))) if source in matched_sources: continue @@ -1247,12 +1283,13 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea continue else: matched_sources.add(source) - + new_line_readability = self.get_line_readability(source, known_db) self.master_score += study_result.total_freq cumulative_master_freq = self.get_master_freq() - output_study_result(source, study_result, old_line_readability, new_line_readability, cumulative_master_freq) + output_study_result(source, study_result, old_line_readability, new_line_readability, + cumulative_master_freq) if self.reset_known_for_each_show: known_db.removeMorphs([m.morph for m in learned_this_source]) @@ -1262,7 +1299,8 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea if find_optimal_source: # Pick the best result as a match and output it - source, learned_this_source, study_result = sorted(source_results, key=lambda x: x[2].avg_freq, reverse=True)[0] + source, learned_this_source, study_result = \ + sorted(source_results, key=lambda x: x[2].avg_freq, reverse=True)[0] self.write('Picked source %s avg_freq %0.2f\n' % (source.name, study_result.avg_freq)) matched_sources.add(source) old_line_readability = self.get_line_readability(source, known_db) @@ -1271,7 +1309,8 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea new_line_readability = self.get_line_readability(source, 
known_db) self.master_score += study_result.total_freq cumulative_master_freq = self.get_master_freq() - output_study_result(source, study_result, old_line_readability, new_line_readability, cumulative_master_freq) + output_study_result(source, study_result, old_line_readability, new_line_readability, + cumulative_master_freq) if self.reset_known_for_each_show: known_db.removeMorphs([m.morph for m in learned_this_source]) @@ -1283,7 +1322,7 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea self.ui.studyPlanTable.resizeColumnsToContents() self.buildTotalStudyCount() known_db.save(self.future_known_db_path) - mw.progress.finish() + mw.taskman.run_on_main(mw.progress.finish) if save_frequency_list: self.writeOutput("\n[Saving frequency list to '%s'...]\n" % self.frequency_list_path) @@ -1299,8 +1338,9 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea unique_set.add(m.morph) self.freq_set.add((m.morph.base, m.morph.read)) self.freq_db.addMorph(m.morph, 1) - print(m.morph.show() + '\t[score %d ep_freq %d all_freq %d master_freq %d]' % (m.score, m.source_unknown_count, m.unknown_count, m.master_count), file=f) - + print(m.morph.show() + '\t[score %d ep_freq %d all_freq %d master_freq %d]' % ( + m.score, m.source_unknown_count, m.unknown_count, m.master_count), file=f) + # Followed by all remaining morphs sorted by score. 
if self.fill_all_morphs_in_plan: for m in sorted(all_missing_morphs, key=operator.itemgetter(5), reverse=True): @@ -1311,7 +1351,8 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea unique_set.add(m.morph) self.freq_set.add((m.morph.base, m.morph.read)) self.freq_db.addMorph(m.morph, 1) - print(m.morph.show() + '\t[score %d ep_freq %d all_freq %d master_freq %d]' % (m.score, m.source_unknown_count, m.unknown_count, m.master_count), file=f) + print(m.morph.show() + '\t[score %d ep_freq %d all_freq %d master_freq %d]' % ( + m.score, m.source_unknown_count, m.unknown_count, m.master_count), file=f) elif self.master_total_instances > 0: master_morphs = [] for ms in self.master_db.db.values(): @@ -1328,7 +1369,7 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea self.freq_set.add((m[0].base, m[0].read)) self.freq_db.addMorph(m[0], 1) known_db.addMLs1(m[0], set()) - + if self.master_total_instances > 0: master_score = 0 for ms in self.master_db.db.values(): @@ -1340,13 +1381,11 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea master_current_score * 100.0 / self.master_total_instances, master_score * 100.0 / self.master_total_instances)) - #open(self.frequency_list_path+'.migaku.txt', 'wt', encoding='utf-8') as migaku_f + # open(self.frequency_list_path+'.migaku.txt', 'wt', encoding='utf-8') as migaku_f if cfg('Option_MigakuDictionaryFreq') and self.migaku_dict_db_path: with redirect_stdout(self), redirect_stderr(self): print("Updating Migaku DB Frequency!") - - conn = sqlite3.connect(self.migaku_dict_db_path) with conn: cur = conn.cursor() @@ -1354,17 +1393,20 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea dicts = cur.fetchall() for dictname, lid in dicts: dict_table = 'l' + str(lid) + 'name' + dictname - #print(' migaku db table', dict_table) - cur.execute("SELECT term, altterm, pronunciation, pos, definition, examples, audio, 
frequency, starCount FROM {0};".format(dict_table)) + # print(' migaku db table', dict_table) + cur.execute( + "SELECT term, altterm, pronunciation, pos, definition, examples, audio, frequency, starCount FROM {0};".format( + dict_table)) items = cur.fetchall() - #print(" db has %d items" % len(items)) + # print(" db has %d items" % len(items)) - mw.progress.start( label='Updating DB ' + dictname, max=len(items), immediate=True ) + mw.taskman.run_on_main(partial(mw.progress.start, label='Updating DB ' + dictname, max=len(items), immediate=True)) ds = [] - for i, (term, altterm, pronunciation, pos, definition, examples, audio, frequency, starCount) in enumerate(items): + for i, (term, altterm, pronunciation, pos, definition, examples, audio, frequency, + starCount) in enumerate(items): if i % 1000 == 0: - mw.progress.update( value=n, label='(%d/%d) Migaku Dict %s' % (i, len(items), dictname) ) + mw.taskman.run_on_main(partial(mw.progress.update, value=n, label='(%d/%d) Migaku Dict %s' % (i, len(items), dictname))) pron = adjustReading(pronunciation) @@ -1401,15 +1443,18 @@ def output_study_result(source, study_result, old_line_readability, new_line_rea newStarCount += ' compound' if newStarCount != starCount: - #print('updating', term, 'stars to', newStarCount, file=migaku_f) - ds.append((newStarCount, term, altterm, pronunciation, pos, definition, examples, audio)) - + # print('updating', term, 'stars to', newStarCount, file=migaku_f) + ds.append( + (newStarCount, term, altterm, pronunciation, pos, definition, examples, audio)) + if len(ds) > 0: - cur.executemany('update {0} set starCount=? where term=? AND altterm=? AND pronunciation=? AND pos=? AND definition=? AND examples=? AND audio=?'.format(dict_table), ds) + cur.executemany( + 'update {0} set starCount=? where term=? AND altterm=? AND pronunciation=? AND pos=? AND definition=? AND examples=? 
AND audio=?'.format( + dict_table), ds) + + mw.taskman.run_on_main(mw.progress.finish) - mw.progress.finish() def main(): mw.mm = AnalyzerDialog(mw) mw.mm.show() - diff --git a/morph/readability_settings_ui.py b/morph/readability_settings_ui.py index 00976335..9e411702 100644 --- a/morph/readability_settings_ui.py +++ b/morph/readability_settings_ui.py @@ -6,7 +6,7 @@ # run again. Do not edit this file unless you know what you are doing. -from PyQt6 import QtCore, QtGui, QtWidgets +from PyQt6 import QtCore, QtWidgets class Ui_ReadabilitySettingsDialog(object): diff --git a/morph/reviewing_utils.py b/morph/reviewing_utils.py new file mode 100644 index 00000000..77c21cab --- /dev/null +++ b/morph/reviewing_utils.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- +import codecs +import re +from typing import List, Optional + +from aqt import dialogs +from aqt.reviewer import Reviewer +from aqt.utils import tooltip + +from anki.notes import Note +from anki.consts import CARD_TYPE_NEW + +from . import main +from .util import get_all_db, get_filter +from .preferences import get_preference + +from .morphemizer import getMorphemizerByName +from .morphemes import getMorphemes + +seen_morphs = set() + + +# 1 after answering -> skip all cards with same focus as one just answered +# 2 hotkey -> set card as already known, skip it, and all others with same focus +# 3 hotkey -> search for all cards with same focus (in browser) +# 4 in browser -> immediately learn selected cards +# 5 on show -> highlight morphemes within expression according to how well known +# 6 on fill -> pull new cards from all child decks at once instead of sequentially + + +def try_to_get_focus_morphs(note: Note) -> Optional[List[str]]: + try: + focus_value = note[get_preference('Field_FocusMorph')].strip() + if focus_value == '': + return [] + return [f.strip() for f in focus_value.split(',')] + except KeyError: + return None + + +def focus_query(field_name, focus_morphs): + q = ' or 
'.join([r'"%s:re:(^|,|\s)%s($|,|\s)"' % (field_name, re.escape(f)) for f in focus_morphs]) + if len(focus_morphs) > 0: + q = '(%s)' % q + return q + + +def mark_morph_seen(note: Note) -> None: + focus_morphs = try_to_get_focus_morphs(note) + + if focus_morphs is not None and len(focus_morphs) > 0: + seen_morphs.update(focus_morphs) + + +def my_next_card(self: Reviewer, _old) -> None: + skipped_cards = SkippedCards() + + self.previous_card = self.card + self.card = None + self._v3 = None + + # NB! If the deck you are studying has sub-decks then new cards will by default only be gathered from the first + # sub-deck until it is empty before looking for new cards in the next sub-deck. If you instead want to get + # new i+1 cards from all sub-decks do the following: + # 1. Activate the v3 scheduler in: Tools -> Review -> Scheduler -> V3 scheduler + # 2. Deck that has sub-decks: Deck options -> Display Order -> New card gather order -> Ascending position + + if self.mw.col.sched.version < 3: + self._get_next_v1_v2_card() + else: + self._get_next_v3_card() + + self._previous_card_info.set_card(self.previous_card) + self._card_info.set_card(self.card) + + if not self.card: + self.mw.moveToState("overview") + return + + while True: + if self.card.type != CARD_TYPE_NEW: + break # ignore non-new cards + + note: Note = self.card.note() + note_filter = get_filter(note) # Note filters from preferences GUI + + if note_filter is None: + break # card did not match (note type and tags) set in preferences GUI + + if not note_filter['Modify']: + break # modify is not set in preferences GUI + + focus_morphs = try_to_get_focus_morphs(note) + + if focus_morphs is None: + tooltip( + ('Encountered card without the \'focus morph\' field configured in the preferences. 
Please check ' + 'your MorphMan settings and note models.')) + break + + skipped_card = skipped_cards.process_skip_conditions_of_card(note, focus_morphs) + + if not skipped_card: + break # card did not meet any skip criteria + + self.mw.col.sched.buryCards([self.card.id], manual=False) + + if self.mw.col.sched.version < 3: + self._get_next_v1_v2_card() + else: + self._get_next_v3_card() + + if self._reps is None: + self._initWeb() + + self._showQuestion() + + # TODO: add option to preferences GUI + if skipped_cards.skipped_at_least_one_card() and get_preference('print number of alternatives skipped'): + skipped_cards.show_tooltip_of_skipped_cards() + + +def set_known_and_skip(self): # 2 + # type: (Reviewer) -> None + """Set card as alreadyKnown and skip along with all other cards with same focusMorph. + Useful if you see a focusMorph you already know from external knowledge + """ + assert self.card is not None + + self.mw.checkpoint(("Set already known focus morph")) + note = self.card.note() + note.add_tag(get_preference('Tag_AlreadyKnown')) + note.flush() + mark_morph_seen(note) + + # "new counter" might have been decreased (but "new card" was not answered + # so it shouldn't) -> this function recomputes "new counter" + self.mw.col.reset() # TODO: Is this still necessary? + + # skip card + self.nextCard() + + +########## 3 - search in browser for cards with same focus +def browse_same_focus(self): # 3 + """Opens browser and displays all notes with the same focus morph. 
+ Useful to quickly find alternative notes to learn focus from""" + try: + n = self.card.note() + focus_morphs = try_to_get_focus_morphs(n) + if len(focus_morphs) == 0: + return + + q = focus_query(get_preference('Field_FocusMorph'), focus_morphs) + b = dialogs.open('Browser', self.mw) + b.form.searchEdit.lineEdit().setText(q) + b.onSearchActivated() + except KeyError: + pass + + +########## set keybindings for 2-3 +def my_reviewer_shortcutKeys(self): + key_browse, key_skip = get_preference('browse same focus key'), get_preference('set known and skip key') + keys = original_shortcutKeys(self) + keys.extend([ + (key_browse, lambda: browse_same_focus(self)), + (key_skip, lambda: set_known_and_skip(self)) + ]) + return keys + + +original_shortcutKeys = Reviewer._shortcutKeys # TODO: move to init file +Reviewer._shortcutKeys = my_reviewer_shortcutKeys + + +########## 4 - highlight morphemes using morphHighlight + +def highlight(txt: str, field, filter: str, ctx) -> str: + """When a field is marked with the 'focusMorph' command, we format it by + wrapping all the morphemes in <span>s with attributes set to its maturity""" + + print("morphHighlight filter %s" % filter) + if filter != "morphHighlight": + return txt + + # must avoid formatting a smaller morph that is contained in a bigger morph + # => do largest subs first and don't sub anything already in <span> + def nonSpanSub(sub, repl, string): + return ''.join(re.sub(sub, repl, s, flags=re.IGNORECASE) if not s.startswith('<span') else s for s in re.split('(<span.*?</span>)', string)) + + frequency_list_path = get_preference('path_frequency') + try: + with codecs.open(frequency_list_path, encoding='utf-8') as f: + frequency_list = [line.strip().split('\t')[0] for line in f.readlines()] + except: + frequency_list = [] + + priority_db = main.MorphDb(get_preference('path_priority'), ignoreErrors=True).db + + note = ctx.note() + tags = note.stringTags() + filter = get_filter(note) + if filter is None: + return txt + morphemizer = getMorphemizerByName(filter['Morphemizer']) + if 
morphemizer is None: + return txt + + ms = getMorphemes(morphemizer, txt, tags) + + proper_nouns_known = get_preference('Option_ProperNounsAlreadyKnown') + + for m in sorted(ms, key=lambda x: len(x.inflected), reverse=True): # largest subs first + locs = get_all_db().getMatchingLocs(m) + mat = max(loc.maturity for loc in locs) if locs else 0 + + if proper_nouns_known and m.isProperNoun(): + mtype = 'mature' + elif mat >= get_preference('threshold_mature'): + mtype = 'mature' + elif mat >= get_preference('threshold_known'): + mtype = 'known' + elif mat >= get_preference('threshold_seen'): + mtype = 'seen' + else: + mtype = 'unknown' + + priority = 'true' if m in priority_db else 'false' + + focus_morph_string = m.show().split()[0] + frequency = 'true' if focus_morph_string in frequency_list else 'false' + + repl = '<span class="morphHighlight" mtype="{mtype}" priority="{priority}" frequency="{frequency}" mat="{mat}">\\1</span>'.format( + mtype=mtype, + priority=priority, + frequency=frequency, + mat=mat + ) + txt = nonSpanSub('(%s)' % m.inflected, repl, txt) + return txt + + +class SkippedCards: + + def __init__(self): + self.skipped_cards = {'comprehension': 0, 'fresh': 0, 'known': 0, 'today': 0} + self.skip_comprehension = get_preference('Option_SkipComprehensionCards') + self.skip_fresh = get_preference('Option_SkipFreshVocabCards') + self.skip_focus_morph_seen_today = get_preference('Option_SkipFocusMorphSeenToday') + + def process_skip_conditions_of_card(self, note: Note, focus_morphs: list[str]) -> bool: + # skip conditions set in preferences GUI + is_comprehension_card = note.has_tag(get_preference('Tag_Comprehension')) + is_fresh_vocab = note.has_tag(get_preference('Tag_Fresh')) + is_already_known = note.has_tag(get_preference('Tag_AlreadyKnown')) + + if is_comprehension_card: + if self.skip_comprehension: + self.skipped_cards['comprehension'] += 1 + return True + elif is_fresh_vocab: + if self.skip_fresh: + self.skipped_cards['fresh'] += 1 + return True + elif is_already_known: # the user requested that the vocabulary does not have to be shown + 
self.skipped_cards['known'] += 1 + return True + elif self.skip_focus_morph_seen_today and any([focus in seen_morphs for focus in focus_morphs]): + self.skipped_cards['today'] += 1 + return True + + return False + + def skipped_at_least_one_card(self): + for key in self.skipped_cards.keys(): + if self.skipped_cards[key] > 0: + return True + return False + + def show_tooltip_of_skipped_cards(self): + skipped_string = '' + + if self.skipped_cards['comprehension'] > 0: + skipped_string += f"Skipped {self.skipped_cards['comprehension']} comprehension cards" + if self.skipped_cards['fresh'] > 0: + if skipped_string != '': + skipped_string += '
<br>' + skipped_string += f"Skipped {self.skipped_cards['fresh']} cards with fresh vocab" + if self.skipped_cards['known'] > 0: + if skipped_string != '': + skipped_string += '<br>' + skipped_string += f"Skipped {self.skipped_cards['known']} already known vocab cards" + if self.skipped_cards['today'] > 0: + if skipped_string != '': + skipped_string += '<br>
' + skipped_string += f"Skipped {self.skipped_cards['today']} cards with focus morph already seen today" + + tooltip(skipped_string) diff --git a/morph/stats.py b/morph/stats.py deleted file mode 100644 index 9b1adc57..00000000 --- a/morph/stats.py +++ /dev/null @@ -1,95 +0,0 @@ -# -*- coding: utf-8 -*- -import gzip -import pickle as pickle - -from anki.hooks import wrap -from anki.lang import _ -from aqt import toolbar -from aqt.utils import tooltip - -from .util import mw -from .preferences import get_preference as cfg - -from .errors.profileNotYetLoadedException import ProfileNotYetLoadedException - -def getStatsPath(): return cfg('path_stats') - - -def loadStats(): - try: - f = gzip.open(getStatsPath()) - d = pickle.load(f) - f.close() - return d - except IOError: # file DNE => create it - return updateStats() - except ProfileNotYetLoadedException: # profile not loaded yet, can't do anything but wait - return None - - -def saveStats(d): - f = gzip.open(getStatsPath(), 'wb') - pickle.dump(d, f, -1) - f.close() - - -def updateStats(known_db=None): - mw.progress.start(label='Updating stats', immediate=True) - - from .morphemes import MorphDb - - # Load known.db and get total morphemes known - if known_db is None: - known_db = MorphDb(cfg('path_known'), ignoreErrors=True) - - d = {'totalVariations': len(known_db.db), 'totalKnown': len(known_db.groups)} - - saveStats(d) - mw.progress.finish() - return d - - -def getStatsLink(): - d = loadStats() - if not d: - return 'K ???', '????' 
- - total_known = d.get('totalKnown', 0) - total_variations = d.get('totalVariations', total_known) - - name = 'K %d V %d' % (total_known, total_variations) - details = 'Total known morphs' - return name, details - - -def on_morph_link_clicked(): - tooltip("Total known morphs") - - -def on_top_toolbar_did_init_links(links, toolbar): - name, details = getStatsLink() - links.append( - toolbar.create_link( - "morph", _(name), on_morph_link_clicked, tip=_(details), id="morph" - ) - ) - -try: - from aqt import gui_hooks - gui_hooks.top_toolbar_did_init_links.append(on_top_toolbar_did_init_links) -except: - # Support for legacy Anki before 2.1.22 - def my_centerLinks(self, _old): - name, details = getStatsLink() - links = [ - ["decks", _("Decks"), _("Shortcut key: %s") % "D"], - ["add", _("Add"), _("Shortcut key: %s") % "A"], - ["browse", _("Browse"), _("Shortcut key: %s") % "B"], - ["stats", _("Stats"), _("Shortcut key: %s") % "T"], - ["sync", _("Sync"), _("Shortcut key: %s") % "Y"], - ["morph", _(name), _(details)], - ] - return self._linkHTML(links) - - toolbar.Toolbar._centerLinks = wrap(toolbar.Toolbar._centerLinks, my_centerLinks, 'around') - diff --git a/morph/text_utils.py b/morph/text_utils.py index 24d33847..bc00fc8c 100644 --- a/morph/text_utils.py +++ b/morph/text_utils.py @@ -4,7 +4,7 @@ from .morphemes import getMorphemes from .morphemizer import getMorphemizerByName from .preferences import get_preference as cfg -from .util import getFilterByMidAndTags, allDb +from .util import getFilterByMidAndTags, get_all_db def nonSpanSub(sub, repl, string): return ''.join(re.sub(sub, repl, s, flags=re.IGNORECASE) if not s.startswith('= cfg('threshold_known')): diff --git a/morph/util.py b/morph/util.py index a7768452..0787e525 100644 --- a/morph/util.py +++ b/morph/util.py @@ -1,25 +1,22 @@ # -*- coding: utf-8 -*- import codecs import datetime +from functools import partial from os import path +from typing import Any, Dict, List, Optional, Callable, TypeVar -from 
anki.hooks import addHook -from anki.notes import Note from aqt import mw from aqt.browser import Browser from aqt.qt import * from aqt.utils import showCritical, showInfo -from .preferences import get_preference, init_preferences -# hack: typing is compile time anyway, so, nothing bad happens if it fails, the try is to support anki < 2.1.16 -try: - from aqt.pinnedmodules import typing # pylint: disable=W0611 # See above hack comment - from typing import Any, Dict, List, Optional, Callable, TypeVar +from anki.hooks import addHook +from anki.notes import Note - T = TypeVar('T') +from .preferences import get_preference +from .morphemes import MorphDb -except ImportError: - pass +T = TypeVar('T') ############################################################################### # Global data @@ -27,7 +24,7 @@ _allDb = None -def allDb(): +def get_all_db() -> MorphDb: global _allDb # Force reload if all.db got deleted @@ -45,29 +42,27 @@ def allDb(): ############################################################################### -addHook('profileLoaded', init_preferences) -# ToDo: - move this hook to better home - - -def getFilter(note): - # type: (Note) -> Optional[dict] - return getFilterByTagsAndType(note.model()['name'], note.tags) +# Filters are the 'note filter' option in morphman gui preferences on which note types they want morphman to handle +# If a note is matched multiple times only the first filter in the list will be used +def get_filter(note: Note) -> Optional[dict]: # TODO: redundant function? 
+ note_type = note.note_type()['name'] + return get_filter_by_type_and_tags(note_type, note.tags) def getFilterByMidAndTags(mid, tags): # type: (Any, List[str]) -> Optional[Dict[...]] - return getFilterByTagsAndType(mw.col.models.get(mid)['name'], tags) + return get_filter_by_type_and_tags(mw.col.models.get(mid)['name'], tags) -def getFilterByTagsAndType(type, tags): - # type: (str, List[str]) -> Optional[Dict[...]] - for f in get_preference('Filter'): - if type != f['Type'] and f['Type'] is not None: # None means all note types are ok - continue - if not set(f['Tags']) <= set(tags): - continue # required tags have to be subset of actual tags - return f - return None +def get_filter_by_type_and_tags(note_type: str, note_tags: List[str]) -> Optional[dict]: + for note_filter in get_preference('Filter'): + if note_type == note_filter['Type'] or note_filter['Type'] is None: # None means 'All note types' is selected + note_tags = set(note_tags) + note_filter_tags = set(note_filter['Tags']) + if note_filter_tags.issubset(note_tags): # required tags have to be subset of actual tags + return note_filter + return None # card did not match (note type and tags) set in preferences GUI + def getReadEnabledModels(): included_types = set() @@ -81,6 +76,7 @@ def getReadEnabledModels(): break return included_types, include_all + def getModifyEnabledModels(): included_types = set() include_all = False @@ -93,6 +89,7 @@ def getModifyEnabledModels(): break return included_types, include_all + ############################################################################### # Fact browser utils ############################################################################### @@ -103,12 +100,11 @@ def doOnNoteSelection(b, preF, perF, postF, progLabel): return nids = b.selectedNotes() - mw.progress.start(label=progLabel, max=len(nids)) + nids_length = len(nids) for i, nid in enumerate(nids): - mw.progress.update(value=i) + mw.taskman.run_on_main(partial(mw.progress.update, label=progLabel, 
max=nids_length, value=i)) n = mw.col.getNote(nid) st = perF(st, n) - mw.progress.finish() st = postF(st) mw.col.updateFieldCache(nids) @@ -194,6 +190,7 @@ def mkBtn(txt, f, parent): parent.addWidget(b) return b + ############################################################################### # Mplayer settings ############################################################################### @@ -205,5 +202,6 @@ def wrapper(*args, **kwargs): if not wrapper.has_run: wrapper.has_run = True return f(*args, **kwargs) + wrapper.has_run = False return wrapper