Skip to content

Commit

Permalink
PERF: only store the needed suffix arrays in mem
Browse files (browse the repository at this point in the history)
Might help a bit with memory usage
  • Loading branch information
fedarko committed Sep 15, 2023
1 parent 395cabb commit 1cdb301
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions wotplot/_make.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,11 +310,6 @@ def _mlog(s):
_mlog("computing suffix array for s2...")
s2_sa = _get_suffix_array(s2)

_mlog("computing ReverseComplement(s2)...")
rcs2 = rc(s2)
_mlog("computing suffix array for ReverseComplement(s2)...")
rcs2_sa = _get_suffix_array(rcs2)

# Find k-mers that are shared between both strings (not considering
# reverse-complementing)
matches = {}
Expand All @@ -323,6 +318,15 @@ def _mlog(s):
s1, s2, k, s1_sa, s2_sa, matches, yorder=yorder, binary=binary
)
_mlog(f"found {len(matches):,} forward match cell(s).")
# This may not be strictly necessary, but explicitly drop our reference to
# s2's suffix array now that the forward matches have been found, so that
# this large array can be garbage-collected before we build the next one.
del s2_sa

_mlog("computing ReverseComplement(s2)...")
rcs2 = rc(s2)
_mlog("computing suffix array for ReverseComplement(s2)...")
rcs2_sa = _get_suffix_array(rcs2)

_mlog("finding matches between s1 and ReverseComplement(s2)...")
_fill_match_cells(
Expand All @@ -337,6 +341,7 @@ def _mlog(s):
s2isrc=True,
)
_mlog(f"found {len(matches):,} total match cell(s).")
del rcs2_sa
density = 100 * (len(matches) / (mat_shape[0] * mat_shape[1]))
_mlog(f"density = {density:.2f}%.")

Expand Down

0 comments on commit 1cdb301

Please sign in to comment.