Skip to content

Commit

Permalink
update canu tag and use per-contig trim setting to avoid trimming tel…
Browse files Browse the repository at this point in the history
…omere while still trimming internal nodes to supported region
  • Loading branch information
skoren committed Sep 24, 2024
1 parent 33c2079 commit 40388bc
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/Snakefiles/7-generateConsensus.sm
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ fi
-threads {threads} \\\\
-import ../{input.package} \\\\
-A ../{output.consensus}.WORKING \\\\
-C 1 \\\\
-C 2 \\\\
\$align \\\\
-maxcoverage 50 \\\\
-e 0.05 \\\\
Expand Down
10 changes: 8 additions & 2 deletions src/scripts/get_layout_from_mbg.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
layout_output = sys.argv[6]
layscf_output = sys.argv[7]

min_contig_no_trim = 500000
min_read_len_fraction = 0.5
min_read_fromend_fraction = min_read_len_fraction/1.5
min_exact_len_fraction = min_read_len_fraction/3
Expand Down Expand Up @@ -243,7 +244,7 @@ def get_exact_match_length(clusters):
# Find all words that
# begin with [<>], contain anything but [
# begin with [N, contain digits and end with N] or N:optional-description]
# we dump the description here and anly keep the N, digits N] part
# we dump the description here and anly keep the N, digits N] part
#
fullname = lp[0]
pathfull = re.findall(r"([<>][^[]+|\[N\d+N(?:[^\]]+){0,1}\])", lp[1])
Expand Down Expand Up @@ -530,6 +531,7 @@ def get_exact_match_length(clusters):
# contig actually has pieces, output the scaffold map. (The header line
# output from rukki looks like a contig with no pieces.)
#
no_trim = set()
nameid = 1
for contig in sorted(contig_pieces.keys()):
npieces = ngaps = nempty = 0
Expand Down Expand Up @@ -615,6 +617,7 @@ def get_exact_match_length(clusters):
print(f"path {outname} {contig}", file=scf_layout_file)

for line in contig_pieces[contig]:
if len(contig_pieces[contig]) > 2 and (line == contig_pieces[contig][0] or line == contig_pieces[contig][-2]): no_trim.add(line)
print(line, file=scf_layout_file)

nameid += 1
Expand All @@ -623,7 +626,6 @@ def get_exact_match_length(clusters):

del nameid


for contig in sorted(contig_actual_lines.keys()):
if len(contig_actual_lines[contig]) == 0: continue
assert len(contig_actual_lines[contig]) > 0
Expand All @@ -637,6 +639,10 @@ def get_exact_match_length(clusters):
end_pos = max(end_pos, line[2])
print(f"tig\t{contig}", file=tig_layout_file)
print(f"len\t{end_pos - start_pos}", file=tig_layout_file)
if end_pos-start_pos >= min_contig_no_trim or contig in no_trim:
print(f"trm\t1", file=tig_layout_file)
else:
print(f"trm\t0", file=tig_layout_file)
print(f"rds\t{len(contig_actual_lines[contig])}", file=tig_layout_file)
for line in contig_actual_lines[contig]:
bgn = line[1] - start_pos
Expand Down

0 comments on commit 40388bc

Please sign in to comment.