Skip to content

Commit

Permalink
fix for non-scaffolding in alternative direction
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitry-Antipov committed Sep 26, 2024
1 parent cf66459 commit 55e9c6e
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/Snakefiles/8-hicPipeline.sm
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ rule HiC_rdnascaff:
unitigs_telo = '8-hicPipeline/unitigs.telo',
unitigs_nohpc50 = '8-hicPipeline/unitigs_nonhpc50.mashmap',
path_mashmap = rules.prepareScaffolding.output.path_mashmap,
contigs = rules.copyFiles.output.unitig_fasta,
prefiltered_aln = rules.scaffoldMergeBWA.output.alignments if config['withPOREC'] == "False" else rules.transformBWA.output.byread_mapping
output:
scaff_tsv_path = '8-hicPipeline/scaff_rukki.paths.tsv',
Expand Down
39 changes: 35 additions & 4 deletions src/scripts/scaffolding/scaffold_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def getScores(self, or_path_id, path_storage, type):
continue
next_nor_path = gf.nor_path_id(next_or_path)
#TODO: optimize homology counting, once per all paths
if next_nor_path in homologous_paths:
if next_nor_path in homologous_paths or next_nor_path == nor_path_id:
continue
else:
if not precounted:
Expand Down Expand Up @@ -562,6 +562,8 @@ def generateScaffolds(self):
cur_scaffold = [or_from_path_id]
cur_path_id = or_from_path_id
nor_used_path_ids.add(or_from_path_id.strip('-+'))
#lets add bidirectional expansion
tried_reverse = False
while True:
next_path_id = self.findNextPath(cur_path_id, nor_used_path_ids, "weight")
if next_path_id == "NONE":
Expand All @@ -570,10 +572,29 @@ def generateScaffolds(self):

if next_path_id == "DONE":
self.logger.info ("All done, stopped at telomere")
break
if tried_reverse:
break
else:
self.logger.info (f"Reversing {cur_scaffold}")
cur_scaffold = [gf.rc_path_id(x) for x in cur_scaffold]
cur_scaffold.reverse()
self.logger.info (f"Reversed {cur_scaffold}")
cur_path_id = cur_scaffold[-1]
tried_reverse = True
continue
elif next_path_id == "NONE":
self.logger.info ("Failed to find extension, stopping")
break
if tried_reverse:
break
else:
self.logger.info (f"Reversing {cur_scaffold}")
cur_scaffold = [gf.rc_path_id(x) for x in cur_scaffold]
cur_scaffold.reverse()
self.logger.info (f"Reversed {cur_scaffold}")
cur_path_id = cur_scaffold[-1]
tried_reverse = True
continue

else:
hom_before = False
nor_new_path_id = gf.nor_path_id(next_path_id)
Expand All @@ -587,13 +608,23 @@ def generateScaffolds(self):
hom_before = True
if hom_before:
self.logger.info (f"Homologous paths before in scaffold, not extending {cur_path_id} with {next_path_id}")
break
if tried_reverse:
break
else:
cur_scaffold = [gf.rc_path_id(x) for x in cur_scaffold]
cur_scaffold.reverse()
cur_path_id = cur_scaffold[-1]
tried_reverse = True
continue
self.logger.info (f"Extending {cur_path_id} with {next_path_id}")

#possibly not do so with paths of length one? They can be more successful in other direction
cur_scaffold.append(next_path_id)
nor_used_path_ids.add(next_path_id.strip('-+'))
cur_path_id = next_path_id
#Reversing for comparison with previous runs only
cur_scaffold = [gf.rc_path_id(x) for x in cur_scaffold]
cur_scaffold.reverse()
self.logger.info(f"scaffold {cur_scaffold}\n")
if len(cur_scaffold) > 1:
res.append(cur_scaffold)
Expand Down

0 comments on commit 55e9c6e

Please sign in to comment.