Skip to content

Commit

Permalink
Merge pull request #1 from sannalda/development
Browse files Browse the repository at this point in the history
Adding annotation checker and NCBI submission formatter
  • Loading branch information
sannalda authored Feb 6, 2024
2 parents cef2b1b + b667180 commit c0bcfad
Show file tree
Hide file tree
Showing 10 changed files with 403 additions and 18 deletions.
26 changes: 20 additions & 6 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
workdir: "/scratch/sjannalda/projects/PlastidTutorial"
# Working directory in which the entire analysis runs. This should be located in a "scratch" (or similar) folder.
workdir: "/scratch/sjannalda/projects/PlastidPipelineTesting"

samples:
ERR5529317: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/ERR5529317/ERR5529317
ERR5529436: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/ERR5529436/ERR5529436
ERR5554746: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/ERR5554746/ERR5554746

#SRR17032099: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/SRR17032099/SRR17032099
SRR12917849: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/SRR12917849/SRR12917849
#SRR12917857: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/SRR12917857/SRR12917857
#ERR5529317: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/ERR5529317/ERR5529317
#ERR5529436: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/ERR5529436/ERR5529436
#ERR5554746: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/ERR5554746/ERR5554746
#ERR5529299: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/ERR5529299/ERR5529299
#Am09: /scratch/sjannalda/projects/Am09/run1/data/AM0909
#Am21: /scratch/sjannalda/projects/Am21/run1/data/AM2134
#SRR17032105: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/SRR17032105/SRR17032105
#SRR12432532: /scratch/sjannalda/projects/PlastidPipelineTesting/rawdata/SRR12432532/SRR12432532

### Pipeline Options
Trimming: TRUE
Standardization: TRUE
Submission: TRUE



Expand Down Expand Up @@ -54,4 +63,9 @@ MultiGenBank: True # Refers to generate multi-GenBank
lsc_gene: "rbcL" # Gene located in LSC. Default is "rbcL" (previously used "psbA" with direction -1)
lsc_gene_dir: 1 # Gene direction of lsc. 1 is forward, -1 is reverse
ssc_gene: "ndhF" # Gene located in SSC
ssc_gene_dir: 1 # Gene direction of ssc. 1 is forward, -1 is reverse
ssc_gene_dir: 1 # Gene direction of ssc. 1 is forward, -1 is reverse



### Submission Options
metadata_file: "/home/sjannalda/bgbm/projects/PlastidPipeline/config/metadata.txt"
15 changes: 15 additions & 0 deletions config/metadata.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
AUTHORS Siddarth Annaldasula, Katja Reichel
INSTITUTION Institut fuer Biologie-Botanik, Freie Universitaet Berlin, Altensteinstrasse 6, Berlin, Berlin 14195,Germany
SAMPLE AM09_01
SOURCE Arnica montana L. silica-dried leaf sample
SPECIES Arnica montana
TAXONOMY cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliopsida; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Madieae; Arnicinae; Arnica
TAXREF 436207
SOURCE_MOD
/altitude="0 m"
/collected_by="Esther Sossai, Elke Zippel"
/collection_date="17-Jun-2023"
/country="Germany:Mecklenburg-Vorpommern:Barth"
/identified_by="Elke Zippel"
/lat_lon="54.393536 N 12.701389 E"
/tissue_type="leaf"
8 changes: 7 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ rule all:
expand("{sample}/qc/filtering/filtering_report_{sample}.html", sample=config["samples"]),
expand("{sample}/annotation/{sample}.standardardized.gb", sample=config["samples"]) if (config["Standardization"]) else expand("{sample}/annotation/{sample}.original.gb",sample=config["samples"]),
expand("{sample}/annotation/{sample}.standardardized.fasta", sample=config["samples"]) if (config["Standardization"]) else expand("{sample}/assembly/{sample}.original.fasta",sample=config["samples"]),
#expand("{sample}/qc/backmapping/{sample}.backmapping.readcoverage.log", sample=config["samples"]),
expand("{sample}/annotation/{sample}.standardardized.cleaned.gb", sample=config["samples"]) if (config["Standardization"]) else expand("{sample}/annotation/{sample}.original.cleaned.gb",sample=config["samples"]),
expand("{sample}/annotation/{sample}.submission.gb", sample=config["samples"]) if (config["Submission"]) else "",
expand("{sample}/qc/backmapping/{sample}.backmapping.readcoverage.log", sample=config["samples"]),




include: "rules/Preprocessing.smk"

include: "rules/Trimming.smk"
Expand All @@ -22,4 +26,6 @@ include: "rules/Annotation.smk"

include: "rules/Backmapping.smk"

include: "rules/Submission.smk"

localrules: AnnotationPreStandardization, AnnotationPostStandardization
11 changes: 8 additions & 3 deletions workflow/envs/Annotation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ dependencies:
- biopython=1.81=py311h2582759_0
- brotli-python=1.1.0=py311hb755f60_0
- bzip2=1.0.8=h7f98852_4
- ca-certificates=2023.7.22=hbcca054_0
- certifi=2023.7.22=pyhd8ed1ab_0
- ca-certificates=2024.2.2=hbcca054_0
- certifi=2024.2.2=pyhd8ed1ab_0
- charset-normalizer=3.2.0=pyhd8ed1ab_0
- colorama=0.4.6=pyhd8ed1ab_0
- exceptiongroup=1.1.3=pyhd8ed1ab_0
Expand All @@ -38,19 +38,24 @@ dependencies:
- libzlib=1.2.13=hd590300_5
- ncurses=6.4=hcb278e6_0
- numpy=1.26.0=py311h64a7726_0
- openssl=3.1.3=hd590300_0
- openssl=3.2.1=hd590300_0
- outcome=1.2.0=pyhd8ed1ab_0
- packaging=23.1=pyhd8ed1ab_0
- pandas=2.2.0=py311h320fe9a_0
- pip=23.2.1=pyhd8ed1ab_0
- pysocks=1.7.1=pyha2e5f31_6
- python=3.11.5=hab00c5b_0_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-dotenv=1.0.0=pyhd8ed1ab_1
- python-tzdata=2023.4=pyhd8ed1ab_0
- python_abi=3.11=4_cp311
- pytz=2024.1=pyhd8ed1ab_0
- readline=8.2=h8228510_1
- requests=2.31.0=pyhd8ed1ab_0
- selenium=4.12.0=pyhd8ed1ab_0
- selenium-manager=4.12.0=he8a937b_0
- setuptools=68.2.2=pyhd8ed1ab_0
- six=1.16.0=pyh6c4a22f_0
- sniffio=1.3.0=pyhd8ed1ab_0
- sortedcontainers=2.4.0=pyhd8ed1ab_0
- tk=8.6.12=h27826a3_0
Expand Down
67 changes: 67 additions & 0 deletions workflow/envs/Annotation_old.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
name: annotation
channels:
- conda-forge
- bioconda
- defaults
- r
- default
- anaconda
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- attrs=23.1.0=pyh71513ae_1
- biopython=1.81=py311h2582759_0
- brotli-python=1.1.0=py311hb755f60_0
- bzip2=1.0.8=h7f98852_4
- ca-certificates=2023.7.22=hbcca054_0
- certifi=2023.7.22=pyhd8ed1ab_0
- charset-normalizer=3.2.0=pyhd8ed1ab_0
- colorama=0.4.6=pyhd8ed1ab_0
- exceptiongroup=1.1.3=pyhd8ed1ab_0
- h11=0.14.0=pyhd8ed1ab_0
- idna=3.4=pyhd8ed1ab_0
- ld_impl_linux-64=2.40=h41732ed_0
- libblas=3.9.0=18_linux64_openblas
- libcblas=3.9.0=18_linux64_openblas
- libexpat=2.5.0=hcb278e6_1
- libffi=3.4.2=h7f98852_5
- libgcc-ng=13.2.0=h807b86a_2
- libgfortran-ng=13.2.0=h69a702a_2
- libgfortran5=13.2.0=ha4646dd_2
- libgomp=13.2.0=h807b86a_2
- liblapack=3.9.0=18_linux64_openblas
- libnsl=2.0.0=h7f98852_0
- libopenblas=0.3.24=pthreads_h413a1c8_0
- libsqlite=3.43.0=h2797004_0
- libstdcxx-ng=13.2.0=h7e041cc_2
- libuuid=2.38.1=h0b41bf4_0
- libzlib=1.2.13=hd590300_5
- ncurses=6.4=hcb278e6_0
- numpy=1.26.0=py311h64a7726_0
- openssl=3.1.3=hd590300_0
- outcome=1.2.0=pyhd8ed1ab_0
- packaging=23.1=pyhd8ed1ab_0
- pip=23.2.1=pyhd8ed1ab_0
- pysocks=1.7.1=pyha2e5f31_6
- python=3.11.5=hab00c5b_0_cpython
- python-dotenv=1.0.0=pyhd8ed1ab_1
- python_abi=3.11=4_cp311
- readline=8.2=h8228510_1
- requests=2.31.0=pyhd8ed1ab_0
- selenium=4.12.0=pyhd8ed1ab_0
- selenium-manager=4.12.0=he8a937b_0
- setuptools=68.2.2=pyhd8ed1ab_0
- sniffio=1.3.0=pyhd8ed1ab_0
- sortedcontainers=2.4.0=pyhd8ed1ab_0
- tk=8.6.12=h27826a3_0
- tqdm=4.66.1=pyhd8ed1ab_0
- trio=0.22.2=py311h38be061_0
- trio-websocket=0.10.4=pyhd8ed1ab_0
- typing_extensions=4.8.0=pyha770c72_0
- tzdata=2023c=h71feb2d_0
- urllib3=2.0.5=pyhd8ed1ab_0
- webdriver-manager=4.0.0=pyhd8ed1ab_0
- wheel=0.41.2=pyhd8ed1ab_0
- wsproto=1.2.0=pyhd8ed1ab_0
- xz=5.2.6=h166bdaf_0
prefix: /home/sjannalda/mambaforge-pypy3/envs/annotation
45 changes: 40 additions & 5 deletions workflow/rules/Annotation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ rule AnnotationPreStandardization:
output:
"{sample}/annotation/{sample}.original.gb"
resources:
mem_mb=1000,
time="0-00:20:00",
mem_mb=2000,
time="0-00:30:00",
chrome=1
conda:
"../envs/Annotation.yaml"
Expand All @@ -32,11 +32,46 @@ rule AnnotationPostStandardization:
output:
"{sample}/annotation/{sample}.standardardized.gb"
resources:
mem_mb=1000,
time="0-00:20:00",
mem_mb=2000,
time="0-00:30:00",
chrome=1
conda:
"../envs/Annotation.yaml"
localrule: True
script:
"../scripts/GeSeqAutomation.py"
"../scripts/GeSeqAutomation.py"

# Annotation quality control.
#
# The rule operates on either the standardized or the original annotation,
# depending on config["Standardization"]. Only the file-name stage and the
# location of the FASTA differ between the two modes, so both are resolved
# here once and the rule itself is declared a single time.
if config["Standardization"]:
    _qc_stage = "standardardized"
    _qc_fasta = "{sample}/annotation/{sample}.standardardized.fasta"
else:
    _qc_stage = "original"
    # The unstandardized FASTA is the one under assembly/, not annotation/.
    _qc_fasta = "{sample}/assembly/{sample}.original.fasta"

# Common prefix of the GenBank input, both outputs and the log file.
_qc_prefix = "{sample}/annotation/{sample}." + _qc_stage

rule annotationQualityCheckInputFunc:
    # Inputs: the sequence FASTA and its GenBank annotation.
    # Outputs: a ".cleaned.gb" and an ".incorrect.gb" file written by the
    # script (presumably the accepted and the rejected annotations --
    # confirm against AnnotationQualityControl.py).
    input:
        _qc_fasta,
        _qc_prefix + ".gb"
    output:
        _qc_prefix + ".cleaned.gb",
        _qc_prefix + ".incorrect.gb"
    log:
        _qc_prefix + ".incorrect.log"
    resources:
        mem_mb=1000,
        time="0-0:20:00"
    conda:
        "../envs/Annotation.yaml"
    script:
        "../scripts/AnnotationQualityControl.py"
20 changes: 20 additions & 0 deletions workflow/rules/Submission.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
def annotationSubmissionNCBIInphutFunc(wildcards):
    """Return the input files of rule ``annotationSubmissionNCBI`` for one sample.

    The result is always ordered as ``[metadata file, cleaned GenBank, FASTA]``.

    Args:
        wildcards: Snakemake wildcards object of the requesting job; only
            ``wildcards.sample`` is read.

    Returns:
        list[str]: ``config["metadata_file"]`` followed by the cleaned GenBank
        file and the matching FASTA, taken from the standardized files when
        ``config["Standardization"]`` is truthy and from the original files
        otherwise.
    """
    sample = wildcards.sample
    if config["Standardization"]:
        genbank = "{s}/annotation/{s}.standardardized.cleaned.gb".format(s=sample)
        fasta = "{s}/annotation/{s}.standardardized.fasta".format(s=sample)
    else:
        genbank = "{s}/annotation/{s}.original.cleaned.gb".format(s=sample)
        # The unstandardized FASTA is produced under assembly/ (see the targets
        # in `rule all` and the quality-check rule), not under annotation/.
        fasta = "{s}/assembly/{s}.original.fasta".format(s=sample)
    # Paths are built from wildcards.sample so that concrete file names are
    # returned rather than strings still containing a "{sample}" placeholder.
    return [config["metadata_file"], genbank, fasta]


# Produce the per-sample "<sample>.submission.gb" file by running
# AnnotationSubmission.py inside the shared annotation conda environment.
rule annotationSubmissionNCBI:
    input:
        # NOTE(review): although the keyword is "gb", the input function
        # returns a list (metadata file first, then GenBank and FASTA paths);
        # confirm how AnnotationSubmission.py indexes snakemake.input.
        gb=annotationSubmissionNCBIInphutFunc
    output:
        "{sample}/annotation/{sample}.submission.gb"
    conda:
        "../envs/Annotation.yaml"
    resources:
        time="0-0:20:00",
        mem_mb=1000
    script:
        "../scripts/AnnotationSubmission.py"
Loading

0 comments on commit c0bcfad

Please sign in to comment.