-
Notifications
You must be signed in to change notification settings - Fork 0
/
zff2augustus_gbk.pl
executable file
·89 lines (79 loc) · 2.34 KB
/
zff2augustus_gbk.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/perl -w
# This script assumes very specific ZFF right now, that you are passing in
# export.ann and export.dna that were processed from fathom in SNAP
use strict;
use List::Util qw(min max);
use File::Spec;
use Getopt::Long;
use Bio::DB::Fasta;
use Bio::SeqIO;
use Bio::Location::Split;
use Bio::Location::Simple;
my $zff = shift @ARGV || 'export.ann';
my $genome = shift @ARGV || 'export.dna';
if( ! defined $zff || ! defined $genome ) {
die "usage: zff2augustus_gbk.pl <zff> <genomefasta>\n";
}
my $dbh = Bio::DB::Fasta->new($genome);
my $out = Bio::SeqIO->new(-format => 'genbank',-fh => \*STDOUT);
open(my $fh => $zff) || die $!;
my $seq;
my @location;
while (<$fh>) {
if (/^>(\S+)/) {
if( $seq ) {
# process the previous sequence first
my $loc;
if( @location == 1 ){
$loc = shift @location;
} else {
$loc = Bio::Location::Split->new();
for my $locsub ( sort { $a->start <=> $b->start } @location) {
$loc->add_sub_Location( $locsub );
}
}
my $gene = Bio::SeqFeature::Generic->new(-primary_tag => 'CDS',
-location => $loc);
$seq->add_SeqFeature($gene);
$out->write_seq($seq);
@location = ();
}
my $seqid = $1;
my $seqstr = $dbh->seq($seqid);
if( ! defined $seqstr ) {
die("cannot find $seqid in the input file $genome\n");
}
$seq= Bio::Seq->new(-seq => $seqstr, -id => $seqid);
$seq->add_SeqFeature(Bio::SeqFeature::Generic->new(-primary_tag => 'source',
-start => 1,
-end => $dbh->length($seqid)));
} else {
my @f = split;
if( @f != 4 && @f != 9 ) {
die "input does not appear to be ZFF";
}
my $strand = $f[1] < $f[2] ? '1' : '-1';
if ($strand < 0) {($f[1], $f[2]) = ($f[2], $f[1])}
my $id = pop @f;
# warn("start,end are $f[1], $f[2]\n");
push @location, Bio::Location::Simple->new
(-start => $f[1],
-end => $f[2],
-strand => $strand);
# warn("loc is ",$location[-1]->to_FTstring(), "\n");
}
}
# fencepost, gotta do this again for the last one
my $loc;
if( @location == 1 ){
$loc = shift @location;
} else {
$loc = Bio::Location::Split->new();
for my $locsub ( sort { $a->start <=> $b->start } @location) {
$loc->add_sub_Location( $locsub );
}
}
my $gene = Bio::SeqFeature::Generic->new(-primary_tag => 'CDS',
-location => $loc);
$seq->add_SeqFeature($gene);
$out->write_seq($seq);