diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP new file mode 100644 index 0000000..2133859 --- /dev/null +++ b/MANIFEST.SKIP @@ -0,0 +1,2 @@ +bin/quince_chiphmmnew +tmp diff --git a/dist.ini b/dist.ini new file mode 100644 index 0000000..383a95e --- /dev/null +++ b/dist.ini @@ -0,0 +1,25 @@ +name = DETCT +author = James Morris +author = Ian Sealy +license = GPL_3 +copyright_holder = Genome Research Ltd +copyright_year = 2013 +version = 0.1.0 + +[@Basic] +[ExecDir] +dir = script +[FileFinder::ByName / ScriptNotR] +dir = script +skip = .*\.R$ +[ModuleBuild] +[PodWeaver] +finder = :InstallModules +finder = ScriptNotR +[PodCoverageTests] +[PodSyntaxTests] +[Test::Perl::Critic] +[PerlTidy] +[AutoPrereqs] +[PkgVersion] +[Test::Compile] diff --git a/lib/DETCT.pm b/lib/DETCT.pm new file mode 100644 index 0000000..8777d00 --- /dev/null +++ b/lib/DETCT.pm @@ -0,0 +1,21 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT; +## use critic + +# ABSTRACT: Transcript Counting API + +## Author : is1 +## Maintainer : is1 +## Created : 2012-09-18 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +1; diff --git a/lib/DETCT/Analysis.pm b/lib/DETCT/Analysis.pm new file mode 100644 index 0000000..0899cd4 --- /dev/null +++ b/lib/DETCT/Analysis.pm @@ -0,0 +1,1569 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Analysis; +## use critic + +# ABSTRACT: Object representing an analysis of a collection of samples + +## Author : is1 +## Maintainer : is1 +## Created : 2012-09-19 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use Class::InsideOut qw( private register id ); +use List::MoreUtils qw( uniq ); +use YAML::Tiny; +use Data::Compare; +use DETCT::Sample; +use DETCT::Sequence; +use DETCT::Misc::BAM; + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private name => my %name; # e.g. zmp_ph1 +private sample => my %sample; # arrayref of samples +private sequence => my %sequence; # arrayref of sequences +private read1_length => my %read1_length; # e.g. 30 +private read2_length => my %read2_length; # e.g. 54 +private mismatch_threshold => my %mismatch_threshold; # e.g. 2 +private bin_size => my %bin_size; # e.g. 100 +private peak_buffer_width => my %peak_buffer_width; # e.g. 100 +private hmm_sig_level => my %hmm_sig_level; # e.g. 0.001 +private hmm_binary => my %hmm_binary; # e.g. ~/quince_chiphmmnew +private r_binary => my %r_binary; # e.g. R +private deseq_script => my %deseq_script; # e.g. ~/run_deseq.R +private output_sig_level => my %output_sig_level; # e.g. 0.05 +private ref_fasta => my %ref_fasta; # e.g. zv9.fa +private fasta_index => my %fasta_index; # Bio::DB::Sam::Fai +private ensembl_host => my %ensembl_host; # e.g. ensembldb.ensembl.org +private ensembl_port => my %ensembl_port; # e.g. 3306 +private ensembl_user => my %ensembl_user; # e.g. anonymous +private ensembl_pass => my %ensembl_pass; # e.g. secret +private ensembl_name => my %ensembl_name; # e.g. zv9_core +private ensembl_species => my %ensembl_species; # e.g. danio_rerio +private slice_adaptor => my %slice_adaptor; # Bio::EnsEMBL::DBSQL::SliceAdaptor +private chunk_total => my %chunk_total; # e.g. 
20 +private chunk => my %chunk; # arrayref of arrayrefs of sequences +private test_chunk => my %test_chunk; # e.g. 1 + +# Constants +Readonly our $MAX_NAME_LENGTH => 128; +Readonly our $DEFAULT_ENSEMBL_HOST => 'ensembldb.ensembl.org'; +Readonly our $DEFAULT_ENSEMBL_USER => 'anonymous'; + +=method new + + Usage : my $analysis = DETCT::Analysis->new( { + name => 'zmp_ph1', + read1_length => 30, + read2_length => 54, + mismatch_threshold => 2, + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + hmm_binary => 'bin/quince_chiphmmnew', + r_binary => 'R', + deseq_script => 'script/run_deseq.R', + output_sig_level => 0.05, + chunk_total => 20, + } ); + Purpose : Constructor for analysis objects + Returns : DETCT::Analysis + Parameters : Hashref { + name => String, + read1_length => Int, + read2_length => Int, + mismatch_threshold => Int, + bin_size => Int, + peak_buffer_width => Int, + hmm_sig_level => Float, + hmm_binary => String, + r_binary => String, + deseq_script => String, + output_sig_level => Float, + ref_fasta => String or undef, + ensembl_host => String or undef, + ensembl_port => Int or undef, + ensembl_user => String or undef, + ensembl_pass => String or undef, + ensembl_name => String or undef, + ensembl_species => String or undef, + chunk_total => Int, + test_chunk => Int or undef, + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_name( $arg_ref->{name} ); + $self->set_read1_length( $arg_ref->{read1_length} ); + $self->set_read2_length( $arg_ref->{read2_length} ); + $self->set_mismatch_threshold( $arg_ref->{mismatch_threshold} ); + $self->set_bin_size( $arg_ref->{bin_size} ); + $self->set_peak_buffer_width( $arg_ref->{peak_buffer_width} ); + $self->set_hmm_sig_level( $arg_ref->{hmm_sig_level} ); + $self->set_hmm_binary( $arg_ref->{hmm_binary} ); + $self->set_r_binary( $arg_ref->{r_binary} ); + $self->set_deseq_script( $arg_ref->{deseq_script} ); + $self->set_output_sig_level( $arg_ref->{output_sig_level} ); + $self->set_ref_fasta( $arg_ref->{ref_fasta} ); + $self->set_ensembl_host( $arg_ref->{ensembl_host} ); + $self->set_ensembl_port( $arg_ref->{ensembl_port} ); + $self->set_ensembl_user( $arg_ref->{ensembl_user} ); + $self->set_ensembl_pass( $arg_ref->{ensembl_pass} ); + $self->set_ensembl_name( $arg_ref->{ensembl_name} ); + $self->set_ensembl_species( $arg_ref->{ensembl_species} ); + $self->set_chunk_total( $arg_ref->{chunk_total} ); + $self->set_test_chunk( $arg_ref->{test_chunk} ); + return $self; +} + +=method new_from_yaml + + Usage : my $analysis = DETCT::Analysis->new_from_yaml( 'zmp_ph1.yaml' ); + Purpose : Constructor for creating analysis objects from a YAML file + Returns : DETCT::Analysis + Parameters : String (the YAML file) + Throws : If YAML file is missing or not readable + Comments : None + +=cut + +sub new_from_yaml { + my ( $class, $yaml_file ) = @_; + my $self = register($class); + + confess "YAML file ($yaml_file) does not exist or cannot be read" + if !-r $yaml_file; + + my $yaml = YAML::Tiny->read($yaml_file); + + $self->set_name( $yaml->[0]->{name} ); + $self->set_read1_length( $yaml->[0]->{read1_length} ); + $self->set_read2_length( $yaml->[0]->{read2_length} ); + $self->set_mismatch_threshold( $yaml->[0]->{mismatch_threshold} ); + $self->set_bin_size( $yaml->[0]->{bin_size} ); + $self->set_peak_buffer_width( $yaml->[0]->{peak_buffer_width} ); + $self->set_hmm_sig_level( $yaml->[0]->{hmm_sig_level} ); + $self->set_hmm_binary( 
$yaml->[0]->{hmm_binary} ); + $self->set_r_binary( $yaml->[0]->{r_binary} ); + $self->set_deseq_script( $yaml->[0]->{deseq_script} ); + $self->set_output_sig_level( $yaml->[0]->{output_sig_level} ); + $self->set_ref_fasta( $yaml->[0]->{ref_fasta} ); + $self->set_ensembl_host( $yaml->[0]->{ensembl_host} ); + $self->set_ensembl_port( $yaml->[0]->{ensembl_port} ); + $self->set_ensembl_user( $yaml->[0]->{ensembl_user} ); + $self->set_ensembl_pass( $yaml->[0]->{ensembl_pass} ); + $self->set_ensembl_name( $yaml->[0]->{ensembl_name} ); + $self->set_ensembl_species( $yaml->[0]->{ensembl_species} ); + $self->set_chunk_total( $yaml->[0]->{chunk_total} ); + $self->set_test_chunk( $yaml->[0]->{test_chunk} ); + + foreach my $sample_hash ( @{ $yaml->[0]->{samples} } ) { + my $sample = DETCT::Sample->new( + { + name => $sample_hash->{name}, + description => $sample_hash->{description}, + condition => $sample_hash->{condition}, + group => $sample_hash->{group}, + tag => $sample_hash->{tag}, + bam_file => $sample_hash->{bam_file}, + } + ); + $self->add_sample( $sample, 1 ); # 1 = do not validate + } + + $self->_validate(); + + return $self; +} + +=method name + + Usage : my $name = $analysis->name; + Purpose : Getter for name attribute + Returns : String (e.g. "zmp_ph1") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub name { + my ($self) = @_; + return $name{ id $self}; +} + +=method set_name + + Usage : $analysis->set_name('zmp_ph1'); + Purpose : Setter for name attribute + Returns : undef + Parameters : String (the name) + Throws : No exceptions + Comments : None + +=cut + +sub set_name { + my ( $self, $arg ) = @_; + $name{ id $self} = _check_name($arg); + return; +} + +# Usage : $name = _check_name($name); +# Purpose : Check for valid name +# Returns : String (the valid name) +# Parameters : String (the name) +# Throws : If name is missing +# If name is empty +# If name > $MAX_NAME_LENGTH characters +# Comments : None +sub _check_name { + my ($name) = @_; + + confess 'No name specified' if !defined $name; + confess 'Empty name specified' if !length $name; + confess "Name ($name) longer than $MAX_NAME_LENGTH characters" + if length $name > $MAX_NAME_LENGTH; + + return $name; +} + +=method add_sample + + Usage : $analysis->add_sample($sample); + Purpose : Add a sample to an analysis + Returns : undef + Parameters : DETCT::Sample + Defined or undef (indicating if validation is needed) + Throws : If sample is missing or invalid (i.e. 
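# A minimal usage sketch; the file name (zmp_ph1.yaml), sample values, tag and
# BAM path below are hypothetical. The config keys are the ones read by
# new_from_yaml() and the per-sample keys match the samples loop above.
use DETCT::Analysis;
use DETCT::Sample;

my $analysis = DETCT::Analysis->new_from_yaml('zmp_ph1.yaml');

# Samples can also be added individually; this triggers revalidation of the
# reference sequences across all BAM files.
my $extra_sample = DETCT::Sample->new(
    {
        name        => 'wt_4',
        description => 'wild type 4',
        condition   => 'sibling',
        group       => '4',
        tag         => 'NNNNBCAGAG',
        bam_file    => 'wt_4.bam',
    }
);
$analysis->add_sample($extra_sample);

printf "%s: %d samples, %d reference sequences\n",
    $analysis->name,
    scalar @{ $analysis->get_all_samples() },
    scalar @{ $analysis->get_all_sequences() };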
not a DETCT::Sample + object) + Comments : None + +=cut + +sub add_sample { + my ( $self, $sample, $no_validaton ) = @_; + + confess 'No sample specified' if !defined $sample; + confess 'Class of sample (', ref $sample, ') not DETCT::Sample' + if !$sample->isa('DETCT::Sample'); + + if ( !exists $sample{ id $self} ) { + $sample{ id $self} = [$sample]; + $self->add_all_sequences( $sample->bam_file ); # Because first sample + } + else { + push @{ $sample{ id $self} }, $sample; + } + + if ( !defined $no_validaton ) { + $self->_validate(); + } + + return; +} + +=method get_all_samples + + Usage : $samples = $analysis->get_all_samples(); + Purpose : Get all samples of an analysis + Returns : Arrayref of DETCT::Sample objects + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub get_all_samples { + my ($self) = @_; + + return $sample{ id $self} || []; +} + +=method add_all_sequences + + Usage : $analysis->add_all_sequences($bam_file); + Purpose : Add all sequences (sorted by decreasing length) to an analysis + Returns : undef + Parameters : String (the BAM file) + Throws : No exceptions + Comments : None + +=cut + +sub add_all_sequences { + my ( $self, $bam_file ) = @_; + + $bam_file = DETCT::Sample::check_bam_file($bam_file); + + $sequence{ id $self} = []; + + my %len = DETCT::Misc::BAM::get_reference_sequence_lengths($bam_file); + + foreach my $name ( reverse sort { $len{$a} <=> $len{$b} } keys %len ) { + my $sequence = DETCT::Sequence->new( + { + name => $name, + bp => $len{$name}, + } + ); + + push @{ $sequence{ id $self} }, $sequence; + } + + # Group sequences into chunks + $self->add_all_chunks(); + + return; +} + +=method get_all_sequences + + Usage : $sequences = $analysis->get_all_sequences(); + Purpose : Get all sequences (sorted by decreasing length) of an analysis + Returns : Arrayref of DETCT::Sequence objects + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub get_all_sequences { + my ($self) = @_; + + return $sequence{ id $self} || []; +} + +# Usage : $analysis->_validate(); +# Purpose : Check analysis +# Returns : 1 +# Parameters : None +# Throws : If reference sequences don't match +# Comments : None +sub _validate { + my ($self) = @_; + + my @bam_files = $self->list_all_bam_files(); + + # Compare reference sequence from first BAM file to all other BAM files + my $first_bam_file = shift @bam_files; + my %first_bam_length = + DETCT::Misc::BAM::get_reference_sequence_lengths($first_bam_file); + foreach my $bam_file (@bam_files) { + my %bam_length = + DETCT::Misc::BAM::get_reference_sequence_lengths($bam_file); + if ( !Compare( \%first_bam_length, \%bam_length ) ) { + confess "$first_bam_file and $bam_file use different reference"; + } + } + + return 1; +} + +=method read1_length + + Usage : my $read1_length = $analysis->read1_length; + Purpose : Getter for read 1 length attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub read1_length { + my ($self) = @_; + return $read1_length{ id $self}; +} + +=method set_read1_length + + Usage : $analysis->set_read1_length(20); + Purpose : Setter for read 1 length attribute + Returns : undef + Parameters : +ve Int (the read 1 length) + Throws : No exceptions + Comments : None + +=cut + +sub set_read1_length { + my ( $self, $arg ) = @_; + $read1_length{ id $self} = _check_read1_length($arg); + return; +} + +# Usage : $read1_length = _check_read1_length($read1_length); +# Purpose : Check for valid read 1 length +# Returns : +ve Int (the valid 
read 1 length) +# Parameters : +ve Int (the read 1 length) +# Throws : If read 1 length is missing or not a positive integer +# Comments : None +sub _check_read1_length { + my ($read1_length) = @_; + return $read1_length + if defined $read1_length && $read1_length =~ m/\A \d+ \z/xms; + confess 'No read 1 length specified' if !defined $read1_length; + confess "Invalid read 1 length ($read1_length) specified"; +} + +=method read2_length + + Usage : my $read2_length = $analysis->read2_length; + Purpose : Getter for read 2 length attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub read2_length { + my ($self) = @_; + return $read2_length{ id $self}; +} + +=method set_read2_length + + Usage : $analysis->set_read2_length(20); + Purpose : Setter for read 2 length attribute + Returns : undef + Parameters : +ve Int (the read 2 length) + Throws : No exceptions + Comments : None + +=cut + +sub set_read2_length { + my ( $self, $arg ) = @_; + $read2_length{ id $self} = _check_read2_length($arg); + return; +} + +# Usage : $read2_length = _check_read2_length($read2_length); +# Purpose : Check for valid read 2 length +# Returns : +ve Int (the valid read 2 length) +# Parameters : +ve Int (the read 2 length) +# Throws : If read 2 length is missing or not a positive integer +# Comments : None +sub _check_read2_length { + my ($read2_length) = @_; + return $read2_length + if defined $read2_length && $read2_length =~ m/\A \d+ \z/xms; + confess 'No read 2 length specified' if !defined $read2_length; + confess "Invalid read 2 length ($read2_length) specified"; +} + +=method mismatch_threshold + + Usage : my $mismatch_threshold = $analysis->mismatch_threshold; + Purpose : Getter for mismatch threshold attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub mismatch_threshold { + my ($self) = @_; + return $mismatch_threshold{ id $self}; +} + +=method set_mismatch_threshold + + Usage : $analysis->set_mismatch_threshold(20); + Purpose : Setter for mismatch threshold attribute + Returns : undef + Parameters : +ve Int (the mismatch threshold) + Throws : No exceptions + Comments : None + +=cut + +sub set_mismatch_threshold { + my ( $self, $arg ) = @_; + $mismatch_threshold{ id $self} = _check_mismatch_threshold($arg); + return; +} + +# Usage : $mismatch_threshold +# = _check_mismatch_threshold($mismatch_threshold); +# Purpose : Check for valid mismatch threshold +# Returns : +ve Int (the valid mismatch threshold) +# Parameters : +ve Int (the mismatch threshold) +# Throws : If mismatch threshold is missing or not a positive integer +# Comments : None +sub _check_mismatch_threshold { + my ($mismatch_threshold) = @_; + return $mismatch_threshold + if defined $mismatch_threshold && $mismatch_threshold =~ m/\A \d+ \z/xms; + confess 'No mismatch threshold specified' if !defined $mismatch_threshold; + confess "Invalid mismatch threshold ($mismatch_threshold) specified"; +} + +=method bin_size + + Usage : my $bin_size = $analysis->bin_size; + Purpose : Getter for bin size attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub bin_size { + my ($self) = @_; + return $bin_size{ id $self}; +} + +=method set_bin_size + + Usage : $analysis->set_bin_size(100); + Purpose : Setter for bin size attribute + Returns : undef + Parameters : +ve Int (the bin size) + Throws : No exceptions + Comments : None + +=cut + +sub set_bin_size { + my ( $self, $arg ) = @_; + $bin_size{ id $self} 
= _check_bin_size($arg); + return; +} + +# Usage : $bin_size = _check_bin_size($bin_size); +# Purpose : Check for valid bin size +# Returns : +ve Int (the valid bin size) +# Parameters : +ve Int (the bin size) +# Throws : If bin size is missing or not a positive integer +# Comments : None +sub _check_bin_size { + my ($bin_size) = @_; + return $bin_size + if defined $bin_size && $bin_size =~ m/\A \d+ \z/xms; + confess 'No bin size specified' if !defined $bin_size; + confess "Invalid bin size ($bin_size) specified"; +} + +=method peak_buffer_width + + Usage : my $peak_buffer_width = $analysis->peak_buffer_width; + Purpose : Getter for peak buffer width attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub peak_buffer_width { + my ($self) = @_; + return $peak_buffer_width{ id $self}; +} + +=method set_peak_buffer_width + + Usage : $analysis->set_peak_buffer_width(100); + Purpose : Setter for peak buffer width attribute + Returns : undef + Parameters : +ve Int (the peak buffer width) + Throws : No exceptions + Comments : None + +=cut + +sub set_peak_buffer_width { + my ( $self, $arg ) = @_; + $peak_buffer_width{ id $self} = _check_peak_buffer_width($arg); + return; +} + +# Usage : $peak_buffer_width = _check_peak_buffer_width($peak_buffer_width); +# Purpose : Check for valid peak buffer width +# Returns : +ve Int (the valid peak buffer width) +# Parameters : +ve Int (the peak buffer width) +# Throws : If peak buffer width is missing or not a positive integer +# Comments : None +sub _check_peak_buffer_width { + my ($peak_buffer_width) = @_; + return $peak_buffer_width + if defined $peak_buffer_width && $peak_buffer_width =~ m/\A \d+ \z/xms; + confess 'No peak buffer width specified' if !defined $peak_buffer_width; + confess "Invalid peak buffer width ($peak_buffer_width) specified"; +} + +=method hmm_sig_level + + Usage : my $hmm_sig_level = $analysis->hmm_sig_level; + Purpose : Getter for HMM significance level attribute + Returns : +ve Float + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub hmm_sig_level { + my ($self) = @_; + return $hmm_sig_level{ id $self}; +} + +=method set_hmm_sig_level + + Usage : $analysis->set_hmm_sig_level(0.001); + Purpose : Setter for HMM significance level attribute + Returns : undef + Parameters : +ve Float (the HMM significance level) + Throws : No exceptions + Comments : None + +=cut + +sub set_hmm_sig_level { + my ( $self, $arg ) = @_; + $hmm_sig_level{ id $self} = _check_hmm_sig_level($arg); + return; +} + +# Usage : $hmm_sig_level = _check_hmm_sig_level($hmm_sig_level); +# Purpose : Check for valid HMM significance level +# Returns : +ve Float (the valid HMM significance level) +# Parameters : +ve Float (the HMM significance level) +# Throws : If HMM significance level is missing or not a positive float +# Comments : None +sub _check_hmm_sig_level { + my ($hmm_sig_level) = @_; + return $hmm_sig_level + if defined $hmm_sig_level && $hmm_sig_level =~ m/\A \d* [.] 
\d+ \z/xms; + confess 'No HMM significance level specified' if !defined $hmm_sig_level; + confess "Invalid HMM significance level ($hmm_sig_level) specified"; +} + +=method hmm_binary + + Usage : my $hmm_binary = $analysis->hmm_binary; + Purpose : Getter for HMM binary attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub hmm_binary { + my ($self) = @_; + return $hmm_binary{ id $self}; +} + +=method set_hmm_binary + + Usage : $analysis->set_hmm_binary('bin/quince_chiphmmnew'); + Purpose : Setter for HMM binary attribute + Returns : undef + Parameters : String (the HMM binary) + Throws : No exceptions + Comments : None + +=cut + +sub set_hmm_binary { + my ( $self, $arg ) = @_; + $hmm_binary{ id $self} = _check_hmm_binary($arg); + return; +} + +# Usage : $hmm_binary = _check_hmm_binary($hmm_binary); +# Purpose : Check for valid HMM binary +# Returns : String (the valid HMM binary) +# Parameters : String (the HMM binary) +# Throws : If HMM binary is missing or not readable +# Comments : None +sub _check_hmm_binary { + my ($hmm_binary) = @_; + return $hmm_binary if defined $hmm_binary && -r $hmm_binary; + confess 'No HMM binary specified' if !defined $hmm_binary; + confess "HMM binary ($hmm_binary) does not exist or cannot be read"; +} + +=method r_binary + + Usage : my $r_binary = $analysis->r_binary; + Purpose : Getter for R binary attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub r_binary { + my ($self) = @_; + return $r_binary{ id $self}; +} + +=method set_r_binary + + Usage : $analysis->set_r_binary('R'); + Purpose : Setter for R binary attribute + Returns : undef + Parameters : String (the R binary) + Throws : No exceptions + Comments : None + +=cut + +sub set_r_binary { + my ( $self, $arg ) = @_; + $r_binary{ id $self} = _check_r_binary($arg); + return; +} + +# Usage : $r_binary = _check_r_binary($r_binary); +# Purpose : Check for valid R binary +# Returns : String (the valid R binary) +# Parameters : String (the R binary) +# Throws : If R binary is missing +# Comments : None +sub _check_r_binary { + my ($r_binary) = @_; + return $r_binary if defined $r_binary; + confess 'No R binary specified'; +} + +=method deseq_script + + Usage : my $deseq_script = $analysis->deseq_script; + Purpose : Getter for DESeq script attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub deseq_script { + my ($self) = @_; + return $deseq_script{ id $self}; +} + +=method set_deseq_script + + Usage : $analysis->set_deseq_script('script/run_deseq.R'); + Purpose : Setter for DESeq script attribute + Returns : undef + Parameters : String (the DESeq script) + Throws : No exceptions + Comments : None + +=cut + +sub set_deseq_script { + my ( $self, $arg ) = @_; + $deseq_script{ id $self} = _check_deseq_script($arg); + return; +} + +# Usage : $deseq_script = _check_deseq_script($deseq_script); +# Purpose : Check for valid DESeq script +# Returns : String (the valid DESeq script) +# Parameters : String (the DESeq script) +# Throws : If DESeq script is missing or not readable +# Comments : None +sub _check_deseq_script { + my ($deseq_script) = @_; + return $deseq_script if defined $deseq_script && -r $deseq_script; + confess 'No DESeq script specified' if !defined $deseq_script; + confess "DESeq script ($deseq_script) does not exist or cannot be read"; +} + +=method output_sig_level + + Usage : my $output_sig_level = $analysis->output_sig_level; + Purpose : 
Getter for output significance level attribute + Returns : +ve Float + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub output_sig_level { + my ($self) = @_; + return $output_sig_level{ id $self}; +} + +=method set_output_sig_level + + Usage : $analysis->set_output_sig_level(0.001); + Purpose : Setter for output significance level attribute + Returns : undef + Parameters : +ve Float (the output significance level) + Throws : No exceptions + Comments : None + +=cut + +sub set_output_sig_level { + my ( $self, $arg ) = @_; + $output_sig_level{ id $self} = _check_output_sig_level($arg); + return; +} + +# Usage : $output_sig_level = _check_output_sig_level($output_sig_level); +# Purpose : Check for valid output significance level +# Returns : +ve Float (the valid output significance level) +# Parameters : +ve Float (the output significance level) +# Throws : If output significance level is missing or not a positive float +# Comments : None +sub _check_output_sig_level { + my ($output_sig_level) = @_; + return $output_sig_level + if defined $output_sig_level + && $output_sig_level =~ m/\A \d* [.] \d+ \z/xms; + confess 'No output significance level specified' + if !defined $output_sig_level; + confess "Invalid output significance level ($output_sig_level) specified"; +} + +=method ref_fasta + + Usage : my $ref_fasta = $analysis->ref_fasta; + Purpose : Getter for reference FASTA attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub ref_fasta { + my ($self) = @_; + return $ref_fasta{ id $self}; +} + +=method set_ref_fasta + + Usage : $analysis->set_ref_fasta('zv9.fa'); + Purpose : Setter for reference FASTA attribute + Returns : undef + Parameters : String (the reference FASTA) + Throws : No exceptions + Comments : None + +=cut + +sub set_ref_fasta { + my ( $self, $arg ) = @_; + $ref_fasta{ id $self} = _check_ref_fasta($arg); + return; +} + +# Usage : $ref_fasta = _check_ref_fasta($ref_fasta); +# Purpose : Check for valid reference FASTA +# Returns : String (the valid reference FASTA) +# Parameters : String (the reference FASTA) +# Throws : If reference FASTA is defined but not readable +# Comments : None +sub _check_ref_fasta { + my ($ref_fasta) = @_; + return $ref_fasta if !defined $ref_fasta || -r $ref_fasta; + confess "Reference FASTA ($ref_fasta) cannot be read"; +} + +=method ensembl_host + + Usage : my $ensembl_host = $analysis->ensembl_host; + Purpose : Getter for Ensembl host attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +=method fasta_index + + Usage : my $fai = $analysis->fasta_index; + Purpose : Getter for FASTA index attribute + Returns : Bio::DB::Sam::Fai + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub fasta_index { + my ($self) = @_; + + if ( !defined $fasta_index{ id $self} && $self->ref_fasta ) { + + # We can create a FASTA index object + $self->set_fasta_index( Bio::DB::Sam::Fai->load( $self->ref_fasta ) ); + } + + return $fasta_index{ id $self}; +} + +=method set_fasta_index + + Usage : $analysis->set_fasta_index($fai); + Purpose : Setter for FASTA index attribute + Returns : undef + Parameters : Bio::DB::Sam::Fai + Throws : No exceptions + Comments : None + +=cut + +sub set_fasta_index { + my ( $self, $arg ) = @_; + $fasta_index{ id $self} = _check_fasta_index($arg); + return; +} + +# Usage : $fai = _check_fasta_index($fai); +# Purpose : Check for valid FASTA index +# Returns : Bio::DB::Sam::Fai +# Parameters : 
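# A minimal sketch of the lazy FASTA index, assuming $analysis is a
# DETCT::Analysis object; 'zv9.fa' is the hypothetical path used in the
# attribute comments. Setting a reference FASTA is enough, and the first call
# to fasta_index() loads a Bio::DB::Sam::Fai for it.
$analysis->set_ref_fasta('zv9.fa');
my $fai = $analysis->fasta_index;    # Bio::DB::Sam::Fai, created on first use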
Bio::DB::Sam::Fai +# Throws : If FASTA index is missing or invalid (i.e. not a +# Bio::DB::Sam::Fai object) +# Comments : None +sub _check_fasta_index { + my ($fasta_index) = @_; + return $fasta_index + if defined $fasta_index && $fasta_index->isa('Bio::DB::Sam::Fai'); + confess 'No FASTA index specified' if !defined $fasta_index; + confess 'Class of FASTA index (', ref $fasta_index, + ') not Bio::DB::Sam::Fai'; +} + +sub ensembl_host { + my ($self) = @_; + return $ensembl_host{ id $self}; +} + +=method set_ensembl_host + + Usage : $analysis->set_ensembl_host('ensembldb.ensembl.org'); + Purpose : Setter for Ensembl host attribute + Returns : undef + Parameters : String (the Ensembl host) + Throws : No exceptions + Comments : None + +=cut + +sub set_ensembl_host { + my ( $self, $arg ) = @_; + $ensembl_host{ id $self} = $arg; + return; +} + +=method ensembl_port + + Usage : my $ensembl_port = $analysis->ensembl_port; + Purpose : Getter for Ensembl port attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub ensembl_port { + my ($self) = @_; + return $ensembl_port{ id $self}; +} + +=method set_ensembl_port + + Usage : $analysis->set_ensembl_port(3306); + Purpose : Setter for Ensembl port attribute + Returns : undef + Parameters : +ve Int (the Ensembl port) + Throws : No exceptions + Comments : None + +=cut + +sub set_ensembl_port { + my ( $self, $arg ) = @_; + $ensembl_port{ id $self} = _check_ensembl_port($arg); + return; +} + +# Usage : $ensembl_port = _check_ensembl_port($ensembl_port); +# Purpose : Check for valid Ensembl port +# Returns : +ve Int (the valid Ensembl port) +# Parameters : +ve Int (the Ensembl port) +# Throws : If Ensembl port is defined but not a positive integer +# Comments : None +sub _check_ensembl_port { + my ($ensembl_port) = @_; + return $ensembl_port + if !defined $ensembl_port || $ensembl_port =~ m/\A \d+ \z/xms; + confess "Invalid Ensembl port ($ensembl_port) specified"; +} + +=method ensembl_user + + Usage : my $ensembl_user = $analysis->ensembl_user; + Purpose : Getter for Ensembl username attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub ensembl_user { + my ($self) = @_; + return $ensembl_user{ id $self}; +} + +=method set_ensembl_user + + Usage : $analysis->set_ensembl_user('anonymous'); + Purpose : Setter for Ensembl username attribute + Returns : undef + Parameters : String (the Ensembl username) + Throws : No exceptions + Comments : None + +=cut + +sub set_ensembl_user { + my ( $self, $arg ) = @_; + $ensembl_user{ id $self} = $arg; + return; +} + +=method ensembl_pass + + Usage : my $ensembl_pass = $analysis->ensembl_pass; + Purpose : Getter for Ensembl password attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub ensembl_pass { + my ($self) = @_; + return $ensembl_pass{ id $self}; +} + +=method set_ensembl_pass + + Usage : $analysis->set_ensembl_pass('secret'); + Purpose : Setter for Ensembl password attribute + Returns : undef + Parameters : String (the Ensembl password) + Throws : No exceptions + Comments : None + +=cut + +sub set_ensembl_pass { + my ( $self, $arg ) = @_; + $ensembl_pass{ id $self} = $arg; + return; +} + +=method ensembl_name + + Usage : my $ensembl_name = $analysis->ensembl_name; + Purpose : Getter for Ensembl database name attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub ensembl_name { + my ($self) = @_; + return 
$ensembl_name{ id $self}; +} + +=method set_ensembl_name + + Usage : $analysis->set_ensembl_name('zv9_core'); + Purpose : Setter for Ensembl database name attribute + Returns : undef + Parameters : String (the Ensembl database name) + Throws : No exceptions + Comments : None + +=cut + +sub set_ensembl_name { + my ( $self, $arg ) = @_; + $ensembl_name{ id $self} = $arg; + return; +} + +=method ensembl_species + + Usage : my $ensembl_species = $analysis->ensembl_species; + Purpose : Getter for Ensembl species attribute + Returns : String + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub ensembl_species { + my ($self) = @_; + return $ensembl_species{ id $self}; +} + +=method set_ensembl_species + + Usage : $analysis->set_ensembl_species('danio_rerio'); + Purpose : Setter for Ensembl species attribute + Returns : undef + Parameters : String (the Ensembl species) + Throws : No exceptions + Comments : None + +=cut + +sub set_ensembl_species { + my ( $self, $arg ) = @_; + $ensembl_species{ id $self} = $arg; + return; +} + +=method slice_adaptor + + Usage : my $slice_adaptor = $analysis->slice_adaptor; + Purpose : Getter for Ensembl slice adaptor attribute + Returns : Bio::EnsEMBL::DBSQL::SliceAdaptor + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub slice_adaptor { + my ($self) = @_; + + if ( !defined $slice_adaptor{ id $self} + && ( $self->ensembl_species || $self->ensembl_name ) ) + { + # We can create an Ensembl slice adaptor + $self->_create_slice_adaptor(); + } + + return $slice_adaptor{ id $self}; +} + +=method set_slice_adaptor + + Usage : $analysis->set_slice_adaptor($slice_adaptor); + Purpose : Setter for Ensembl slice adaptor attribute + Returns : undef + Parameters : Bio::EnsEMBL::DBSQL::SliceAdaptor + Throws : No exceptions + Comments : None + +=cut + +sub set_slice_adaptor { + my ( $self, $arg ) = @_; + $slice_adaptor{ id $self} = _check_slice_adaptor($arg); + return; +} + +# Usage : $slice_adaptor = _check_slice_adaptor($slice_adaptor); +# Purpose : Check for valid Ensembl slice adaptor +# Returns : Bio::EnsEMBL::DBSQL::SliceAdaptor +# Parameters : Bio::EnsEMBL::DBSQL::SliceAdaptor +# Throws : If slice adaptor is missing or invalid (i.e. 
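# A minimal sketch of the lazy Ensembl slice adaptor, assuming $analysis is a
# DETCT::Analysis object; 'danio_rerio' is the example species from the
# attribute comments. With only a species (or a database name) set, the host
# and user fall back to $DEFAULT_ENSEMBL_HOST and $DEFAULT_ENSEMBL_USER.
$analysis->set_ensembl_species('danio_rerio');
my $slice_adaptor = $analysis->slice_adaptor;    # connects on first use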
not a +# Bio::EnsEMBL::DBSQL::SliceAdaptor object) +# Comments : None +sub _check_slice_adaptor { + my ($slice_adaptor) = @_; + return $slice_adaptor + if defined $slice_adaptor + && $slice_adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor'); + confess 'No Ensembl slice adaptor specified' if !defined $slice_adaptor; + confess 'Class of Ensembl slice adaptor (', ref $slice_adaptor, + ') not Bio::EnsEMBL::DBSQL::SliceAdaptor'; +} + +=method chunk_total + + Usage : my $chunk_total = $analysis->chunk_total; + Purpose : Getter for chunk total attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub chunk_total { + my ($self) = @_; + return $chunk_total{ id $self}; +} + +=method set_chunk_total + + Usage : $analysis->set_chunk_total(20); + Purpose : Setter for chunk total attribute + Returns : undef + Parameters : +ve Int (the chunk total) + Throws : No exceptions + Comments : None + +=cut + +sub set_chunk_total { + my ( $self, $arg ) = @_; + $chunk_total{ id $self} = _check_chunk_total($arg); + + # Recalculate chunks if necessary + if ( scalar @{ $self->get_all_samples() } ) { + $self->add_all_chunks(); + } + + return; +} + +# Usage : $chunk_total = _check_chunk_total($chunk_total); +# Purpose : Check for valid chunk total +# Returns : +ve Int (the valid chunk total) +# Parameters : +ve Int (the chunk total) +# Throws : If chunk total is missing or not a positive integer +# Comments : None +sub _check_chunk_total { + my ($chunk_total) = @_; + return $chunk_total + if defined $chunk_total && $chunk_total =~ m/\A \d+ \z/xms; + confess 'No chunk total specified' if !defined $chunk_total; + confess "Invalid chunk total ($chunk_total) specified"; +} + +=method test_chunk + + Usage : my $test_chunk = $analysis->test_chunk; + Purpose : Getter for test chunk attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub test_chunk { + my ($self) = @_; + return $test_chunk{ id $self}; +} + +=method set_test_chunk + + Usage : $analysis->set_test_chunk(1); + Purpose : Setter for test chunk attribute + Returns : undef + Parameters : +ve Int (the test chunk) + Throws : No exceptions + Comments : None + +=cut + +sub set_test_chunk { + my ( $self, $arg ) = @_; + $test_chunk{ id $self} = $arg; + return; +} + +=method add_all_chunks + + Usage : $analysis->add_all_chunks(); + Purpose : Add all chunks (groups of sequences) to an analysis + Returns : undef + Parameters : None + Throws : No exceptions + Comments : Groups all sequences into a specific number of (roughly equally + sized) chunks + +=cut + +sub add_all_chunks { + my ($self) = @_; + + my @seqs = @{ $self->get_all_sequences() }; + + # Get total sequence length + my $total_bp = 0; + foreach my $seq (@seqs) { + $total_bp += $seq->bp; + } + + # Get chunk target size (+ 1 to ensure slight overestimate) + my $target_chunk_size = int( $total_bp / $self->chunk_total + 1 ); + + my @chunks; + my @chunk_size = map { 0 } 1 .. $self->chunk_total; + + # Iterate over sequences + SEQ: foreach my $seq (@seqs) { + + # Iterate over each chunk + foreach my $chunk_index ( 0 .. 
$self->chunk_total - 1 ) { + + # Add sequence to chunk if there's room or if the chunk is empty + if ( $chunk_size[$chunk_index] + $seq->bp <= $target_chunk_size + || $chunk_size[$chunk_index] == 0 ) + { + push @{ $chunks[$chunk_index] }, $seq; + $chunk_size[$chunk_index] += $seq->bp; + next SEQ; # Next sequence + } + } + + # Sequence hasn't been added to a chunk, so add to chunk with most room + my $roomy_chunk_index = 0; + foreach my $chunk_index ( 0 .. $self->chunk_total - 1 ) { + if ( $chunk_size[$chunk_index] < $chunk_size[$roomy_chunk_index] ) { + $roomy_chunk_index = $chunk_index; + } + } + push @{ $chunks[$roomy_chunk_index] }, $seq; + $chunk_size[$roomy_chunk_index] += $seq->bp; + } + + # Iterate over empty chunks in order to attempt to add sequences to them + foreach my $empty_chunk_index ( 0 .. $self->chunk_total - 1 ) { + next if defined $chunks[$empty_chunk_index]; # Only want empty chunks + + # Find chunk with highest number of sequences (but more than one) + my $max_seqs_chunk_index; + my $max_seqs; + foreach my $chunk_index ( 0 .. $self->chunk_total - 1 ) { + next if !defined $chunks[$chunk_index]; # Only want non-empty chunks + my $seqs = scalar @{ $chunks[$chunk_index] }; + if ( $seqs > 1 && ( !defined $max_seqs || $seqs > $max_seqs ) ) { + $max_seqs_chunk_index = $chunk_index; + $max_seqs = $seqs; + } + } + + last if !defined $max_seqs; # No splittable chunks + + # Split chosen chunk into empty chunk + my $split_index = int( $max_seqs / 2 ); + @{ $chunks[$empty_chunk_index] } = + splice @{ $chunks[$max_seqs_chunk_index] }, 0, $split_index; + } + + $chunk{ id $self} = \@chunks; + + # Number of chunks may be smaller than requested chunk total, so adjust + $chunk_total{ id $self} = scalar @chunks; + + return; +} + +=method get_all_chunks + + Usage : $chunks = $analysis->get_all_chunks(); + Purpose : Get all chunks (groups of sequences) of an analysis + Returns : Arrayref of arrayrefs of DETCT::Sequence objects + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub get_all_chunks { + my ($self) = @_; + + my $chunks = $chunk{ id $self} || []; + + # If a test chunk is specified then only return that chunk not all chunks + if ( $self->test_chunk && exists $chunks->[ $self->test_chunk - 1 ] ) { + $chunks = [ $chunks->[ $self->test_chunk - 1 ] ]; + } + + return $chunks; +} + +=method list_all_bam_files + + Usage : @bam_files = $analysis->list_all_bam_files(); + Purpose : Get all BAM files used in an analysis + Returns : Arrayref of strings + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub list_all_bam_files { + my ($self) = @_; + + my $samples = $self->get_all_samples(); + + my @bam_files = map { $_->bam_file } @{$samples}; + + return uniq( sort @bam_files ); +} + +=method list_all_tags_by_bam_file + + Usage : @tags = $analysis->list_all_tags_by_bam_file(); + Purpose : Get all tags used in an analysis in a particular BAM file + Returns : Arrayref of strings + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub list_all_tags_by_bam_file { + my ( $self, $bam_file ) = @_; + + my $samples = $self->get_all_samples(); + + my @tags = map { $_->tag } grep { $_->bam_file eq $bam_file } @{$samples}; + + return uniq( sort @tags ); +} + +=method get_subsequence + + Usage : $seq = $analysis->get_subsequence('1', 1, 10); + Purpose : Get subsequence from reference + Returns : String (sequence) + Parameters : String (the sequence name) + Int (the sequence start) + Int (the sequence end) + Int (the sequence strand) + 
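# Two minimal sketches, assuming $analysis is a DETCT::Analysis object with
# samples already added. First, sequences are packed into roughly equal-sized
# chunks for parallel processing, so a driver script would normally loop over
# get_all_chunks(). Second, get_subsequence() needs a strand as well as a
# sequence name, start and end, and uses the FASTA index or the Ensembl
# database, whichever is configured. The coordinates are made up.
$analysis->set_chunk_total(20);
foreach my $chunk ( @{ $analysis->get_all_chunks() } ) {
    my $bp = 0;
    $bp += $_->bp foreach @{$chunk};
    printf "chunk of %d sequence(s), %d bp\n", scalar @{$chunk}, $bp;
}

my $subseq = $analysis->get_subsequence( '1', 1, 10, 1 );    # 10 bp of chr 1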
Throws : If sequence name is missing + If sequence start is missing + If sequence end is missing + If sequence strand is missing + Comments : None + +=cut + +sub get_subsequence { + my ( $self, $seq_name, $start, $end, $strand ) = @_; + + confess 'No sequence name specified' if !defined $seq_name; + confess 'No sequence start specified' if !defined $start; + confess 'No sequence end specified' if !defined $end; + confess 'No sequence strand specified' if !defined $strand; + + # Avoid negative positions (but don't worry if end is larger than sequence) + if ( $start < 1 ) { + $start = 1; + } + if ( $end < 1 ) { + $end = 1; + } + + my $subseq; + + if ( $self->fasta_index ) { + $subseq = DETCT::Misc::BAM::get_sequence( + { + fasta_index => $self->fasta_index, + seq_name => $seq_name, + start => $start, + end => $end, + strand => $strand, + } + ); + } + elsif ( $self->slice_adaptor ) { + $subseq = + $self->slice_adaptor->fetch_by_region( 'toplevel', $seq_name, $start, + $end, $strand )->seq; + } + else { + confess 'No reference FASTA or Ensembl database'; + } + + return uc $subseq; +} + +# Usage : $self->_create_slice_adaptor(); +# Purpose : Create an Ensembl slice adaptor +# Returns : Undef +# Parameters : None +# Throws : No exceptions +# Comments : None +sub _create_slice_adaptor { + my ($self) = @_; + + my $host = + $self->ensembl_host ? $self->ensembl_host : $DEFAULT_ENSEMBL_HOST; + my $port = $self->ensembl_port; + my $user = + $self->ensembl_user ? $self->ensembl_user : $DEFAULT_ENSEMBL_USER; + my $pass = $self->ensembl_pass; + my $slice_adaptor; + if ( !$self->ensembl_name ) { + + # Get slice adaptor via registry + require Bio::EnsEMBL::Registry; + Bio::EnsEMBL::Registry->load_registry_from_db( + -host => $host, + -port => $port, + -user => $user, + -pass => $pass, + -species => $self->ensembl_species, + ); + $slice_adaptor = + Bio::EnsEMBL::Registry->get_adaptor( $self->ensembl_species, 'core', + 'slice' ); + } + else { + # Get slice adaptor from specific database + require Bio::EnsEMBL::DBSQL::DBAdaptor; + my $ensembl_db = Bio::EnsEMBL::DBSQL::DBAdaptor->new( + -host => $host, + -port => $port, + -user => $user, + -pass => $pass, + -dbname => $self->ensembl_name, + ); + $slice_adaptor = $ensembl_db->get_SliceAdaptor(); + } + + $self->set_slice_adaptor($slice_adaptor); + + return; +} + +1; diff --git a/lib/DETCT/Gene.pm b/lib/DETCT/Gene.pm new file mode 100644 index 0000000..4bc5d88 --- /dev/null +++ b/lib/DETCT/Gene.pm @@ -0,0 +1,574 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Gene; +## use critic + +# ABSTRACT: Object representing a gene + +## Author : is1 +## Maintainer : is1 +## Created : 2012-11-24 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use Class::InsideOut qw( private register id ); +use Scalar::Util qw( weaken ); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private genebuild_version => my %genebuild_version; # e.g. e69 +private stable_id => my %stable_id; # e.g. ENSDARG00000095747 +private name => my %name; # e.g. cxc64 +private description => my %description; # e.g. CXC chemokine 64... +private biotype => my %biotype; # e.g. protein_coding +private seq_name => my %seq_name; # e.g. 5 +private start => my %start; # e.g. 40352744 +private end => my %end; # e.g. 40354399 +private strand => my %strand; # e.g. 
1 +private transcript => my %transcript; # DETCT::Transcript + +# Constants +Readonly our $MAX_NAME_LENGTH => 128; + +=method new + + Usage : my $gene = DETCT::Gene->new( { + genebuild_version => 'e61', + stable_id => 'ENSDARG00000095747', + biotype => 'protein_coding', + seq_name => '5', + start => 40352744, + end => 40354399, + strand => 1, + } ); + Purpose : Constructor for gene objects + Returns : DETCT::Gene + Parameters : Hashref { + genebuild_version => String, + stable_id => String, + name => String or undef, + description => String or undef, + biotype => String, + seq_name => String, + start => +ve Int, + end => +ve Int, + strand => Int (1 or -1), + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_genebuild_version( $arg_ref->{genebuild_version} ); + $self->set_stable_id( $arg_ref->{stable_id} ); + $self->set_name( $arg_ref->{name} ); + $self->set_description( $arg_ref->{description} ); + $self->set_biotype( $arg_ref->{biotype} ); + $self->set_seq_name( $arg_ref->{seq_name} ); + $self->set_start( $arg_ref->{start} ); + $self->set_end( $arg_ref->{end} ); + $self->set_strand( $arg_ref->{strand} ); + return $self; +} + +=method genebuild_version + + Usage : my $gv = $gene->genebuild_version; + Purpose : Getter for genebuild version attribute + Returns : String (e.g. "e61" for Ensembl 61) + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub genebuild_version { + my ($self) = @_; + return $genebuild_version{ id $self}; +} + +=method set_genebuild_version + + Usage : $gene->set_genebuild_version('e61'); + Purpose : Setter for genebuild version attribute + Returns : undef + Parameters : String (the genebuild version) + Throws : No exceptions + Comments : None + +=cut + +sub set_genebuild_version { + my ( $self, $arg ) = @_; + $genebuild_version{ id $self} = check_genebuild_version($arg); + return; +} + +=method check_genebuild_version + + Usage : $gv = check_genebuild_version($gv); + Purpose : Check for valid genebuild version + Returns : String (the valid genebuild version) + Parameters : String (the genebuild version) + Throws : If genebuild version is missing or invalid (i.e. not + alphanumeric) + Comments : None + +=cut + +sub check_genebuild_version { + my ($genebuild_version) = @_; + return $genebuild_version + if defined $genebuild_version && $genebuild_version =~ m/\A \w+ \z/xms; + confess 'No genebuild version specified' if !defined $genebuild_version; + confess "Invalid genebuild version ($genebuild_version) specified"; +} + +=method stable_id + + Usage : my $stable_id = $gene->stable_id; + Purpose : Getter for stable id attribute + Returns : String (e.g. 
"ENSDARG00000095747") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub stable_id { + my ($self) = @_; + return $stable_id{ id $self}; +} + +=method set_stable_id + + Usage : $gene->set_stable_id('ENSDARG00000095747'); + Purpose : Setter for stable id attribute + Returns : undef + Parameters : String (the stable id) + Throws : No exceptions + Comments : None + +=cut + +sub set_stable_id { + my ( $self, $arg ) = @_; + $stable_id{ id $self} = check_stable_id($arg); + return; +} + +=method check_stable_id + + Usage : $stable_id = check_stable_id($stable_id); + Purpose : Check for valid stable id + Returns : String (the valid stable id) + Parameters : String (the stable id) + Throws : If stable id is missing or invalid + Comments : None + +=cut + +sub check_stable_id { + my ($stable_id) = @_; + return $stable_id + if defined $stable_id && $stable_id =~ m/\A [[:upper:]]+ \d{11} \z/xms; + confess 'No stable id specified' if !defined $stable_id; + confess "Invalid stable id ($stable_id) specified"; +} + +=method name + + Usage : my $name = $gene->name; + Purpose : Getter for name attribute + Returns : String (e.g. "cxc64") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub name { + my ($self) = @_; + return $name{ id $self}; +} + +=method set_name + + Usage : $gene->set_name('cxc64'); + Purpose : Setter for name attribute + Returns : undef + Parameters : String (the name) + Throws : No exceptions + Comments : None + +=cut + +sub set_name { + my ( $self, $arg ) = @_; + $name{ id $self} = _check_name($arg); + return; +} + +# Usage : $name = _check_name($name); +# Purpose : Check for valid name +# Returns : String (the valid name) +# Parameters : String (the name) +# Throws : If name > $MAX_NAME_LENGTH characters +# Comments : None +sub _check_name { + my ($name) = @_; + return $name + if !defined $name + || ( length $name > 0 && length $name <= $MAX_NAME_LENGTH ); + confess 'Name is empty' if !length $name; + confess "Name ($name) longer than $MAX_NAME_LENGTH characters"; +} + +=method description + + Usage : my $description = $gene->description; + Purpose : Getter for description attribute + Returns : String (e.g. "CXC chemokine 64") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub description { + my ($self) = @_; + return $description{ id $self}; +} + +=method set_description + + Usage : $gene->set_description('CXC chemokine 64'); + Purpose : Setter for description attribute + Returns : undef + Parameters : String (the description) + Throws : No exceptions + Comments : None + +=cut + +sub set_description { + my ( $self, $arg ) = @_; + $description{ id $self} = $arg; + return; +} + +=method biotype + + Usage : my $biotype = $gene->biotype; + Purpose : Getter for biotype attribute + Returns : String (e.g. "protein_coding") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub biotype { + my ($self) = @_; + return $biotype{ id $self}; +} + +=method set_biotype + + Usage : $gene->set_biotype('protein_coding'); + Purpose : Setter for biotype attribute + Returns : undef + Parameters : String (the biotype) + Throws : No exceptions + Comments : None + +=cut + +sub set_biotype { + my ( $self, $arg ) = @_; + $biotype{ id $self} = check_biotype($arg); + return; +} + +=method check_biotype + + Usage : $biotype = check_biotype($biotype); + Purpose : Check for valid biotype + Returns : String (the valid biotype) + Parameters : String (the biotype) + Throws : If biotype is missing or invalid (i.e. 
not alphanumeric) + Comments : None + +=cut + +sub check_biotype { + my ($biotype) = @_; + return $biotype if defined $biotype && $biotype =~ m/\A \w+ \z/xms; + confess 'No biotype specified' if !defined $biotype; + confess "Invalid biotype ($biotype) specified"; +} + +=method seq_name + + Usage : my $seq_name = $gene->seq_name; + Purpose : Getter for sequence name attribute + Returns : String (e.g. "5") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub seq_name { + my ($self) = @_; + return $seq_name{ id $self}; +} + +=method set_seq_name + + Usage : $gene->set_seq_name('5'); + Purpose : Setter for sequence name attribute + Returns : undef + Parameters : String (the sequence name) + Throws : No exceptions + Comments : None + +=cut + +sub set_seq_name { + my ( $self, $arg ) = @_; + $seq_name{ id $self} = check_seq_name($arg); + return; +} + +=method check_seq_name + + Usage : $seq_name = check_seq_name($seq_name); + Purpose : Check for valid sequence name + Returns : String (the valid sequence name) + Parameters : String (the sequence name) + Throws : If sequence name is missing or invalid (i.e. not alphanumeric) + Comments : None + +=cut + +sub check_seq_name { + my ($seq_name) = @_; + return $seq_name if defined $seq_name && $seq_name =~ m/\A \w+ \z/xms; + confess 'No sequence name specified' if !defined $seq_name; + confess "Invalid sequence name ($seq_name) specified"; +} + +=method start + + Usage : my $start = $gene->start; + Purpose : Getter for start attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub start { + my ($self) = @_; + return $start{ id $self}; +} + +=method set_start + + Usage : $gene->set_start(40352744); + Purpose : Setter for start attribute + Returns : undef + Parameters : +ve Int (the start) + Throws : No exceptions + Comments : None + +=cut + +sub set_start { + my ( $self, $arg ) = @_; + $start{ id $self} = check_start($arg); + return; +} + +=method check_start + + Usage : $start = check_start($start); + Purpose : Check for valid start + Returns : +ve Int (the valid start) + Parameters : +ve Int (the start) + Throws : If start is missing or not a positive integer + Comments : None + +=cut + +sub check_start { + my ($start) = @_; + return $start if defined $start && $start =~ m/\A \d+ \z/xms; + confess 'No start specified' if !defined $start; + confess "Invalid start ($start) specified"; +} + +=method end + + Usage : my $end = $gene->end; + Purpose : Getter for end attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub end { + my ($self) = @_; + return $end{ id $self}; +} + +=method set_end + + Usage : $gene->set_end(40352744); + Purpose : Setter for end attribute + Returns : undef + Parameters : +ve Int (the end) + Throws : No exceptions + Comments : None + +=cut + +sub set_end { + my ( $self, $arg ) = @_; + $end{ id $self} = check_end($arg); + return; +} + +=method check_end + + Usage : $end = check_end($end); + Purpose : Check for valid end + Returns : +ve Int (the valid end) + Parameters : +ve Int (the end) + Throws : If end is missing or not a positive integer + Comments : None + +=cut + +sub check_end { + my ($end) = @_; + return $end if defined $end && $end =~ m/\A \d+ \z/xms; + confess 'No end specified' if !defined $end; + confess "Invalid end ($end) specified"; +} + +=method strand + + Usage : my $strand = $gene->strand; + Purpose : Getter for strand attribute + Returns : Int (1 or -1) + Parameters : None + Throws : No 
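# A minimal sketch of linking transcripts to a gene, assuming the $gene from
# the constructor example above; the transcript values are hypothetical and
# the DETCT::Transcript fields mirror those used by DETCT::GeneFinder.
# add_transcript() weakens the stored reference, so the gene/transcript pair
# does not form a leaking circular structure.
use DETCT::Transcript;

my $transcript = DETCT::Transcript->new(
    {
        stable_id => 'ENSDART00000133571',
        biotype   => 'protein_coding',
        seq_name  => '5',
        start     => 40352744,
        end       => 40354399,
        strand    => 1,
        gene      => $gene,
    }
);
$gene->add_transcript($transcript);
my $transcript_count = scalar @{ $gene->get_all_transcripts() };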
exceptions + Comments : None + +=cut + +sub strand { + my ($self) = @_; + return $strand{ id $self}; +} + +=method set_strand + + Usage : $gene->set_strand(1); + Purpose : Setter for strand attribute + Returns : undef + Parameters : Int (the strand) + Throws : No exceptions + Comments : None + +=cut + +sub set_strand { + my ( $self, $arg ) = @_; + $strand{ id $self} = _check_strand($arg); + return; +} + +# Usage : $strand = _check_strand($strand); +# Purpose : Check for valid strand +# Returns : Int (1 or -1) (the valid strand) +# Parameters : Int (1 or -1) (the strand) +# Throws : If strand is missing or not 1 or -1 +# Comments : None +sub _check_strand { + my ($strand) = @_; + return $strand if defined $strand && $strand =~ m/\A \-? 1 \z/xms; + confess 'No strand specified' if !defined $strand; + confess "Invalid strand ($strand) specified"; +} + +=method add_transcript + + Usage : $gene->add_transcript($transcript); + Purpose : Add a transcript to a gene + Returns : undef + Parameters : DETCT::Transcript + Throws : If transcript is missing or invalid (i.e. not a + DETCT::Transcript object) + Comments : None + +=cut + +sub add_transcript { + my ( $self, $transcript ) = @_; + + confess 'No transcript specified' if !defined $transcript; + confess 'Class of transcript (', ref $transcript, ') not DETCT::Transcript' + if !$transcript->isa('DETCT::Transcript'); + + weaken($transcript); # Avoid circular references + + if ( !exists $transcript{ id $self} ) { + $transcript{ id $self} = [$transcript]; + } + else { + push @{ $transcript{ id $self} }, $transcript; + } + + return; +} + +=method get_all_transcripts + + Usage : $transcripts = $gene->get_all_transcripts(); + Purpose : Get all transcripts of a gene + Returns : Arrayref of DETCT::Transcript objects + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub get_all_transcripts { + my ($self) = @_; + + return $transcript{ id $self} || []; +} + +1; diff --git a/lib/DETCT/GeneFinder.pm b/lib/DETCT/GeneFinder.pm new file mode 100644 index 0000000..33269e4 --- /dev/null +++ b/lib/DETCT/GeneFinder.pm @@ -0,0 +1,417 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::GeneFinder; +## use critic + +# ABSTRACT: Object for finding genes (and transcripts) by location + +## Author : is1 +## Maintainer : is1 +## Created : 2012-11-24 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Class::InsideOut qw( private register id ); +use DETCT::Gene; +use DETCT::Transcript; + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private slice_adaptor => my %slice_adaptor; # Bio::EnsEMBL::DBSQL::SliceAdaptor +private cache => my %cache; # Hashref + +=method new + + Usage : my $gene_finder = DETCT::GeneFinder->new( { + slice_adaptor => $slice_adaptor, + } ); + Purpose : Constructor for gene finder objects + Returns : DETCT::GeneFinder + Parameters : Hashref { + slice_adaptor => Bio::EnsEMBL::DBSQL::SliceAdaptor, + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_slice_adaptor( $arg_ref->{slice_adaptor} ); + return $self; +} + +=method slice_adaptor + + Usage : my $slice_adaptor = $analysis->slice_adaptor; + Purpose : Getter for Ensembl slice adaptor attribute + Returns : Bio::EnsEMBL::DBSQL::SliceAdaptor + Parameters : None + Throws : No exceptions + Comments : 
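# A minimal sketch, assuming $analysis is a DETCT::Analysis with Ensembl
# settings configured; the gene finder is queried with a 3' end (sequence
# name, position, strand) and the coordinates here are made up.
use DETCT::GeneFinder;

my $gene_finder =
    DETCT::GeneFinder->new( { slice_adaptor => $analysis->slice_adaptor } );

my ( $genes, $distance, $nearest_end_pos ) =
    $gene_finder->get_nearest_genes( '5', 40_352_744, 1 );
foreach my $gene ( @{$genes} ) {
    printf "%s (%s) is %d bp from the 3' end\n",
        $gene->stable_id, $gene->biotype, $distance;
}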
None + +=cut + +sub slice_adaptor { + my ($self) = @_; + return $slice_adaptor{ id $self}; +} + +=method set_slice_adaptor + + Usage : $analysis->set_slice_adaptor($slice_adaptor); + Purpose : Setter for Ensembl slice adaptor attribute + Returns : undef + Parameters : Bio::EnsEMBL::DBSQL::SliceAdaptor + Throws : No exceptions + Comments : None + +=cut + +sub set_slice_adaptor { + my ( $self, $arg ) = @_; + $slice_adaptor{ id $self} = _check_slice_adaptor($arg); + return; +} + +# Usage : $slice_adaptor = _check_slice_adaptor($slice_adaptor); +# Purpose : Check for valid Ensembl slice adaptor +# Returns : Bio::EnsEMBL::DBSQL::SliceAdaptor +# Parameters : Bio::EnsEMBL::DBSQL::SliceAdaptor +# Throws : If slice adaptor is missing or invalid (i.e. not a +# Bio::EnsEMBL::DBSQL::SliceAdaptor object) +# Comments : None +sub _check_slice_adaptor { + my ($slice_adaptor) = @_; + return $slice_adaptor + if defined $slice_adaptor + && $slice_adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor'); + confess 'No Ensembl slice adaptor specified' if !defined $slice_adaptor; + confess 'Class of Ensembl slice adaptor (', ref $slice_adaptor, + ') not Bio::EnsEMBL::DBSQL::SliceAdaptor'; +} + +=method get_nearest_transcripts + + Usage : $gene_finder->get_nearest_transcripts($seq_name, $pos, $strand); + Purpose : Retrieve the nearest transcripts to a 3' end + Returns : Arrayref (of DETCT::Transcript objects) + Int (distance) + Int (nearest 3' end position) + Parameters : String (the 3' end sequence name) + Int (the 3' end position) + Int (the 3' end strand) + Throws : No exceptions + Comments : Distance is positive if downstream of 3' end and negative if + upstream + +=cut + +sub get_nearest_transcripts { + my ( $self, $seq_name, $pos, $strand ) = @_; + + # Ensure cache is filled + $self->_fill_cache_from_ensembl($seq_name); + + my $nearest_distance; + my $nearest_transcripts = []; + my $nearest_end_pos; + + # Iterate over all 3' end transcript positions in relevant portion of cache + my @transcript_positions = keys %{ $cache{ id $self}->{$seq_name} }; + + # Favour upstream if get two transcripts same distance upstream and + # downstream (and strand is known) + @transcript_positions = sort { $a <=> $b } @transcript_positions; + ## no critic (ProhibitMagicNumbers) + if ( defined $strand && $strand == -1 ) { + ## use critic + @transcript_positions = reverse @transcript_positions; + } + + foreach my $transcript_position (@transcript_positions) { + my @transcripts = + @{ $cache{ id $self}->{$seq_name}->{$transcript_position} }; + + # Only consider transcripts matching strand (if specified) + if ( defined $strand ) { + @transcripts = grep { $_->strand == $strand } @transcripts; + } + next if !@transcripts; + + my $distance = $pos - $transcript_position; + ## no critic (ProhibitMagicNumbers) + if ( $transcripts[0]->strand == -1 ) { + ### use critic + $distance = -$distance; + } + + # Keep transcripts if nearer than seen before + if ( !defined $nearest_distance + || abs $distance < abs $nearest_distance ) + { + $nearest_transcripts = \@transcripts; + $nearest_distance = $distance; + $nearest_end_pos = $transcript_position; + } + } + + # Sort by stable id + @{$nearest_transcripts} = + sort { $a->stable_id cmp $b->stable_id } @{$nearest_transcripts}; + + return $nearest_transcripts, $nearest_distance, $nearest_end_pos; +} + +=method get_nearest_genes + + Usage : $gene_finder->get_nearest_genes($seq_name, $pos, $strand); + Purpose : Retrieve the nearest genes to a 3' end + Returns : Arrayref (of DETCT::Gene objects) + Int 
(distance) + Int (nearest 3' end position) + Parameters : String (the 3' end sequence name) + Int (the 3' end position) + Int (the 3' end strand) + Throws : No exceptions + Comments : Distance is positive if downstream of 3' end and negative if + upstream + +=cut + +sub get_nearest_genes { + my ( $self, $seq_name, $pos, $strand ) = @_; + + my ( $transcripts, $distance, $nearest_end_pos ) = + $self->get_nearest_transcripts( $seq_name, $pos, $strand ); + + my %tmp_cache; # Temporarily store genes by stable id + + # Get all genes corresponding to these transcripts + foreach my $transcript ( @{$transcripts} ) { + $tmp_cache{ $transcript->gene->stable_id } = $transcript->gene; + } + + my $nearest_genes = [ values %tmp_cache ]; + + # Sort by stable id + @{$nearest_genes} = + sort { $a->stable_id cmp $b->stable_id } @{$nearest_genes}; + + return $nearest_genes, $distance, $nearest_end_pos; +} + +# Usage : $self->_fill_cache_from_ensembl( $seq_name ); +# Purpose : Fill the cache from Ensembl for a particular sequence +# Returns : undef +# Parameters : String (the sequence name) +# Throws : No exceptions +# Comments : Cache is a hashref (keyed by sequence name) of hashrefs (keyed +# by 3' end position) of arrayrefs of transcripts + +sub _fill_cache_from_ensembl { + my ( $self, $seq_name ) = @_; + + # Skip if cache already filled + return if exists $cache{ id $self}->{$seq_name}; + + # Make sure default key exists (in case there are no genes) + $cache{ id $self}->{$seq_name} = {}; + + my $slice = $self->slice_adaptor->fetch_by_region( 'toplevel', $seq_name ); + + require Bio::EnsEMBL::ApiVersion; + my $genebuild_version = 'e' . Bio::EnsEMBL::ApiVersion::software_version(); + + my $ens_genes = $slice->get_all_Genes( undef, undef, 1 ); # Plus transcripts + foreach my $ens_gene ( @{$ens_genes} ) { + my $gene = DETCT::Gene->new( + { + genebuild_version => $genebuild_version, + stable_id => $ens_gene->stable_id, + name => $ens_gene->external_name, + description => $ens_gene->description, + biotype => $ens_gene->biotype, + seq_name => $seq_name, + start => $ens_gene->seq_region_start, + end => $ens_gene->seq_region_end, + strand => $ens_gene->seq_region_strand, + } + ); + + # Get 3' end position for each transcript + my $ens_transcripts = $ens_gene->get_all_Transcripts(); + foreach my $ens_transcript ( @{$ens_transcripts} ) { + my $transcript = DETCT::Transcript->new( + { + stable_id => $ens_transcript->stable_id, + name => $ens_transcript->external_name, + description => $ens_transcript->description, + biotype => $ens_transcript->biotype, + seq_name => $seq_name, + start => $ens_transcript->seq_region_start, + end => $ens_transcript->seq_region_end, + strand => $ens_transcript->seq_region_strand, + gene => $gene, + } + ); + $gene->add_transcript($transcript); + + my $pos = + $ens_transcript->seq_region_strand == 1 + ? $ens_transcript->seq_region_end + : $ens_transcript->seq_region_start; + + push @{ $cache{ id $self}->{$seq_name}->{$pos} }, $transcript; + } + } + + return; +} + +=method add_gene_annotation + + Usage : my $regions_ref + = $gene_finder->add_gene_annotation($regions_ary_ref); + Purpose : Add gene annotation to regions with 3' ends + Returns : Arrayref [ + Arrayref [ + String (region sequence name), + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + String (3' end sequence name) or undef, + Int (3' end position) or undef, + Int (3' end strand) or undef, + Int (3' end read count) or undef, + Arrayref [ + Int (count) + ... 
+ ], + Arrayref [ + Int (normalised count) + ... + ], + Int (p value) or undef, + Int (adjusted p value) or undef, + Arrayref [ + Int (condition fold change) or undef, + Int (log2 condition fold change) or undef, + ], + Arrayref [ + Arrayref [ + Int (group fold change) or undef, + Int (log2 group fold change) or undef, + ], + ... (groups) + ], + Hashref { + String (genebuild version) => Arrayref [ + Arrayref [ + String (gene stable id), + String (gene name) or undef, + String (gene description) or undef, + String (gene biotype), + Int (distance to 3' end), + Arrayref [ + Arrayref [ + String (transcript stable id), + String (transcript biotype), + ], + ... (transcripts) + ], + ], + ... (genes) + ], + } + ], + ... (regions) + } + Parameters : Arrayref (of regions) + Throws : If regions are missing + Comments : None + +=cut + +sub add_gene_annotation { + my ( $self, $regions ) = @_; + + confess 'No regions specified' if !defined $regions; + + my @output; + + foreach my $region ( @{$regions} ) { + + # Get details for region and 3' end + my $region_seq_name = $region->[0]; + my $region_start = $region->[1]; + my $region_end = $region->[2]; + ## no critic (ProhibitMagicNumbers) + my $three_prime_seq_name = $region->[5]; + my $three_prime_pos = $region->[6]; + my $three_prime_strand = $region->[7]; + ## use critic + + my %gene_annotation = (); + my $genes; + my $distance; + my $nearest_end_pos; + + if ( defined $three_prime_seq_name ) { + + # Find nearest genes to 3' end (taking strand into account) + ( $genes, $distance, $nearest_end_pos ) = + $self->get_nearest_genes( $three_prime_seq_name, $three_prime_pos, + $three_prime_strand ); + } + + # Add annotation if got genes + foreach my $gene ( @{$genes} ) { + my @transcripts; + foreach my $transcript ( @{ $gene->get_all_transcripts() } ) { + + # Only add those transcripts nearest to 3' end + ## no critic (ProhibitMagicNumbers) + if ( + ( + $transcript->strand == 1 + && $transcript->end == $nearest_end_pos + ) + || ( $transcript->strand == -1 + && $transcript->start == $nearest_end_pos ) + ) + { + ## use critic + push @transcripts, + [ $transcript->stable_id, $transcript->biotype, ]; + } + } + push @{ $gene_annotation{ $gene->genebuild_version } }, + [ + $gene->stable_id, $gene->name, $gene->description, + $gene->biotype, $distance, \@transcripts, + ]; + } + + push @{$region}, \%gene_annotation; + push @output, $region; + } + + return \@output; +} + +1; diff --git a/lib/DETCT/Misc/BAM.pm b/lib/DETCT/Misc/BAM.pm new file mode 100644 index 0000000..ae059a0 --- /dev/null +++ b/lib/DETCT/Misc/BAM.pm @@ -0,0 +1,1332 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Misc::BAM; +## use critic + +# ABSTRACT: Miscellaneous functions for interacting with BAM files + +## Author : is1 +## Maintainer : is1 +## Created : 2012-09-20 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use Bio::DB::Sam; +use List::Util qw( min ); +use Data::Compare; +use DETCT::Misc::Tag; + +use base qw( Exporter ); +our @EXPORT_OK = qw( + get_reference_sequence_lengths + get_sequence + count_tags + bin_reads + get_read_peaks + get_three_prime_ends + merge_three_prime_ends + filter_three_prime_ends + choose_three_prime_end + count_reads + merge_read_counts +); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Constants + +# Regexps for checking for polyA +Readonly our @POLYA_REGEXP => ( 
+ qr/\A AAA.AAA... \z/xms, + qr/\A AAA.AA.A.. \z/xms, + qr/\A AAA.A.AA.. \z/xms, + qr/\A AA.AAAA... \z/xms, + qr/\A AA.AAA.A.. \z/xms, + qr/\A AA.A.AAA.. \z/xms, + qr/\A A.AAAAA... \z/xms, + qr/\A A.AAAA.A.. \z/xms, + qr/\A A.AAA.AA.. \z/xms, + qr/\A A.AA.AAA.. \z/xms, + qr/\A A.A.AAAA.. \z/xms, + qr/\A AA.AA.AA.. \z/xms, +); + +=func get_reference_sequence_lengths + + Usage : my %length_of + = DETCT::Misc::BAM::get_reference_sequence_lengths($bam_file); + Purpose : Get length of each reference sequence from a BAM file + Returns : Hash ( + seq_region => length + ) + Parameters : String (the BAM file) + Throws : If BAM file is missing + Comments : None + +=cut + +sub get_reference_sequence_lengths { + my ($bam_file) = @_; + + confess 'No BAM file specified' if !defined $bam_file; + + my $sam = Bio::DB::Sam->new( -bam => $bam_file ); + + my %length_of; + + foreach my $seq_id ( $sam->seq_ids ) { + $length_of{$seq_id} = $sam->length($seq_id); + } + + return %length_of; +} + +=func get_sequence + + Usage : my $seq = DETCT::Misc::BAM::get_sequence( { + fasta_index => $fai, + seq_name => '1', + start => 1, + end => 1000, + strand => 1, + } ); + Purpose : Get sequence from FASTA file + Returns : String (sequence) + Parameters : Hashref { + fasta_index => Bio::DB::Sam::Fai + ref_fasta => String (the FASTA file) + seq_name => String (the sequence name) + start => Int (the sequence start) + end => Int (the sequence end) + strand => Int (the sequence strand) + } + Throws : If FASTA index and file are both missing + If sequence name is missing + If sequence start is missing + If sequence end is missing + If sequence strand is missing + Comments : None + +=cut + +sub get_sequence { + my ($arg_ref) = @_; + + confess 'No FASTA index or FASTA file specified' + if !defined $arg_ref->{fasta_index} && !defined $arg_ref->{ref_fasta}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No sequence start specified' if !defined $arg_ref->{start}; + confess 'No sequence end specified' if !defined $arg_ref->{end}; + confess 'No sequence strand specified' if !defined $arg_ref->{strand}; + + my $fai = + $arg_ref->{fasta_index} + ? 
$arg_ref->{fasta_index} + : Bio::DB::Sam::Fai->load( $arg_ref->{ref_fasta} ); + + my $query = sprintf '%s:%d-%d', $arg_ref->{seq_name}, $arg_ref->{start}, + $arg_ref->{end}; + + my $seq = uc $fai->fetch($query); + + if ( $arg_ref->{strand} == -1 ) { ## no critic (ProhibitMagicNumbers) + $seq = reverse $seq; + $seq =~ tr/ACGT/TGCA/; + } + + return $seq; +} + +=func count_tags + + Usage : my $count_ref = DETCT::Misc::BAM::count_tags( { + bam_file => $bam_file, + mismatch_threshold => 2, + seq_name => '1', + start => 1, + end => 1000, + tags => ['NNNNBGAGGC', 'NNNNBAGAAG'], + } ); + Purpose : Count tags and random bases in a BAM file + Returns : Hashref { + String (tag) => Hashref { + String (random bases) => Int (count) + } + } + Parameters : Hashref { + bam_file => String (the BAM file) + mismatch_threshold => Int (the mismatch threshold) + seq_name => String (the sequence name) + start => Int (the start) or undef + end => Int (the end) or undef + tags => Arrayref of strings (the tags) + } + Throws : If BAM file is missing + If mismatch threshold is missing + If tags are missing + Comments : None + +=cut + +sub count_tags { + my ($arg_ref) = @_; + + confess 'No BAM file specified' if !defined $arg_ref->{bam_file}; + confess 'No mismatch threshold specified' + if !defined $arg_ref->{mismatch_threshold}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No tags specified' if !defined $arg_ref->{tags}; + + my @tags = @{ $arg_ref->{tags} }; + + # Convert tags to regular expressions + my %re_for = DETCT::Misc::Tag::convert_tag_to_regexp(@tags); + + # Count random bases per tag + my %random_count_for; + foreach my $tag (@tags) { + my @random = $tag =~ m/[NRYKMSWBDHV]/xmsg; + $random_count_for{$tag} = scalar @random; + } + + my $sam = Bio::DB::Sam->new( -bam => $arg_ref->{bam_file} ); + + my %count; + + # Callback for filtering + my $callback = sub { + my ($alignment) = @_; + return if !is_read2($alignment); + return if is_duplicate($alignment); + return if $alignment->unmapped; + return + if above_mismatch_threshold( $alignment, + $arg_ref->{mismatch_threshold} ); + + # Match tag + my ($tag_in_read) = $alignment->query->name =~ m/[#] ([AGCT]+) \z/xmsg; + return if !$tag_in_read; + TAG: foreach my $tag ( sort keys %re_for ) { + my $regexps = $re_for{$tag}; + foreach my $re ( @{$regexps} ) { + if ( $tag_in_read =~ $re ) { + my $random = substr $tag_in_read, 0, + $random_count_for{$tag}; + $count{$tag}{$random}++; + last TAG; + } + } + } + + return; + }; + + # Construct region + my $region = $arg_ref->{seq_name}; + if ( exists $arg_ref->{start} ) { + $region .= q{:} . $arg_ref->{start}; + if ( exists $arg_ref->{end} ) { + $region .= q{-} . 
$arg_ref->{end}; + } + } + + $sam->fetch( $region, $callback ); + + return \%count; +} + +=func bin_reads + + Usage : my $bin_ref = DETCT::Misc::BAM::bin_reads( { + bam_file => $bam_file, + mismatch_threshold => 2, + bin_size => 100, + seq_name => '1', + tags => ['NNNNBGAGGC', 'NNNNBAGAAG'], + } ); + Purpose : Bin reads in a BAM file + Returns : Hashref { + Int (bin) => Int (count) + } + Parameters : Hashref { + bam_file => String (the BAM file) + mismatch_threshold => Int (the mismatch threshold) + bin_size => Int (the bin size) + seq_name => String (the sequence name) + tags => Arrayref of strings (the tags) + } + Throws : If BAM file is missing + If mismatch threshold is missing + If bin size is missing + If sequence name is missing + If tags are missing + Comments : None + +=cut + +sub bin_reads { + my ($arg_ref) = @_; + + confess 'No BAM file specified' if !defined $arg_ref->{bam_file}; + confess 'No mismatch threshold specified' + if !defined $arg_ref->{mismatch_threshold}; + confess 'No bin size specified' if !defined $arg_ref->{bin_size}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No tags specified' if !defined $arg_ref->{tags}; + + my @tags = @{ $arg_ref->{tags} }; + + # Convert tags to regular expressions + my %re_for = DETCT::Misc::Tag::convert_tag_to_regexp(@tags); + + my $sam = Bio::DB::Sam->new( -bam => $arg_ref->{bam_file} ); + + my %read_count_for; + + # Callback for filtering + my $callback = sub { + my ($alignment) = @_; + return if !is_read2($alignment); + return if is_duplicate($alignment); + return if $alignment->unmapped; + return + if above_mismatch_threshold( $alignment, + $arg_ref->{mismatch_threshold} ); + return if !matched_tag( $alignment, \%re_for ); + + # Read can span multiple bins + my $start_bin = int( ( $alignment->start - 1 ) / $arg_ref->{bin_size} ); + my $end_bin = int( ( $alignment->end - 1 ) / $arg_ref->{bin_size} ); + + foreach my $bin ( $start_bin .. $end_bin ) { + $read_count_for{$bin}++; + } + + return; + }; + + $sam->fetch( $arg_ref->{seq_name}, $callback ); + + return { $arg_ref->{seq_name} => \%read_count_for }; +} + +=func get_read_peaks + + Usage : my $peaks_ref = DETCT::Misc::BAM::get_read_peaks( { + bam_file => $bam_file, + mismatch_threshold => 2, + peak_buffer_width => 100, + seq_name => '1', + tags => ['NNNNBGAGGC', 'NNNNBAGAAG'], + } ); + Purpose : Get read peaks (overlapping reads) for a BAM file + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (peak start), + Int (peak end), + Int (peak read count), + ], + ... 
(peaks) + ] + } + Parameters : Hashref { + bam_file => String (the BAM file) + mismatch_threshold => Int (the mismatch threshold) + peak_buffer_width => Int (the peak buffer size), + seq_name => String (the sequence name) + tags => Arrayref of strings (the tags) + } + Throws : If BAM file is missing + If mismatch threshold is missing + If peak buffer width is missing + If sequence name is missing + If tags are missing + Comments : BAM file must be sorted by coordinate + +=cut + +sub get_read_peaks { + my ($arg_ref) = @_; + + confess 'No BAM file specified' if !defined $arg_ref->{bam_file}; + confess 'No mismatch threshold specified' + if !defined $arg_ref->{mismatch_threshold}; + confess 'No peak buffer width specified' + if !defined $arg_ref->{peak_buffer_width}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No tags specified' if !defined $arg_ref->{tags}; + + my @tags = @{ $arg_ref->{tags} }; + + # Convert tags to regular expressions + my %re_for = DETCT::Misc::Tag::convert_tag_to_regexp(@tags); + + my $sam = Bio::DB::Sam->new( -bam => $arg_ref->{bam_file} ); + + # Peak variables + my @peaks; + my $current_peak_read_count; + my $current_peak_start; + my $current_peak_end; + + # Read variables + my $current_read_start; + my $current_read_end; + + # Callback for filtering + my $callback = sub { + my ($alignment) = @_; + return if !is_read2($alignment); + return if is_duplicate($alignment); + return if $alignment->unmapped; + return + if above_mismatch_threshold( $alignment, + $arg_ref->{mismatch_threshold} ); + return if !matched_tag( $alignment, \%re_for ); + + $current_read_start = $alignment->start; + $current_read_end = $alignment->end; + + # We're starting the first peak + if ( !defined $current_peak_start ) { + $current_peak_start = $current_read_start; + $current_peak_end = $current_read_end; + $current_peak_read_count = 1; + return; + } + + # Extend or finish current peak? + if ( $current_read_start - $current_peak_end < + $arg_ref->{peak_buffer_width} ) + { + # Extend current peak + $current_peak_end = $current_read_end; + $current_peak_read_count++; + } + else { + # Finish current peak + push @peaks, + [ + $current_peak_start, $current_peak_end, + $current_peak_read_count + ]; + + # Start new peak + $current_peak_start = $current_read_start; + $current_peak_end = $current_read_end; + $current_peak_read_count = 1; + } + + return; + }; + + # Identify peaks (where peaks are read 2s separated by a buffer of specific + # size) + $sam->fetch( $arg_ref->{seq_name}, $callback ); + + # Finish last peak + if ($current_peak_read_count) { + push @peaks, + [ $current_peak_start, $current_peak_end, $current_peak_read_count ]; + } + + return { $arg_ref->{seq_name} => \@peaks }; +} + +=func get_three_prime_ends + + Usage : my $three_prime_ref = DETCT::Misc::BAM::get_three_prime_ends( { + bam_file => $bam_file, + mismatch_threshold => 2, + seq_name => '1', + tags => ['NNNNBGAGGC', 'NNNNBAGAAG'], + regions => $regions_ary_ref, + } ); + Purpose : Get all 3' ends for a list of regions + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + Arrayref [ + Arrayref [ + String (3' end sequence name), + Int (3' end position), + Int (3' end strand), + Int (3' end read count), + ], + ... (3' ends) + ], + ], + ... 
(regions) + } + Parameters : Hashref { + bam_file => String (the BAM file) + mismatch_threshold => Int (the mismatch threshold) + seq_name => String (the sequence name) + tags => Arrayref of strings (the tags) + regions => Arrayref (of regions) + } + Throws : If BAM file is missing + If mismatch threshold is missing + If sequence name is missing + If tags are missing + If regions are missing + Comments : regions parameter is a list of regions, unlike the regions + parameter for merge_three_prime_ends where it is a list of lists + of regions + +=cut + +sub get_three_prime_ends { + my ($arg_ref) = @_; + + confess 'No BAM file specified' if !defined $arg_ref->{bam_file}; + confess 'No mismatch threshold specified' + if !defined $arg_ref->{mismatch_threshold}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No tags specified' if !defined $arg_ref->{tags}; + confess 'No regions specified' if !defined $arg_ref->{regions}; + + my @tags = @{ $arg_ref->{tags} }; + + # Convert tags to regular expressions + my %re_for = DETCT::Misc::Tag::convert_tag_to_regexp(@tags); + + my $sam = Bio::DB::Sam->new( -bam => $arg_ref->{bam_file} ); + + my @regions_with_three_prime_ends; + + foreach my $region ( @{ $arg_ref->{regions} } ) { + my ( $start, $end, $max_read_count, $log_prob_sum ) = @{$region}; + + my %count_for; + + # Get all second reads in region + my $read2_alignments = $sam->features( + -seq_id => $arg_ref->{seq_name}, + -start => $start, + -end => $end, + -flags => { SECOND_MATE => 1 }, + -iterator => 1, + ); + + # Get all 3' ends + while ( my $alignment = $read2_alignments->next_seq ) { + next if is_duplicate($alignment); + + # next if $alignment->unmapped; # Not needed; always mapped + next if $alignment->munmapped; # Want read 1 mapped too + next + if above_mismatch_threshold( $alignment, + $arg_ref->{mismatch_threshold} ); + next if !matched_tag( $alignment, \%re_for ); + + # Skip if 3' end is on a different chromosome + # Hopefully not significant number of real 3' ends on different + # chromosomes because are hard to deal with + # If reads are on different chromosomes then TLEN will be 0 and + # mate_end will return undefined (i.e. 
can't get 3' end position + # without querying by read name, which is slow for a BAM file + # sorted by coordinate) + next if $alignment->mate_seq_id ne $arg_ref->{seq_name}; + + # Identify 3' end position and strand based on alignment of read 1 + my $three_prime_seq = $alignment->mate_seq_id; + my $three_prime_pos; + my $three_prime_strand; + if ( $alignment->mstrand == 1 ) { + $three_prime_pos = $alignment->mate_start; + $three_prime_strand = -1; ## no critic (ProhibitMagicNumbers) + } + else { + $three_prime_pos = $alignment->mate_end; + $three_prime_strand = 1; + } + + # Count number of reads supporting each 3' end + my $three_prime = join q{:}, $three_prime_seq, $three_prime_pos, + $three_prime_strand; + $count_for{$three_prime}++; + } + + # Turn counts into an array + my @three_prime_ends; + foreach my $three_prime ( + reverse sort { $count_for{$a} <=> $count_for{$b} } + keys %count_for + ) + { + my ( $seq, $pos, $strand ) = split /:/xms, $three_prime; + push @three_prime_ends, + [ $seq, $pos, $strand, $count_for{$three_prime} ]; + } + + # Add three prime ends to regions + push @regions_with_three_prime_ends, + [ $start, $end, $max_read_count, $log_prob_sum, \@three_prime_ends, ]; + } + + return { $arg_ref->{seq_name} => \@regions_with_three_prime_ends }; +} + +=func merge_three_prime_ends + + Usage : my $three_prime_ref + = DETCT::Misc::BAM::merge_three_prime_ends( { + seq_name => '1', + regions => $regions_ary_ref, + } ); + Purpose : Merge multiple lists of regions with 3' ends + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + Arrayref [ + Arrayref [ + String (3' end sequence name), + Int (3' end position), + Int (3' end strand), + Int (3' end read count), + ], + ... (3' ends) + ], + ], + ... (regions) + } + Parameters : Hashref { + seq_name => String (the sequence name) + regions => Arrayref (of arrayrefs of regions) + } + Throws : If sequence name is missing + If regions are missing + If each list of regions doesn't have same number of regions + If regions are not in the same order or not the same in each + list + Comments : regions parameter is a list of lists of regions, unlike + the regions parameter for get_three_prime_ends where it is a + list of regions + +=cut + +sub merge_three_prime_ends { + my ($arg_ref) = @_; + + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No regions specified' if !defined $arg_ref->{regions}; + + my @list_of_lists_of_regions = @{ $arg_ref->{regions} }; + + # No need to merge if only one list of regions + my $num_lists = scalar @list_of_lists_of_regions; + if ( $num_lists == 1 ) { + return { $arg_ref->{seq_name} => $list_of_lists_of_regions[0] }; + } + + # Ensure each list has same number of regions as first list + my $num_regions1 = scalar @{ $list_of_lists_of_regions[0] }; + foreach my $list_index ( 1 .. $num_lists - 1 ) { + my $num_regions2 = scalar @{ $list_of_lists_of_regions[$list_index] }; + if ( $num_regions1 != $num_regions2 ) { + confess 'Number of regions does not match in all lists'; + } + } + + my @regions_with_three_prime_ends; + + # Merge all lists + foreach my $region_index ( 0 .. $num_regions1 - 1 ) { + + # Ensure region from first list is same in each list + my $region1 = $list_of_lists_of_regions[0]->[$region_index]; + my ( $start1, $end1, $max_read_count1, $log_prob_sum1 ) = @{$region1}; + foreach my $list_index ( 1 .. 
$num_lists - 1 ) { + my $region2 = + $list_of_lists_of_regions[$list_index]->[$region_index]; + my ( $start2, $end2, $max_read_count2, $log_prob_sum2 ) = + @{$region2}; + if ( $start1 != $start2 + || $end1 != $end2 + || $max_read_count1 != $max_read_count2 + || $log_prob_sum1 != $log_prob_sum2 ) + { + confess + 'Regions not in the same order or not the same in each list'; + } + } + + # Get all the 3' ends + my @unmerged_three_prime_ends; + foreach my $list_index ( 0 .. $num_lists - 1 ) { + my $list = $list_of_lists_of_regions[$list_index]; + my $region = $list->[$region_index]; + my ( undef, undef, undef, undef, $three_prime_ends ) = @{$region}; + push @unmerged_three_prime_ends, @{$three_prime_ends}; + } + + # Add up counts for identical 3' ends + my %count_for; + foreach my $three_prime_end (@unmerged_three_prime_ends) { + my ( $seq, $pos, $strand, $read_count ) = @{$three_prime_end}; + my $three_prime = join q{:}, $seq, $pos, $strand; + $count_for{$three_prime} += $read_count; + } + + # Turn counts into an array + my @three_prime_ends; + foreach my $three_prime ( + reverse sort { $count_for{$a} <=> $count_for{$b} } + keys %count_for + ) + { + my ( $seq, $pos, $strand ) = split /:/xms, $three_prime; + push @three_prime_ends, + [ $seq, $pos, $strand, $count_for{$three_prime} ]; + } + + # Add three prime ends to regions + push @regions_with_three_prime_ends, + [ + $start1, $end1, $max_read_count1, + $log_prob_sum1, \@three_prime_ends, + ]; + + $region_index++; + } + + return { $arg_ref->{seq_name} => \@regions_with_three_prime_ends }; +} + +=func filter_three_prime_ends + + Usage : my $three_prime_ref + = DETCT::Misc::BAM::filter_three_prime_ends( { + analysis => $analysis, + seq_name => '1', + regions => $regions_ary_ref, + } ); + Purpose : Filter list of regions with 3' ends + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + Arrayref [ + Arrayref [ + String (3' end sequence name), + Int (3' end position), + Int (3' end strand), + Int (3' end read count), + ], + ... (3' ends) + ], + ], + ... 
(regions) + } + Parameters : Hashref { + analysis => DETCT::Analysis + seq_name => String (the sequence name) + regions => Arrayref (of regions) + } + Throws : If analysis is missing + If sequence name is missing + If regions are missing + Comments : regions parameter is a list of regions, unlike the regions + parameter for merge_three_prime_ends where it is a list of lists + of regions + +=cut + +sub filter_three_prime_ends { + my ($arg_ref) = @_; + + confess 'No analysis specified' if !defined $arg_ref->{analysis}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No regions specified' if !defined $arg_ref->{regions}; + + my @regions_with_three_prime_ends; + + # Iterate over regions + foreach my $region ( @{ $arg_ref->{regions} } ) { + my ( $region_start, $region_end, $region_max_read_count, + $region_log_prob_sum, $unfiltered_three_prime_ends ) + = @{$region}; + + # Filter 3' ends + my @three_prime_ends; + foreach my $three_prime_end ( @{$unfiltered_three_prime_ends} ) { + my ( $seq_name, $pos, $strand, $read_count ) = @{$three_prime_end}; + + # Must be supported by more than 3 reads + next if $read_count <= 3; ## no critic (ProhibitMagicNumbers) + + # Check 10 bp downstream of 3' end for polyA + my $ten_bp_start; + my $ten_bp_end; + if ( $strand == 1 ) { + $ten_bp_start = $pos + 1; + $ten_bp_end = $pos + 10; ## no critic (ProhibitMagicNumbers) + } + else { + $ten_bp_start = $pos - 10; ## no critic (ProhibitMagicNumbers) + $ten_bp_end = $pos - 1; + } + my $ten_bp_seq = + $arg_ref->{analysis} + ->get_subsequence( $seq_name, $ten_bp_start, $ten_bp_end, + $strand ); + + # Check if 10 bp downstream is polyA + next if is_polya($ten_bp_seq); + + push @three_prime_ends, $three_prime_end; + } + + # Add three prime ends to regions + push @regions_with_three_prime_ends, + [ + $region_start, $region_end, + $region_max_read_count, $region_log_prob_sum, + \@three_prime_ends, + ]; + } + + return { $arg_ref->{seq_name} => \@regions_with_three_prime_ends }; +} + +=func choose_three_prime_end + + Usage : my $three_prime_ref + = DETCT::Misc::BAM::choose_three_prime_end( { + seq_name => '1', + regions => $regions_ary_ref, + } ); + Purpose : Filter and adjust list of regions and choose best 3' end + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + String (3' end sequence name) or undef, + Int (3' end position) or undef, + Int (3' end strand) or undef, + Int (3' end read count) or undef, + ], + ... 
(regions) + } + Parameters : Hashref { + seq_name => String (the sequence name) + regions => Arrayref (of regions) + } + Throws : If sequence name is missing + If regions are missing + Comments : regions parameter is a list of regions, unlike the regions + parameter for merge_three_prime_ends where it is a list of lists + of regions + +=cut + +sub choose_three_prime_end { + my ($arg_ref) = @_; + + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No regions specified' if !defined $arg_ref->{regions}; + + my @regions_with_three_prime_ends; + + # Iterate over regions + foreach my $region ( @{ $arg_ref->{regions} } ) { + my ( $region_start, $region_end, $region_max_read_count, + $region_log_prob_sum, $three_prime_ends ) + = @{$region}; + + my ( + $three_prime_seq_name, $three_prime_pos, + $three_prime_strand, $three_prime_read_count + ); + + @{$three_prime_ends} = reverse sort { + _sort_three_prime_end( $a, $b, $arg_ref->{seq_name}, $region_start, + $region_end ) + } @{$three_prime_ends}; + + # Get best 3' end (highest read count) + if ( @{$three_prime_ends} ) { + ( + $three_prime_seq_name, $three_prime_pos, + $three_prime_strand, $three_prime_read_count + ) = @{ $three_prime_ends->[0] }; + } + + # Reduce size of region if appropriate + ## no critic (ProhibitMagicNumbers) + if ( defined $three_prime_seq_name + && $three_prime_seq_name eq $arg_ref->{seq_name} ) + { + if ( $three_prime_strand == 1 + && $three_prime_pos < $region_end + && $three_prime_pos > $region_start ) + { + $region_end = $three_prime_pos; + } + elsif ($three_prime_strand == -1 + && $three_prime_pos > $region_start + && $three_prime_pos < $region_end ) + { + $region_start = $three_prime_pos; + } + } + ## use critic + + # Add three prime ends to regions + push @regions_with_three_prime_ends, + [ + $region_start, $region_end, + $region_max_read_count, $region_log_prob_sum, + $three_prime_seq_name, $three_prime_pos, + $three_prime_strand, $three_prime_read_count, + ]; + } + + return { $arg_ref->{seq_name} => \@regions_with_three_prime_ends }; +} + +# Sort by read count then distance to region +sub _sort_three_prime_end { + my ( $a, $b, $seq_name, $region_start, $region_end ) = @_; + + my $seq_name_a = $a->[0]; + my $seq_name_b = $b->[0]; + my $pos_a = $a->[1]; + my $pos_b = $b->[1]; + ## no critic (ProhibitMagicNumbers) + my $read_count_a = $a->[3]; + my $read_count_b = $b->[3]; + ## use critic + + # Get minimum distance to region + my $dist_a = min( abs $region_start - $pos_a, abs $region_end - $pos_a ); + my $dist_b = min( abs $region_start - $pos_b, abs $region_end - $pos_b ); + + # Make sure 3' end is on same chromosome as region + # (1e+100 is bigger than any chromosome can be to ensure sorting last) + if ( $seq_name_a ne $seq_name ) { + $dist_a = 1e+100; ## no critic (ProhibitMagicNumbers) + } + if ( $seq_name_b ne $seq_name ) { + $dist_b = 1e+100; ## no critic (ProhibitMagicNumbers) + } + + return $read_count_a <=> $read_count_b || $dist_b <=> $dist_a; +} + +=func count_reads + + Usage : my $count_ref = DETCT::Misc::BAM::count_reads( { + bam_file => $bam_file, + mismatch_threshold => 2, + seq_name => '1', + regions => $regions_ary_ref, + tags => ['NNNNBGAGGC', 'NNNNBAGAAG'], + } ); + Purpose : Count reads in regions of a BAM file + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + String (3' end sequence name) or undef, + Int (3' end position) or undef, 
+ Int (3' end strand) or undef, + Int (3' end read count) or undef, + Hashref { + String (tag) => Int (count) + } + ], + ... (regions) + } + Parameters : Hashref { + bam_file => String (the BAM file) + mismatch_threshold => Int (the mismatch threshold) + seq_name => String (the sequence name) or undef + regions => Arrayref (of regions) + tags => Arrayref of strings (the tags) + } + Throws : If BAM file is missing + If mismatch threshold is missing + If sequence name is missing + If regions are missing + If tags are missing + Comments : regions parameter is a list of regions, unlike the regions + parameter for merge_read_counts where it is a hash keyed by BAM + file with values being lists of regions + +=cut + +sub count_reads { + my ($arg_ref) = @_; + + confess 'No BAM file specified' if !defined $arg_ref->{bam_file}; + confess 'No mismatch threshold specified' + if !defined $arg_ref->{mismatch_threshold}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No regions specified' if !defined $arg_ref->{regions}; + confess 'No tags specified' if !defined $arg_ref->{tags}; + + my @tags = @{ $arg_ref->{tags} }; + + # Convert tags to regular expressions + my %re_for = DETCT::Misc::Tag::convert_tag_to_regexp(@tags); + + my $sam = Bio::DB::Sam->new( -bam => $arg_ref->{bam_file} ); + + my @regions_with_three_prime_ends; + + # Iterate over regions + foreach my $region ( @{ $arg_ref->{regions} } ) { + my ( + $region_start, $region_end, + $region_max_read_count, $region_log_prob_sum, + $three_prime_seq_name, $three_prime_pos, + $three_prime_strand, $three_prime_read_count + ) = @{$region}; + + my %count = map { $_ => 0 } @tags; + + # Get first read from each pair + my $read2_alignments = $sam->features( + -seq_id => $arg_ref->{seq_name}, + -start => $region_start, + -end => $region_end, + -flags => { SECOND_MATE => 1 }, + -iterator => 1, + ); + while ( my $alignment = $read2_alignments->next_seq ) { + next if is_duplicate($alignment); + + #next if $alignment->unmapped; # Not needed; always mapped + next + if above_mismatch_threshold( $alignment, + $arg_ref->{mismatch_threshold} ); + + # Match tag + my ($tag_in_read) = + $alignment->query->name =~ m/[#] ([AGCT]+) \z/xmsg; + next if !$tag_in_read; + TAG: foreach my $tag ( sort keys %re_for ) { + my $regexps = $re_for{$tag}; + foreach my $re ( @{$regexps} ) { + if ( $tag_in_read =~ $re ) { + $count{$tag}++; + last TAG; + } + } + } + } + + # Add read counts to regions + push @regions_with_three_prime_ends, + [ + $region_start, $region_end, + $region_max_read_count, $region_log_prob_sum, + $three_prime_seq_name, $three_prime_pos, + $three_prime_strand, $three_prime_read_count, + \%count, + ]; + } + + return { $arg_ref->{seq_name} => \@regions_with_three_prime_ends }; +} + +=func merge_read_counts + + Usage : my $count_ref + = DETCT::Misc::BAM::merge_read_counts( { + seq_name => '1', + regions => $regions_hash_ref, + samples => $samples_ary_ref, + } ); + Purpose : Merge multiple lists of regions with read counts + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + String (3' end sequence name) or undef, + Int (3' end position) or undef, + Int (3' end strand) or undef, + Int (3' end read count) or undef, + Arrayref [ + Int (count) + ... + ] + ], + ... 
(regions) + } + Parameters : Hashref { + seq_name => String (the sequence name) + regions => Arrayref (of arrayrefs of regions) + samples => Arrayref (of samples) + } + Throws : If sequence name is missing + If regions are missing + If samples are missing + If each list of regions doesn't have same number of regions + If regions are not in the same order or not the same in each + list + Comments : regions parameter is a hash keyed by BAM file with values being + lists of regions, unlike the regions parameter for count_reads + where it is a list of regions + +=cut + +sub merge_read_counts { + my ($arg_ref) = @_; + + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No regions specified' if !defined $arg_ref->{regions}; + confess 'No samples specified' if !defined $arg_ref->{samples}; + + my %hash_of_lists_of_regions = %{ $arg_ref->{regions} }; + + # Ensure each list has same number of regions + my @bam_files = keys %hash_of_lists_of_regions; + my $num_regions1 = scalar @{ $hash_of_lists_of_regions{ $bam_files[0] } }; + foreach my $list_index ( 1 .. scalar @bam_files - 1 ) { + my $num_regions2 = + scalar @{ $hash_of_lists_of_regions{ $bam_files[$list_index] } }; + if ( $num_regions1 != $num_regions2 ) { + confess 'Number of regions does not match in all lists'; + } + } + + # Get index for each sample + my %sample_index_for; + my $index = 0; + foreach my $sample ( @{ $arg_ref->{samples} } ) { + my $bam_file = $sample->bam_file; + my $tag = $sample->tag; + $sample_index_for{$bam_file}{$tag} = $index; + $index++; + } + + my @regions_with_three_prime_ends; + + # Merge all lists + foreach my $region_index ( 0 .. $num_regions1 - 1 ) { + + # Ensure regions are the same in each list + my $region1 = + $hash_of_lists_of_regions{ $bam_files[0] }->[$region_index]; + my @region1 = @{$region1}[ 0 .. 7 ]; ## no critic (ProhibitMagicNumbers) + foreach my $list_index ( 1 .. scalar @bam_files - 1 ) { + my $region2 = + $hash_of_lists_of_regions{ $bam_files[$list_index] } + ->[$region_index]; + + # Check first 8 fields of each region are identical + my @region2 = + @{$region2}[ 0 .. 
7 ]; ## no critic (ProhibitMagicNumbers) + if ( !Compare( \@region1, \@region2 ) ) { + confess + 'Regions not in the same order or not the same in each list'; + } + } + + my @read_counts; + + # Get read count for each BAM file / tag + foreach my $bam_file (@bam_files) { + my $region = $hash_of_lists_of_regions{$bam_file}->[$region_index]; + my $read_counts_ref = $region->[-1]; # Read counts are last field + foreach my $tag ( keys %{$read_counts_ref} ) { + my $read_count = $read_counts_ref->{$tag}; + if ( !exists $sample_index_for{$bam_file}{$tag} ) { + confess "Unknown BAM file ($bam_file) / tag ($tag) pair"; + } + my $sample_index = $sample_index_for{$bam_file}{$tag}; + $read_counts[$sample_index] = $read_count; + } + } + + push @regions_with_three_prime_ends, [ @region1, \@read_counts ]; + + $region_index++; + } + + return { $arg_ref->{seq_name} => \@regions_with_three_prime_ends }; +} + +=func matched_tag + + Usage : next if !matched_tag($alignment, \%re_for); + Purpose : Check if alignment doesn't match required tags + Returns : 1 or 0 + Parameters : Bio::DB::Bam::Alignment or Bio::DB::Bam::AlignWrapper + : Hashref of regular expressions + Throws : No exceptions + Comments : None + +=cut + +sub matched_tag { + my ( $alignment, $re_for ) = @_; + + my $got_match = 0; + + # Match tag + my ($tag_in_read) = $alignment->query->name =~ m/[#] ([AGCT]+) \z/xmsg; + if ($tag_in_read) { + TAG: foreach my $tag ( sort keys %{$re_for} ) { + my $regexps = $re_for->{$tag}; + foreach my $re ( @{$regexps} ) { + if ( $tag_in_read =~ $re ) { + $got_match = 1; + last TAG; + } + } + } + } + + return $got_match; +} + +=func is_read2 + + Usage : next if is_read2($alignment); + Purpose : Check if alignment is from read 2 (not read 1) + Returns : 1 or 0 + Parameters : Bio::DB::Bam::AlignWrapper + Throws : No exceptions + Comments : None + +=cut + +sub is_read2 { + my ($alignment) = @_; + + return ( $alignment->get_tag_values('FLAGS') =~ m/\bSECOND_MATE\b/xms ) + ? 1 + : 0; +} + +=func is_duplicate + + Usage : next if is_duplicate($alignment); + Purpose : Check if alignment is marked as a duplicate + Returns : 1 or 0 + Parameters : Bio::DB::Bam::AlignWrapper + Throws : No exceptions + Comments : None + +=cut + +sub is_duplicate { + my ($alignment) = @_; + + return ( $alignment->get_tag_values('FLAGS') =~ m/\bDUPLICATE\b/xms ) + ? 1 + : 0; +} + +=func above_mismatch_threshold + + Usage : next if above_mismatch_threshold($alignment, 2); + Purpose : Check if alignment has too many mismatches + Returns : 1 or 0 + Parameters : Bio::DB::Bam::Alignment or Bio::DB::Bam::AlignWrapper + : Int (mismatch threshold) + Throws : No exceptions + Comments : None + +=cut + +sub above_mismatch_threshold { + my ( $alignment, $threshold ) = @_; + + # Count soft clipped bases + my $cigar_ref = $alignment->cigar_array; + my $soft_clipped_bases = 0; + foreach my $pair_ref ( @{$cigar_ref} ) { + my ( $op, $count ) = @{$pair_ref}; + if ( $op eq q{S} ) { + $soft_clipped_bases += $count; + } + } + + # Get edit distance / number of mismatches + my $nm = $alignment->aux_get('NM'); + + # Check if above mismatch threshold + return ( $nm + $soft_clipped_bases > $threshold ) ? 
1 : 0; +} + +=func is_polya + + Usage : next if is_polya($seq); + Purpose : Check if sequence contains polyA + Returns : 1 or 0 + Parameters : String (sequence) + Throws : No exceptions + Comments : None + +=cut + +sub is_polya { + my ($seq) = @_; + + my $is_polya = 0; + + # Check for more than 3 As at start + if ( $seq =~ m/\A AAAA /xms ) { + $is_polya = 1; + } + + # Check for more than 6 As in total + if ( !$is_polya ) { + my $a = $seq =~ tr/A/A/; + if ( $a > 6 ) { ## no critic (ProhibitMagicNumbers) + $is_polya = 1; + } + } + + # Check specific patterns for polyA + if ( !$is_polya ) { + foreach my $regexp (@POLYA_REGEXP) { + if ( $seq =~ $regexp ) { + $is_polya = 1; + last; + } + } + } + + return $is_polya; +} + +1; diff --git a/lib/DETCT/Misc/Output.pm b/lib/DETCT/Misc/Output.pm new file mode 100644 index 0000000..18fb537 --- /dev/null +++ b/lib/DETCT/Misc/Output.pm @@ -0,0 +1,738 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Misc::Output; +## use critic + +# ABSTRACT: Miscellaneous functions for outputting data + +## Author : is1 +## Maintainer : is1 +## Created : 2012-11-25 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use File::Spec; +use File::Path qw( make_path ); +use Sort::Naturally; +use List::MoreUtils qw( uniq all ); + +use base qw( Exporter ); +our @EXPORT_OK = qw( + dump_as_table +); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Constants + +# Types +Readonly our $STRING => 1; +Readonly our $INT => 2; +Readonly our $FLOAT => 3; + +# Output formats +Readonly our @FORMATS => qw( csv tsv html ); + +=func dump_as_table + + Usage : DETCT::Misc::Output::dump_as_table( { + analysis => $analysis, + dir => '.', + regions => $regions_hash_ref, + } ); + Purpose : Dump regions in tabular format + Returns : undef + Parameters : Hashref { + analysis => DETCT::Analysis, + dir => String (the working directory), + regions => Arrayref (of regions), + } + Throws : If analysis is missing + If regions are missing + If directory is missing + Comments : None + +=cut + +sub dump_as_table { + my ($arg_ref) = @_; + + confess 'No analysis specified' if !defined $arg_ref->{analysis}; + confess 'No directory specified' if !defined $arg_ref->{dir}; + confess 'No regions specified' if !defined $arg_ref->{regions}; + + # Get conditions and groups + my @samples = @{ $arg_ref->{analysis}->get_all_samples() }; + my @conditions = uniq( nsort( map { $_->condition } @samples ) ); + my @groups = + grep { defined $_ } uniq( nsort( map { $_->group } @samples ) ); + + # Get regions sorted by p value then location + my $regions = sort_regions( $arg_ref->{regions} ); + + # Get genebuild version + my $genebuild_version; + foreach my $region ( @{$regions} ) { + ## no critic (ProhibitMagicNumbers) + ($genebuild_version) = ( sort keys %{ $region->[15] } )[-1]; # Highest + ## use critic + last if $genebuild_version; + } + + # Get definition for all columns (which determines formatting) + my $definition = get_definition( $arg_ref->{analysis}->ensembl_species, + $genebuild_version, \@samples, \@conditions, \@groups ); + + # Make sure working directory exists + if ( !-d $arg_ref->{dir} ) { + make_path( $arg_ref->{dir} ); + } + + # Open filehandles and begin all output files + my $fh = begin_output( $arg_ref->{dir}, $definition ); + + foreach my $region ( @{$regions} ) { + my @row; + + # Region + my $seq_name = 
$region->[0]; + my $start = $region->[1]; + my $end = $region->[2]; + push @row, [ $seq_name, [ $seq_name, $start, $end, $seq_name ] ]; + push @row, [ $start, [ $seq_name, $start, $end, $start ] ]; + push @row, [ $end, [ $seq_name, $start, $end, $end ] ]; + + # 3' end + ## no critic (ProhibitMagicNumbers) + my $tpe_seq_name = $region->[5]; + my $tpe_pos = $region->[6]; + my $tpe_strand = $region->[7]; + my $tpe_read_count = $region->[8]; + ## use critic + push @row, + [ $tpe_pos, [ $tpe_seq_name, $tpe_pos, $tpe_pos, $tpe_pos ] ]; + push @row, [$tpe_strand]; + push @row, [$tpe_read_count]; + + # p values + ## no critic (ProhibitMagicNumbers) + my $pval = $region->[11]; + my $padj = $region->[12]; + ## use critic + push @row, [$pval]; + push @row, [$padj]; + + # Gene details + ## no critic (ProhibitMagicNumbers) + my %gene = %{ $region->[15] }; + my ($genebuild) = ( sort keys %gene )[-1]; # Highest + ## use critic + my @distance; + my ( @gene_stable_id, @gene_stable_id_to_link ); + my @gene_biotype; + my ( @transcript_stable_id, @transcript_stable_id_to_link ); + my @transcript_biotype; + my ( @name, @name_to_link ); + my @description; + + if ($genebuild) { + foreach my $gene ( @{ $gene{$genebuild} } ) { + my ( $gene_stable_id, $name, $description, $gene_biotype, + $distance, $transcripts ) + = @{$gene}; + push @distance, $distance; + push @gene_stable_id, $gene_stable_id; + push @gene_stable_id_to_link, + [ $gene_stable_id, $gene_stable_id ]; + push @gene_biotype, $gene_biotype; + foreach my $transcript ( @{$transcripts} ) { + my ( $transcript_stable_id, $transcript_biotype ) = + @{$transcript}; + push @transcript_stable_id, $transcript_stable_id; + push @transcript_stable_id_to_link, + [ $transcript_stable_id, $transcript_stable_id ]; + push @transcript_biotype, $transcript_biotype; + } + push @name, $name; + push @name_to_link, [ $gene_stable_id, $name ]; + push @description, $description; + } + } + push @row, [ \@distance ]; + push @row, [ \@gene_stable_id, [@gene_stable_id_to_link] ]; + push @row, [ \@gene_biotype ]; + push @row, [ \@transcript_stable_id, [@transcript_stable_id_to_link] ]; + push @row, [ \@transcript_biotype ]; + push @row, [ \@name, [@name_to_link] ]; + push @row, [ \@description ]; + + # Counts and normalised counts + ## no critic (ProhibitMagicNumbers) + my @counts = map { [$_] } @{ $region->[9] }; + my @normalised_counts = map { [$_] } @{ $region->[10] }; + ## use critic + foreach my $count (@counts) { + push @row, [$count]; + } + foreach my $normalised_count (@normalised_counts) { + push @row, [$normalised_count]; + } + + # Condition fold changes + if ( scalar @conditions == 2 ) { + ## no critic (ProhibitMagicNumbers) + my ( $condition_fold_change, $log2_condition_fold_change ) = + @{ $region->[13] }; + ## use critic + push @row, [$log2_condition_fold_change]; + } + + # Group fold changes + if ( scalar @conditions == 2 && scalar @groups > 1 ) { + ## no critic (ProhibitMagicNumbers) + my @group_fold_changes = @{ $region->[14] }; + ## use critic + foreach my $group_fold_change (@group_fold_changes) { + my ( + $condition_group_fold_change, + $log2_condition_group_fold_change + ) = @{$group_fold_change}; + push @row, [$log2_condition_group_fold_change]; + } + } + + # Dump row in all required formats + my @levels = qw( all ); + if ( defined $padj + && $padj ne 'NA' + && $padj < $arg_ref->{analysis}->output_sig_level ) + { + push @levels, 'sig'; + } + dump_output( \@levels, $fh, $definition, \@row ); + } + + # End all output files and close filehandles + end_output($fh); 
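+ # The begin_output/dump_output/end_output calls above have written six files into the working directory: + # all.csv, all.tsv and all.html (every region) plus sig.csv, sig.tsv and sig.html (only regions whose + # adjusted p value is below the analysis output significance level)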
+ + return; +} + +=func sort_regions + + Usage : $regions = sort_regions( $regions ); + Purpose : Sort regions by p value then location + Returns : Arrayref (of regions) + Parameters : Arrayref of regions + Throws : No exceptions + Comments : None + +=cut + +sub sort_regions { + my ($regions) = @_; + + # Separate regions with no p value from rest + my @regions_with_pval; + my @regions_no_pval; + foreach my $region ( @{$regions} ) { + ## no critic (ProhibitMagicNumbers) + if ( defined $region->[11] && $region->[11] ne 'NA' ) { + ## use critic + push @regions_with_pval, $region; + } + else { + push @regions_no_pval, $region; + } + } + + # Sort by adjusted p value and p value then regions without p value + ## no critic (ProhibitMagicNumbers) + my @regions = + sort { $a->[12] <=> $b->[12] || $a->[11] <=> $b->[11] } + @regions_with_pval; + ## use critic + push @regions, @regions_no_pval; # Sorted by location already + + return \@regions; +} + +=func get_definition + + Usage : $definition = get_definition($genebuild_version, $samples, + $conditions, $groups); + Purpose : Return the definitions for all columns of the table + Returns : Arrayref (of column definitions) + Parameters : String (Ensembl species) + String (genebuild version) + Arrayref of samples + Arrayref of conditions + Arrayref of groups + Throws : No exceptions + Comments : None + +=cut + +sub get_definition { + my ( $species, $genebuild_version, $samples, $conditions, $groups ) = @_; + + # Ensembl links + my $loc_link = + $species + ? qq{%s} + : undef; + my $gene_link = + q{%s}; + + my @def; + + push @def, [ 'Chr', $STRING, $loc_link, ]; + push @def, [ 'Region start', $INT, $loc_link, ]; + push @def, [ 'Region end', $INT, $loc_link, ]; + push @def, [ q{3' end position}, $INT, $loc_link, ]; + push @def, [ q{3' end strand}, $INT, ]; + push @def, [ q{3' end read count}, $INT, ]; + push @def, [ 'p value', $FLOAT, ]; + push @def, [ 'Adjusted p value', $FLOAT, ]; + push @def, [ q{Distance to 3' end }, $INT, ]; + push @def, + [ $genebuild_version . ' Ensembl Gene ID', $STRING, $gene_link, ]; + push @def, [ 'Gene type', $STRING, ]; + push @def, + [ $genebuild_version . ' Ensembl Transcript ID', $STRING, $gene_link, ]; + push @def, [ 'Transcript type', $STRING, ]; + push @def, [ 'Gene name', $STRING, $gene_link, ]; + push @def, [ 'Gene description', $STRING, ]; + + foreach my $sample ( @{$samples} ) { + push @def, [ $sample->name . ' count', $INT ]; + } + foreach my $sample ( @{$samples} ) { + push @def, [ $sample->name . ' normalised count', $FLOAT ]; + } + + if ( scalar @{$conditions} == 2 ) { + my $heading = sprintf 'Log2 fold change (%s/%s)', $conditions->[0], + $conditions->[1]; + push @def, [ $heading, $FLOAT ]; + } + + if ( scalar @{$conditions} == 2 && scalar @{$groups} > 1 ) { + foreach my $group ( @{$groups} ) { + my $heading = sprintf 'Log2 fold change (%s/%s) for group %s', + $conditions->[0], $conditions->[1], $group; + push @def, [ $heading, $FLOAT ]; + } + } + + return \@def; +} + +=func begin_output + + Usage : my $fh = begin_output( $dir, $defintion ); + Purpose : Open filehandles and begin all output files + Returns : undef + Parameters : String (the directory) + Arrayref (the definition) + Throws : No exceptions + Comments : None + +=cut + +sub begin_output { + my ( $dir, $definition ) = @_; + + my %fh; + foreach my $format (@FORMATS) { + + # Level determines whether output all regions or just significant ones + foreach my $level (qw( all sig )) { + my $file = File::Spec->catfile( $dir, $level . q{.} . 
$format ); + open my $fh, '>', $file; ## no critic (RequireBriefOpen) + $fh{$format}{$level} = $fh; + my $begin_sub_name = 'begin_' . $format; + my $sub_ref = \&{$begin_sub_name}; + &{$sub_ref}( $fh, $definition ); + } + } + + return \%fh; +} + +=func end_output + + Usage : end_output( $fh ); + Purpose : End all output files and close filehandles + Returns : undef + Parameters : Hashref (of filehandles) + Throws : No exceptions + Comments : None + +=cut + +sub end_output { + my ($fh) = @_; + + foreach my $format (@FORMATS) { + foreach my $level (qw( all sig )) { + my $end_sub_name = 'end_' . $format; + my $sub_ref = \&{$end_sub_name}; + &{$sub_ref}( $fh->{$format}{$level} ); + close $fh->{$format}{$level}; + } + } + + return; +} + +=func dump_output + + Usage : dump_output( $levels, $fh, $definition, $row ); + Purpose : Dump row in all required formats at all required levels + Returns : undef + Parameters : Arrayref (of levels) + Hashref (of filehandles) + Arrayref (the definition) + Arrayref (of row data) + Throws : No exceptions + Comments : None + +=cut + +sub dump_output { + my ( $levels, $fh, $definition, $row ) = @_; + + foreach my $format (@FORMATS) { + foreach my $level ( @{$levels} ) { + my $dump_sub_name = 'dump_' . $format; + my $sub_ref = \&{$dump_sub_name}; + &{$sub_ref}( $fh->{$format}{$level}, $definition, $row ); + } + } + + return; +} + +=func begin_csv + + Usage : begin_csv( $fh, $defintion ); + Purpose : Begin CSV table + Returns : undef + Parameters : Filehandle + Arrayref (the definition) + Throws : No exceptions + Comments : None + +=cut + +sub begin_csv { + my ( $fh, $definition ) = @_; + + my @headings; + foreach my $column ( @{$definition} ) { + my ($heading) = @{$column}; + $heading =~ s/"/""/xmsg; + push @headings, q{"} . $heading . q{"}; + } + print {$fh} ( join q{,}, @headings ), "\r\n"; + + return; +} + +=func end_csv + + Usage : end_csv( $fh ); + Purpose : End CSV table + Returns : undef + Parameters : Filehandle + Throws : No exceptions + Comments : None + +=cut + +sub end_csv { + return; +} + +=func dump_csv + + Usage : dump_csv( $fh, $definition, $row ); + Purpose : Dump the data in a CSV table + Returns : undef + Parameters : Filehandle + Arrayref (the defintion) + Arrayref (the row data) + Throws : No exceptions + Comments : None + +=cut + +sub dump_csv { + my ( $fh, $definition, $row ) = @_; + + my @output_row; + my $i = 0; # Index to definition + foreach my $cell ( @{$row} ) { + my $type = $definition->[$i]->[1]; + my ($data) = @{$cell}; + + # Turn into a list of data, even if just one + if ( ref $data ne 'ARRAY' ) { + $data = [$data]; + } + + # Substitute default if undefined + my @output_cell; + foreach my $datum ( @{$data} ) { + $datum = defined $datum ? $datum : q{}; + push @output_cell, $datum; + } + + # Add default if necessary + if ( !@output_cell ) { + push @output_cell, q{}; + } + + my $output_cell = join q{,}, @output_cell; + + # Strings and lists need quoting + if ( $type == $STRING || scalar @output_cell > 1 ) { + $output_cell =~ s/"/""/xmsg; + $output_cell = q{"} . $output_cell . 
q{"}; + } + + push @output_row, $output_cell; + + $i++; + } + print {$fh} ( join q{,}, @output_row ), "\n"; + + return; +} + +=func begin_tsv + + Usage : begin_tsv( $fh, $defintion ); + Purpose : Begin TSV table + Returns : undef + Parameters : Filehandle + Arrayref (the definition) + Throws : No exceptions + Comments : None + +=cut + +sub begin_tsv { + my ( $fh, $definition ) = @_; + + my @headings = map { $_->[0] } @{$definition}; + print {$fh} q{#}, ( join "\t", @headings ), "\n"; + + return; +} + +=func end_tsv + + Usage : end_tsv( $fh ); + Purpose : End TSV table + Returns : undef + Parameters : Filehandle + Throws : No exceptions + Comments : None + +=cut + +sub end_tsv { + return; +} + +=func dump_tsv + + Usage : dump_tsv( $fh, $definition, $row ); + Purpose : Dump the data in a TSV table + Returns : undef + Parameters : Filehandle + Arrayref (the defintion) + Arrayref (the row data) + Throws : No exceptions + Comments : None + +=cut + +sub dump_tsv { + my ( $fh, $definition, $row ) = @_; + + my @output_row; + my $i = 0; # Index to definition + foreach my $cell ( @{$row} ) { + my $type = $definition->[$i]->[1]; + my ($data) = @{$cell}; + + # Turn into a list of data, even if just one + if ( ref $data ne 'ARRAY' ) { + $data = [$data]; + } + + # Substitute default if undefined + my @output_cell; + foreach my $datum ( @{$data} ) { + $datum = defined $datum && length $datum > 0 ? $datum : q{-}; + push @output_cell, $datum; + } + + # Add default if necessary + if ( !@output_cell ) { + push @output_cell, q{-}; + } + + push @output_row, ( join q{,}, @output_cell ); + + $i++; + } + print {$fh} ( join "\t", @output_row ), "\n"; + + return; +} + +=func begin_html + + Usage : begin_html( $fh, $defintion ); + Purpose : Begin HTML table + Returns : undef + Parameters : Filehandle + Arrayref (the definition) + Throws : No exceptions + Comments : None + +=cut + +sub begin_html { + my ( $fh, $definition ) = @_; + + print {$fh} <<'HTML'; + + + + DETCT + + + + + + +HTML + + foreach my $column ( @{$definition} ) { + my ($heading) = @{$column}; + print {$fh} '', "\n"; + } + + print {$fh} <<'HTML'; + + + +HTML + + return; +} + +=func end_html + + Usage : end_html( $fh ); + Purpose : End HTML table + Returns : undef + Parameters : Filehandle + Throws : No exceptions + Comments : None + +=cut + +sub end_html { + my ($fh) = @_; + + print {$fh} <<'HTML'; + +
', $heading, '
+ + +HTML + + return; +} + +=func dump_html + + Usage : dump_html( $fh, $definition, $row ); + Purpose : Dump the data in an HTML table + Returns : undef + Parameters : Filehandle + Arrayref (the defintion) + Arrayref (the row data) + Throws : No exceptions + Comments : None + +=cut + +sub dump_html { + my ( $fh, $definition, $row ) = @_; + + print {$fh} '', "\n"; + + my $i = 0; # Index to definition + foreach my $cell ( @{$row} ) { + my ( undef, $type, $link ) = @{ $definition->[$i] }; + my ( $data, $data_to_link ) = @{$cell}; + + # Turn into a list of data, even if just one + if ( ref $data ne 'ARRAY' ) { + $data = [$data]; + $data_to_link = [$data_to_link]; + } + + print {$fh} ''; + + my @data; + my $j = 0; # Index to each item when multiple items in one table cell + foreach my $datum ( @{$data} ) { + my $datum_to_link = $data_to_link->[$j]; + + $datum = defined $datum ? $datum : q{}; + + # Make a link if there's a link and all data for the link is defined + if ( $link && $datum_to_link && all { defined $_ } + @{$datum_to_link} ) + { + $datum = sprintf $link, @{$datum_to_link}; + } + + push @data, $datum; + + $j++; + } + + print {$fh} join '
', @data; + + print {$fh} '', "\n"; + + $i++; + } + + print {$fh} '', "\n"; + + return; +} + +1; diff --git a/lib/DETCT/Misc/PeakHMM.pm b/lib/DETCT/Misc/PeakHMM.pm new file mode 100644 index 0000000..24d3ed9 --- /dev/null +++ b/lib/DETCT/Misc/PeakHMM.pm @@ -0,0 +1,520 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Misc::PeakHMM; +## use critic + +# ABSTRACT: Miscellaneous functions for running peaks HMM + +## Author : is1 +## Maintainer : is1 +## Created : 2012-10-29 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use English qw( -no_match_vars ); +use POSIX qw( WIFEXITED); +use File::Slurp; +use File::Spec; +use File::Path qw( make_path ); +use Memoize qw( memoize flush_cache ); + +use base qw( Exporter ); +our @EXPORT_OK = qw( + merge_read_peaks + summarise_read_peaks + run_peak_hmm + join_hmm_bins +); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +=func merge_read_peaks + + Usage : my $peaks_ref = DETCT::Misc::PeakHMM::merge_read_peaks( { + peak_buffer_width => 100, + seq_name => '1', + peaks => $peaks_ary_ref, + } ); + Purpose : Merge read peaks (overlapping reads) + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (peak start), + Int (peak end), + Int (peak read count) + ], + ... (peaks) + ] + } + Parameters : Hashref { + peak_buffer_width => Int (the peak buffer size), + seq_name => String (the sequence name), + peaks => Arrayref (of peaks), + } + Throws : If peak buffer width is missing + If sequence name is missing + If peaks are missing + Comments : None + +=cut + +sub merge_read_peaks { + my ($arg_ref) = @_; + + confess 'No peak buffer width specified' + if !defined $arg_ref->{peak_buffer_width}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No peaks specified' if !defined $arg_ref->{peaks}; + + my $peaks_ref = $arg_ref->{peaks}; + + # Sort peaks by start then end + @{$peaks_ref} = + sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @{$peaks_ref}; + + # Peak variables + my @merged_peaks; + my $current_merged_peak_read_count; + my $current_merged_peak_start; + my $current_merged_peak_end; + + # Merge peaks + foreach my $peak ( @{$peaks_ref} ) { + my ( $peak_start, $peak_end, $peak_read_count ) = @{$peak}; + + # We're starting the first merged peak + if ( !defined $current_merged_peak_start ) { + $current_merged_peak_start = $peak_start; + $current_merged_peak_end = $peak_end; + $current_merged_peak_read_count = $peak_read_count; + next; + } + + # Extend or finish current merged peak? 
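+ # A peak is merged into the current merged peak when the gap between
+ # its start and the end of the current merged peak is smaller than the
+ # peak buffer width (and its read count is added to the running total);
+ # otherwise the current merged peak is finished and a new merged peak
+ # is started from this peak.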
+ if ( $peak_start - $current_merged_peak_end < + $arg_ref->{peak_buffer_width} ) + { + # Extend current merged peak + $current_merged_peak_end = $peak_end; + $current_merged_peak_read_count += $peak_read_count; + } + else { + # Finish current merged peak + push @merged_peaks, + [ + $current_merged_peak_start, $current_merged_peak_end, + $current_merged_peak_read_count + ]; + + # Start new merged peak + $current_merged_peak_start = $peak_start; + $current_merged_peak_end = $peak_end; + $current_merged_peak_read_count = $peak_read_count; + } + } + + # Finish last merged peak + if ($current_merged_peak_read_count) { + push @merged_peaks, + [ + $current_merged_peak_start, $current_merged_peak_end, + $current_merged_peak_read_count + ]; + } + + return { $arg_ref->{seq_name} => \@merged_peaks }; +} + +=func summarise_read_peaks + + Usage : my $summary_ref = DETCT::Misc::PeakHMM::summarise_read_peaks( { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 10_000_000, + read_length => 54, + peaks => $peaks_ary_ref, + } ); + Purpose : Summarise read peak distribution for HMM + Returns : Hashref { + String (sequence name) => Hashref { + total_read_count_per_mb => Float, + total_sig_read_count_per_mb => Float, + total_sig_peak_width_in_mb => Float, + median_sig_peak_width => Int, + total_sig_peaks => Int, + peak_buffer_width => Int, + read_threshold => Int, + bin_size => Int, + num_bins => Int, + } + } + Parameters : Hashref { + bin_size => Int (the bin size), + peak_buffer_width => Int (the peak buffer size), + hmm_sig_level => Float (the HMM significance level), + seq_name => String (the sequence name), + seq_bp => Int (the sequence bp), + read_length => Int (the read length), + peaks => Arrayref (of peaks), + } + Throws : If bin size is missing + If peak buffer width is missing + If HMM significance level is missing + If sequence name is missing + If sequence bp is missing + if read length is missing + If peaks are missing + Comments : Source of logic is summary.pl from + http://www.sph.umich.edu/csg/qin/HPeak/ + +=cut + +sub summarise_read_peaks { + my ($arg_ref) = @_; + + confess 'No bin size specified' if !defined $arg_ref->{bin_size}; + confess 'No peak buffer width specified' + if !defined $arg_ref->{peak_buffer_width}; + confess 'No HMM significance level specified' + if !defined $arg_ref->{hmm_sig_level}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No sequence bp specified' if !defined $arg_ref->{seq_bp}; + confess 'No read length specified' if !defined $arg_ref->{read_length}; + confess 'No peaks specified' if !defined $arg_ref->{peaks}; + + my $total_peaks = scalar @{ $arg_ref->{peaks} }; + + if ( !$total_peaks ) { + + # No peaks so won't be running HMM + return { $arg_ref->{seq_name} => {} }; + } + + # Get total read count + my $total_read_count = 0; + foreach my $peak ( @{ $arg_ref->{peaks} } ) { + my ( $start, $end, $read_count ) = @{$peak}; + $total_read_count += $read_count; + } + + # Get avg reads/bp + my $avg_reads_per_bp = $total_read_count / $arg_ref->{seq_bp}; + + # Identify significant peaks + memoize('_calc_log_sum'); + my @sig_peak_widths; + my $total_sig_read_count = 0; + my $total_sig_peak_width = 0; + foreach my $peak ( @{ $arg_ref->{peaks} } ) { + my ( $start, $end, $read_count ) = @{$peak}; + my $width = $end - $start + 1; + my $avg_reads = $avg_reads_per_bp * $width; + my $log_avg_reads = log $avg_reads; + my $exp_avg_reads = exp $avg_reads; + + # Gather info for significant 
peaks + my $sum = 1; + my $i = 1; + while ( $i < $read_count ) { + $sum += exp _calc_log_sum( $i, $log_avg_reads ); + last if $sum >= $exp_avg_reads; + $i++; + } + my $prob = 1 - exp( -$avg_reads ) * $sum; + if ( $prob < $arg_ref->{hmm_sig_level} / $total_peaks ) { + push @sig_peak_widths, $width; + $total_sig_read_count += $read_count; + $total_sig_peak_width += $width; + } + + # Expire Memoize cache for each peak + flush_cache('_calc_log_sum'); + } + + # Calculate hit threshold + my $proportion_bp_in_peaks = + $total_read_count * $arg_ref->{read_length} / $arg_ref->{seq_bp}; + my $read_threshold = 0; + my $prob = 1; + my $sum = 1; + while ( $prob > $arg_ref->{hmm_sig_level} / $total_peaks ) { + $read_threshold++; + my $log_sum = 0; + foreach my $i ( 1 .. $read_threshold ) { + $log_sum += log($proportion_bp_in_peaks) - log $i; + } + $sum += exp $log_sum; + $prob = 1 - exp( -$proportion_bp_in_peaks ) * $sum; + } + + # Sort widths and get median + my $total_sig_peaks = scalar @sig_peak_widths; + @sig_peak_widths = sort { $a <=> $b } @sig_peak_widths; + my $median_sig_peak_width = $sig_peak_widths[ int( $total_sig_peaks / 2 ) ]; + + ## no critic (ProhibitMagicNumbers) + my $num_bins = int( $arg_ref->{seq_bp} / $arg_ref->{bin_size} + 0.5 ); + ## use critic + + ## no critic (ProhibitMagicNumbers) + my %summary = ( + total_read_count_per_mb => $total_read_count / 1_000_000, + total_sig_read_count_per_mb => $total_sig_read_count / 1_000_000, + total_sig_peak_width_in_mb => $total_sig_peak_width / 1_000_000, + median_sig_peak_width => $median_sig_peak_width || 0, + total_sig_peaks => $total_sig_peaks, + peak_buffer_width => $arg_ref->{peak_buffer_width}, + read_threshold => $read_threshold, + bin_size => $arg_ref->{bin_size}, + num_bins => $num_bins, + ); + ## use critic + + return { $arg_ref->{seq_name} => \%summary }; +} + +# Calculate log sum +sub _calc_log_sum { + my ( $i, $log ) = @_; + + if ( $i == 0 ) { + return 0; + } + else { + return $log - log($i) + _calc_log_sum( $i - 1, $log ); + } +} + +=func run_peak_hmm + + Usage : my $hmm_ref = DETCT::Misc::PeakHMM::run_peak_hmm( { + dir => '.', + hmm_sig_level => 0.001, + seq_name => '1', + read_bins => $read_bins_hash_ref, + summary => $summary_hash_ref, + hmm_binary => 'bin/quince_chiphmmnew', + } ); + Purpose : Run peak HMM + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (bin), + Int (read count), + Float (log probability), + ], + ... (peaks) + } + Parameters : Hashref { + dir => String (the working directory), + hmm_sig_level => Float (the HMM significance level), + seq_name => String (the sequence name), + read_bins => Hashref (of read bins), + summary => Hashref (of summary), + hmm_binary => String (the HMM binary) + } + Throws : If directory is missing + If HMM significance level is missing + If sequence name is missing + If read bins are missing + If summary is missing + If HMM binary is missing + If command line can't be run + Comments : None + +=cut + +sub run_peak_hmm { + my ($arg_ref) = @_; + + confess 'No directory specified' if !defined $arg_ref->{dir}; + confess 'No HMM significance level specified' + if !defined $arg_ref->{hmm_sig_level}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No read bins specified' if !defined $arg_ref->{read_bins}; + confess 'No summary specified' if !defined $arg_ref->{summary}; + confess 'No HMM binary specified' if !defined $arg_ref->{hmm_binary}; + + if ( !scalar keys %{ $arg_ref->{summary} } ) { + + # No summary (i.e. 
no peaks), so won't run HMM + return { $arg_ref->{seq_name} => [] }; + } + + # Make sure working directory exists + if ( !-d $arg_ref->{dir} ) { + make_path( $arg_ref->{dir} ); + } + + # Sanitise sequence name for using in filenames + my $safe_seq_name = $arg_ref->{seq_name}; + $safe_seq_name =~ s/\W+//xmsg; + + # Write read bins to file + my $bin_file = + File::Spec->catfile( $arg_ref->{dir}, $safe_seq_name . '.bins' ); + open my $bin_fh, '>', $bin_file; + foreach my $bin ( sort { $a <=> $b } keys %{ $arg_ref->{read_bins} } ) { + print {$bin_fh} $bin, "\t", $arg_ref->{read_bins}->{$bin}, "\n"; + } + close $bin_fh; + + # Write summary to file + my $sum_file = + File::Spec->catfile( $arg_ref->{dir}, $safe_seq_name . '.params' ); + ## no critic (RequireBriefOpen) + open my $sum_fh, '>', $sum_file; + print {$sum_fh} $arg_ref->{summary}->{total_read_count_per_mb}, "\n"; + print {$sum_fh} $arg_ref->{summary}->{total_sig_read_count_per_mb}, "\n"; + print {$sum_fh} $arg_ref->{summary}->{total_sig_peak_width_in_mb}, "\n"; + print {$sum_fh} $arg_ref->{summary}->{median_sig_peak_width}, "\n"; + print {$sum_fh} $arg_ref->{summary}->{total_sig_peaks}, "\n"; + print {$sum_fh} $arg_ref->{summary}->{peak_buffer_width}, "\n"; + print {$sum_fh} $arg_ref->{summary}->{read_threshold}, "\n"; + print {$sum_fh} $arg_ref->{summary}->{bin_size}, "\n"; + print {$sum_fh} $arg_ref->{summary}->{num_bins}, "\n"; + close $sum_fh; + ## use critic + + my $hmm_file = + File::Spec->catfile( $arg_ref->{dir}, $safe_seq_name . '.hmm' ); + my $stdout_file = + File::Spec->catfile( $arg_ref->{dir}, $safe_seq_name . '.o' ); + my $stderr_file = + File::Spec->catfile( $arg_ref->{dir}, $safe_seq_name . '.e' ); + + my $cmd = join q{ }, $arg_ref->{hmm_binary}, $bin_file, $sum_file, + $hmm_file; + $cmd .= ' 1>' . $stdout_file; + $cmd .= ' 2>' . $stderr_file; + WIFEXITED( system $cmd) or confess "Couldn't run $cmd ($OS_ERROR)"; + + # Reformat output into array of arrayrefs + my $log_hmm_sig_level = log $arg_ref->{hmm_sig_level}; + my @hmm_output = (); + if ( -r $hmm_file ) { # Peak HMM can fail + foreach my $line ( read_file($hmm_file) ) { + chomp $line; + my ( $bin, undef, undef, $read_count, $log_prob ) = split /\t/xms, + $line; + next if $log_prob >= $log_hmm_sig_level; + push @hmm_output, [ $bin, $read_count, $log_prob ]; + } + } + + return { $arg_ref->{seq_name} => \@hmm_output }; +} + +=func join_hmm_bins + + Usage : my $regions_ref = DETCT::Misc::PeakHMM::join_hmm_bins( { + bin_size => 100, + seq_name => '1', + hmm_bins => $hmm_bins_ary_ref, + } ); + Purpose : Join reads bins output by peak HMM into regions + Returns : Hashref { + String (sequence name) => Arrayref [ + Arrayref [ + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + ], + ... 
(regions) + } + Parameters : Hashref { + bin_size => Int (the bin size), + seq_name => String (the sequence name), + hmm_bins => Arrayref (of HMM bins), + } + Throws : If bin size is missing + If sequence name is missing + If HMM bins are missing + Comments : None + +=cut + +sub join_hmm_bins { + my ($arg_ref) = @_; + + confess 'No bin size specified' if !defined $arg_ref->{bin_size}; + confess 'No sequence name specified' if !defined $arg_ref->{seq_name}; + confess 'No HMM bins specified' if !defined $arg_ref->{hmm_bins}; + + my @regions; + + # Region variables (where a region is a set of merged bins) + my $region_bin_start; + my $region_bin_end; + my $region_max_read_count; + my $region_log_prob_sum; + + foreach my $hmm_bin ( @{ $arg_ref->{hmm_bins} } ) { + my ( $bin, $read_count, $log_prob ) = @{$hmm_bin}; + + # We're starting the first region + if ( !defined $region_bin_start ) { + $region_bin_start = $bin; + $region_bin_end = $bin; + $region_max_read_count = $read_count; + $region_log_prob_sum = $log_prob; + next; + } + + # Extend or finish current region? + if ( $bin == $region_bin_end + 1 ) { + + # Next bin, so extend current region + $region_bin_end = $bin; + if ( $read_count > $region_max_read_count ) { + $region_max_read_count = $read_count; + } + $region_log_prob_sum += $log_prob; + } + else { + # Finish current region and convert to genomic coordinates + push @regions, + [ + $region_bin_start * $arg_ref->{bin_size} + 1, + ( $region_bin_end + 1 ) * $arg_ref->{bin_size}, + $region_max_read_count, + $region_log_prob_sum, + ]; + + # Start new region + $region_bin_start = $bin; + $region_bin_end = $bin; + $region_max_read_count = $read_count; + $region_log_prob_sum = $log_prob; + } + } + + # Finish last region + if ( defined $region_bin_start ) { + push @regions, + [ + $region_bin_start * $arg_ref->{bin_size} + 1, + ( $region_bin_end + 1 ) * $arg_ref->{bin_size}, + $region_max_read_count, + $region_log_prob_sum, + ]; + } + + return { $arg_ref->{seq_name} => \@regions }; +} + +1; diff --git a/lib/DETCT/Misc/R.pm b/lib/DETCT/Misc/R.pm new file mode 100644 index 0000000..64b5cea --- /dev/null +++ b/lib/DETCT/Misc/R.pm @@ -0,0 +1,271 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Misc::R; +## use critic + +# ABSTRACT: Miscellaneous functions for running R + +## Author : is1 +## Maintainer : is1 +## Created : 2012-11-21 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use English qw( -no_match_vars ); +use POSIX qw( WIFEXITED); +use File::Slurp; +use File::Spec; +use File::Path qw( make_path ); +use Sort::Naturally; +use List::Util qw( sum ); +use List::MoreUtils qw( uniq ); + +use base qw( Exporter ); +our @EXPORT_OK = qw( + run_deseq +); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +=func run_deseq + + Usage : my $regions_ref = DETCT::Misc::R::run_deseq( { + dir => '.', + regions => $regions_hash_ref, + samples => $samples_ary_ref, + r_binary => 'R', + deseq_script => 'bin/run_deseq.R', + } ); + Purpose : Run DESeq + Returns : Arrayref [ + Arrayref [ + String (region sequence name), + Int (region start), + Int (region end), + Int (region maximum read count), + Float (region log probability sum), + String (3' end sequence name) or undef, + Int (3' end position) or undef, + Int (3' end strand) or undef, + Int (3' end read count) or undef, + Arrayref [ + Int (count) + ... 
+ ], + Arrayref [ + Int (normalised count) + ... + ], + Int (p value) or undef, + Int (adjusted p value) or undef, + Arrayref [ + Int (condition fold change) or undef, + Int (log2 condition fold change) or undef, + ], + Arrayref [ + Arrayref [ + Int (group fold change) or undef, + Int (log2 group fold change) or undef, + ], + ... (groups) + ] + ], + ... (regions) + } + Parameters : Hashref { + dir => String (the working directory), + regions => Hashref (of arrayrefs of regions), + samples => Arrayref (of samples) + r_binary => String (the R binary), + deseq_script => String (the DESeq script), + } + Throws : If directory is missing + If regions are missing + If samples are missing + If R binary is missing + If DESeq script is missing + If command line can't be run + Comments : None + +=cut + +sub run_deseq { + my ($arg_ref) = @_; + + confess 'No directory specified' if !defined $arg_ref->{dir}; + confess 'No regions specified' if !defined $arg_ref->{regions}; + confess 'No samples specified' if !defined $arg_ref->{samples}; + confess 'No R binary specified' if !defined $arg_ref->{r_binary}; + confess 'No DESeq script specified' if !defined $arg_ref->{deseq_script}; + + # Get conditions and groups + my @samples = @{ $arg_ref->{samples} }; + my @conditions = uniq( nsort( map { $_->condition } @samples ) ); + my @groups = uniq( nsort( map { $_->group } @samples ) ); + @groups = grep { defined $_ } @groups; + + # Make sure working directory exists + if ( !-d $arg_ref->{dir} ) { + make_path( $arg_ref->{dir} ); + } + + # Write regions to input file + my $input_file = File::Spec->catfile( $arg_ref->{dir}, 'input.txt' ); + my @sample_names = map { $_->name } @samples; + open my $input_fh, '>', $input_file; + print {$input_fh} ( join "\t", q{}, @sample_names ), "\n"; + foreach my $seq_name ( nsort( keys %{ $arg_ref->{regions} } ) ) { + foreach my $region ( @{ $arg_ref->{regions}->{$seq_name} } ) { + my $counts = $region->[-1]; + my $region_text = join q{:}, $seq_name, $region->[0], $region->[1]; + print {$input_fh} ( join "\t", $region_text, @{$counts} ), "\n"; + } + } + close $input_fh; + + # Write samples to input file + my $samples_file = File::Spec->catfile( $arg_ref->{dir}, 'samples.txt' ); + my $last_col_to_print = @groups > 1 ? 2 : 1; + my @header = ( q{}, 'condition', 'group' )[ 0 .. $last_col_to_print ]; + open my $samples_fh, '>', $samples_file; + print {$samples_fh} ( join "\t", @header ), "\n"; + foreach my $sample (@samples) { + my @row = + ( $sample->name, $sample->condition, $sample->group ) + [ 0 .. $last_col_to_print ]; + print {$samples_fh} ( join "\t", @row ), "\n"; + } + close $samples_fh; + + my $output_file = File::Spec->catfile( $arg_ref->{dir}, 'output.txt' ); + my $size_factors_file = + File::Spec->catfile( $arg_ref->{dir}, 'size_factors.txt' ); + my $qc_pdf_file = File::Spec->catfile( $arg_ref->{dir}, 'qc.pdf' ); + my $stdout_file = File::Spec->catfile( $arg_ref->{dir}, 'deseq.o' ); + my $stderr_file = File::Spec->catfile( $arg_ref->{dir}, 'deseq.e' ); + + my $cmd = join q{ }, $arg_ref->{r_binary}, '--slave', '--args', + $input_file, $samples_file, $output_file, $size_factors_file, + $qc_pdf_file, '<', $arg_ref->{deseq_script}; + $cmd .= ' 1>' . $stdout_file; + $cmd .= ' 2>' . 
$stderr_file; + WIFEXITED( system $cmd) or confess "Couldn't run $cmd ($OS_ERROR)"; + + # Get size factors for each sample + my @size_factors = read_file($size_factors_file); + chomp @size_factors; + + # Get output + my %pval_for; + my %padj_for; + foreach my $line ( read_file($output_file) ) { + chomp $line; + my ( $region_text, $pval, $padj ) = split /\t/xms, $line; + $pval_for{$region_text} = $pval; + $padj_for{$region_text} = $padj; + } + + # Reformat output into array of arrayrefs + my @output; + foreach my $seq_name ( nsort( keys %{ $arg_ref->{regions} } ) ) { + foreach my $region ( @{ $arg_ref->{regions}->{$seq_name} } ) { + my $region_text = join q{:}, $seq_name, $region->[0], $region->[1]; + my $counts = $region->[-1]; + + # Add sequence name to region + unshift @{$region}, $seq_name; + + # Normalise counts and store for fold change calculation + my @normalised_counts; + my %counts_for_condition; + my %counts_for_group_condition; + my $sample_index = 0; + foreach my $sample (@samples) { + my $normalised_count = + $counts->[$sample_index] / $size_factors[$sample_index]; + push @normalised_counts, $normalised_count; + push @{ $counts_for_condition{ $sample->condition } }, + $normalised_count; + push @{ $counts_for_group_condition{ $sample->group } + { $sample->condition } }, $normalised_count; + $sample_index++; + } + push @{$region}, \@normalised_counts; + + # Add p value and adjusted p value + push @{$region}, $pval_for{$region_text}, $padj_for{$region_text}; + + # Calculate fold change if two conditions + my $fold_change; + my $log2_fold_change; + if ( scalar @conditions == 2 ) { + ( $fold_change, $log2_fold_change ) = calc_fold_change( + $counts_for_condition{ $conditions[0] }, + $counts_for_condition{ $conditions[1] } + ); + } + push @{$region}, [ $fold_change, $log2_fold_change ]; + + # Calculate fold change for each group if two conditions + my @group_fold_changes; + if ( scalar @conditions == 2 && scalar @groups > 1 ) { + foreach my $group (@groups) { + my ( $group_fold_change, $group_log2_fold_change ) = + calc_fold_change( + $counts_for_group_condition{$group}{ $conditions[0] }, + $counts_for_group_condition{$group}{ $conditions[1] } + ); + push @group_fold_changes, + [ $group_fold_change, $group_log2_fold_change ]; + } + } + push @{$region}, \@group_fold_changes; + + push @output, $region; + } + } + + return \@output; +} + +=func calc_fold_change + + Usage : ($fold_change, $log2_fold_change) + = calc_fold_change(\@array1, \@array2); + Purpose : Calculate the fold change in mean value of two arrays + Returns : Int (fold change) + Int (log2 fold change) + Parameters : Arrayref + Arrayref + Throws : No exceptions + Comments : None + +=cut + +sub calc_fold_change { + my ( $array1_ref, $array2_ref ) = @_; + + my $fold_change; + my $log2_fold_change; + my $mean1 = sum( @{$array1_ref} ) / scalar @{$array1_ref}; + my $mean2 = sum( @{$array2_ref} ) / scalar @{$array2_ref}; + if ( $mean1 && $mean2 ) { + $fold_change = $mean1 / $mean2; # e.g. 
mutant / sibling + $log2_fold_change = log($fold_change) / log 2; + } + + return $fold_change, $log2_fold_change; +} + +1; diff --git a/lib/DETCT/Misc/Tag.pm b/lib/DETCT/Misc/Tag.pm new file mode 100644 index 0000000..4c507e2 --- /dev/null +++ b/lib/DETCT/Misc/Tag.pm @@ -0,0 +1,269 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Misc::Tag; +## use critic + +# ABSTRACT: Miscellaneous functions for interacting with DETCT read tags + +## Author : is1 +## Maintainer : is1 +## Created : 2013-01-07 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use base qw( Exporter ); +our @EXPORT_OK = qw( + detag_trim_fastq + convert_tag_to_regexp +); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +=func detag_trim_fastq + + Usage : DETCT::Misc::Tag::detag_trim_fastq( { + fastq_read1_input => $fastq_read1_input, + fastq_read2_input => $fastq_read2_input, + fastq_output_prefix => $fastq_output_prefix, + pre_detag_trim_length => $pre_detag_trim_length, + polyt_trim_length => $polyt_trim_length, + polyt_min_length => $polyt_min_length, + read_tags => \@read_tags, + } ); + Purpose : Detag and trim FASTQ files + Returns : undef + Parameters : Hashref { + fastq_read1_input => String (read 1 FASTQ file), + fastq_read2_input => String (read 2 FASTQ file), + fastq_output_prefix => String (prefix for output FASTQs), + pre_detag_trim_length => Int (length to trim reads to), + polyt_trim_length => Int (polyT length to be trimmed), + polyt_min_length => Int (min Ts to define polyT), + read_tags => Arrayref (of read tags), + no_pair_suffix => Boolean or undef, + } + Throws : No exceptions + Comments : None + +=cut + +sub detag_trim_fastq { + my ($arg_ref) = @_; + + # Assume all tags are same length + my $tag_length = length $arg_ref->{read_tags}[0]; + + my $min_polyt = q{T} x $arg_ref->{polyt_min_length}; + my $polyt_re = qr/$min_polyt/xms; # Regexp for polyT matching + + my $pre_detag_trim_length = $arg_ref->{pre_detag_trim_length}; + my $polyt_trim_length = $arg_ref->{polyt_trim_length}; + + # Convert tags to regular expressions + my @read_tags = @{ $arg_ref->{read_tags} }; + my %re_tag_for = convert_tag_to_regexp(@read_tags); + + ## no critic (RequireBriefOpen) + open my $fh1_in, '<', $arg_ref->{fastq_read1_input}; + open my $fh2_in, '<', $arg_ref->{fastq_read2_input}; + ## use critic + my $fh_out_for = _open_output_fhs( $arg_ref->{fastq_output_prefix}, + $tag_length, @read_tags ); + + while ( my $read1_id = <$fh1_in> ) { + my $read2_id = <$fh2_in>; + my $read1_seq = <$fh1_in>; + my $read2_seq = <$fh2_in>; + my $read1_plus = <$fh1_in>; + my $read2_plus = <$fh2_in>; + my $read1_qual = <$fh1_in>; + my $read2_qual = <$fh2_in>; + + chomp $read1_id; + chomp $read2_id; + chomp $read1_seq; + chomp $read2_seq; + chomp $read1_plus; + chomp $read2_plus; + chomp $read1_qual; + chomp $read2_qual; + + # Do we need to add pair suffix to read IDs? + if ( $arg_ref->{no_pair_suffix} ) { + $read1_id .= '/1'; + $read2_id .= '/2'; + } + + # Remove /1 or /2 from read ids and then check they match + my $read1_id_no_suffix = $read1_id; + my $read2_id_no_suffix = $read2_id; + ## no critic (ProhibitMagicNumbers) + substr $read1_id_no_suffix, -2, 2, q{}; + substr $read2_id_no_suffix, -2, 2, q{}; + ## use critic + if ( $read1_id_no_suffix ne $read2_id_no_suffix ) { + confess 'Read order does not match in input ' + . 
"($read1_id_no_suffix does not match $read2_id_no_suffix)"; + } + + # Trim reads to specified length if necessary + if ( length $read1_seq > $pre_detag_trim_length ) { + $read1_seq = substr $read1_seq, 0, $pre_detag_trim_length; + $read2_seq = substr $read2_seq, 0, $pre_detag_trim_length; + $read1_qual = substr $read1_qual, 0, $pre_detag_trim_length; + $read2_qual = substr $read2_qual, 0, $pre_detag_trim_length; + } + + # Get tag and putative polyT from read 1 + my $tag_in_read = substr $read1_seq, 0, $tag_length; + my $polyt_seq = substr $read1_seq, $tag_length, $polyt_trim_length; + + # Default tag to add to id if no match + my $tag_for_id = q{X} x $tag_length; + my $tag_found = q{X} x $tag_length; + + # Make sure a tag matches and polyT is present + TAG: foreach my $tag ( sort keys %re_tag_for ) { + my $regexps = $re_tag_for{$tag}; + foreach my $re ( @{$regexps} ) { + if ( $tag_in_read =~ $re && $polyt_seq =~ $polyt_re ) { + $tag_for_id = $tag_in_read; + $tag_found = $tag; + substr $read1_seq, 0, $tag_length + $polyt_trim_length, q{}; + substr $read1_qual, 0, $tag_length + $polyt_trim_length, + q{}; + last TAG; # Skip rest if got a match + } + } + } + + # Add tag to id + $read1_id =~ s{ /1 \z}{#$tag_for_id/1}xms; + $read2_id =~ s{ /2 \z}{#$tag_for_id/2}xms; + + print { $fh_out_for->{$tag_found}->{1} } $read1_id, "\n"; + print { $fh_out_for->{$tag_found}->{1} } $read1_seq, "\n"; + print { $fh_out_for->{$tag_found}->{1} } $read1_plus, "\n"; + print { $fh_out_for->{$tag_found}->{1} } $read1_qual, "\n"; + print { $fh_out_for->{$tag_found}->{2} } $read2_id, "\n"; + print { $fh_out_for->{$tag_found}->{2} } $read2_seq, "\n"; + print { $fh_out_for->{$tag_found}->{2} } $read2_plus, "\n"; + print { $fh_out_for->{$tag_found}->{2} } $read2_qual, "\n"; + } + + close $fh1_in; + close $fh2_in; + _close_output_fhs($fh_out_for); + + return; +} + +# Usage : my $fh_out_for = _open_output_fhs( +# $fastq_output_prefix, $tag_length, @read_tags +# ); +# Purpose : Open filehandles for all output FASTQ files +# Returns : Hashref of hashref of filehandles +# Parameters : String (prefix for output FASTQs) +# Int (tag length) +# Array of strings (the tags) +# Throws : No exceptions +# Comments : None +sub _open_output_fhs { + my ( $fastq_output_prefix, $tag_length, @tags ) = @_; + + push @tags, q{X} x $tag_length; # Default tag if no match + + my %fh_for; + foreach my $tag (@tags) { + foreach my $read ( 1, 2 ) { + my $filename = join q{_}, $fastq_output_prefix, $tag, $read; + $filename .= '.fastq'; + open my $fh, '>', $filename; ## no critic (RequireBriefOpen) + $fh_for{$tag}->{$read} = $fh; + } + } + + return \%fh_for; +} + +# Usage : _close_output_fhs($fh_out_for); +# Purpose : Close filehandles for all output FASTQ files +# Returns : undef +# Parameters : Hashref of hashref of filehandles +# Throws : No exceptions +# Comments : None +sub _close_output_fhs { + my ($fh_for) = @_; + + foreach my $tag ( keys %{$fh_for} ) { + foreach my $read ( keys %{ $fh_for->{$tag} } ) { + close $fh_for->{$tag}->{$read}; + } + } + + return; +} + +=func convert_tag_to_regexp + + Usage : %re_for = convert_tag_to_regexp( 'NNNNBGAGGC', 'NNNNBAGAAG' ); + Purpose : Convert tags to regular expressions for matching + Returns : Hash ( + String (tag) => Arrayref (of Regexps) + ) + Parameters : Array of strings (the tags) + Throws : No exceptions + Comments : None + +=cut + +sub convert_tag_to_regexp { + my @tags = @_; + + my %re_for; + foreach my $tag (@tags) { + my @mismatch_tags = ($tag); # Start with tag without mismatches + + # Add tag 
with each possible mismatch + foreach my $i ( 0 .. length($tag) - 1 ) { + my $mismatch_tag = $tag; + my $base = substr $mismatch_tag, $i, 1, q{N}; # Replace with N + if ( $base ne q{N} ) { + + # Not completely random base already + push @mismatch_tags, $mismatch_tag; + } + } + + # Convert IUPAC codes to AGCT (or N) + foreach my $re (@mismatch_tags) { + $re =~ s/N/[NAGCT]/xmsg; # Random bases can be called as N + $re =~ s/B/[GCT]/xmsg; + $re =~ s/D/[AGT]/xmsg; + $re =~ s/H/[ACT]/xmsg; + $re =~ s/V/[AGC]/xmsg; + $re =~ s/R/[AG]/xmsg; + $re =~ s/Y/[CT]/xmsg; + $re =~ s/K/[GT]/xmsg; + $re =~ s/M/[AC]/xmsg; + $re =~ s/S/[GC]/xmsg; + $re =~ s/W/[AT]/xmsg; + push @{ $re_for{$tag} }, qr/\A $re \Z/xms; + } + } + + return %re_for; +} + +1; diff --git a/lib/DETCT/Pipeline.pm b/lib/DETCT/Pipeline.pm new file mode 100644 index 0000000..8f0c925 --- /dev/null +++ b/lib/DETCT/Pipeline.pm @@ -0,0 +1,1280 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Pipeline; +## use critic + +# ABSTRACT: Object representing a pipeline + +## Author : is1 +## Maintainer : is1 +## Created : 2013-01-09 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use Class::InsideOut qw( private register id ); +use Scalar::Util qw( refaddr ); +use English qw( -no_match_vars ); +use POSIX qw( WIFEXITED WIFSIGNALED WTERMSIG ); +use File::Slurp; +use File::Spec; +use File::Path qw( make_path ); +use Hash::Merge; +use YAML::Tiny qw( DumpFile ); +use Sys::Hostname; +use File::Basename; +use File::Find; +use DETCT; +use DETCT::Pipeline::Job; +use DETCT::Pipeline::Stage; + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private scheduler => my %scheduler; # e.g. lsf +private analysis_dir => my %analysis_dir; # e.g. . +private analysis => my %analysis; # DETCT::Analysis object +private cmd_line => my %cmd_line; # e.g. run_de_pipeline.pl +private max_retries => my %max_retries; # e.g. 10 +private sleep_time => my %sleep_time; # e.g. 600 +private stage_to_run => my %stage_to_run; # DETCT::Pipeline::Stage object +private component_to_run => my %component_to_run; # e.g. 5 +private verbose => my %verbose; # e.g. 
1 +private hash_merge => my %hash_merge; # Hash::Merge object +private stage => my %stage; # arrayref of stages + +# Constants +Readonly our %EXTENSION_TO_KEEP => map { $_ => 1 } qw( + csv html pdf tsv txt +); + +=method new + + Usage : my $pipeline = DETCT::Pipeline->new( { + scheduler => 'lsf', + analysis_dir => '.', + analysis => $analysis, + cmd_line => 'run_de_pipeline.pl', + max_retries => 10, + sleep_time => 600, + verbose => 1, + } ); + Purpose : Constructor for pipeline objects + Returns : DETCT::Pipeline + Parameters : Hashref { + scheduler => String, + analysis_dir => String, + analysis => DETCT::Analysis, + cmd_line => String, + max_retries => Int, + sleep_time => Int, + verbose => Boolean or undef + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_scheduler( $arg_ref->{scheduler} ); + $self->set_analysis_dir( $arg_ref->{analysis_dir} ); + $self->set_analysis( $arg_ref->{analysis} ); + $self->set_cmd_line( $arg_ref->{cmd_line} ); + $self->set_max_retries( $arg_ref->{max_retries} ); + $self->set_sleep_time( $arg_ref->{sleep_time} ); + $self->set_verbose( $arg_ref->{verbose} ); + return $self; +} + +=method scheduler + + Usage : my $scheduler = $pipeline->scheduler; + Purpose : Getter for scheduler attribute + Returns : String (e.g. "lsf") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub scheduler { + my ($self) = @_; + return $scheduler{ id $self}; +} + +=method set_scheduler + + Usage : $pipeline->set_scheduler('lsf'); + Purpose : Setter for scheduler attribute + Returns : undef + Parameters : String (the scheduler) + Throws : No exceptions + Comments : None + +=cut + +sub set_scheduler { + my ( $self, $arg ) = @_; + $scheduler{ id $self} = _check_scheduler($arg); + return; +} + +# Usage : $scheduler = _check_scheduler($scheduler); +# Purpose : Check for valid scheduler +# Returns : String (the valid scheduler) +# Parameters : String (the scheduler) +# Throws : If scheduler is not lsf or local +# Comments : None +sub _check_scheduler { + my ($scheduler) = @_; + + confess 'Invalid scheduler specified' + if !defined $scheduler + || ( $scheduler ne 'lsf' && $scheduler ne 'local' ); + + return $scheduler; +} + +=method analysis_dir + + Usage : my $analysis_dir = $pipeline->analysis_dir; + Purpose : Getter for analysis directory attribute + Returns : String (e.g. 
".") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub analysis_dir { + my ($self) = @_; + return $analysis_dir{ id $self}; +} + +=method set_analysis_dir + + Usage : $pipeline->set_analysis_dir('.'); + Purpose : Setter for analysis directory attribute + Returns : undef + Parameters : String (the analysis directory) + Throws : No exceptions + Comments : None + +=cut + +sub set_analysis_dir { + my ( $self, $arg ) = @_; + $analysis_dir{ id $self} = _check_analysis_dir($arg); + return; +} + +# Usage : $analysis_dir = _check_analysis_dir($analysis_dir); +# Purpose : Check for valid analysis directory +# Returns : String (the valid analysis directory) +# Parameters : String (the analysis directory) +# Throws : If analysis directory is missing or invalid +# Comments : None +sub _check_analysis_dir { + my ($analysis_dir) = @_; + + # Make sure analysis directory exists + if ( defined $analysis_dir && !-d $analysis_dir ) { + make_path($analysis_dir); + } + + return $analysis_dir if defined $analysis_dir && -d $analysis_dir; + confess 'No analysis_dir specified' if !defined $analysis_dir; + confess "Invalid analysis_dir ($analysis_dir) specified"; +} + +=method analysis + + Usage : my $analysis = $pipeline->analysis; + Purpose : Getter for analysis attribute + Returns : DETCT::Analysis + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub analysis { + my ($self) = @_; + return $analysis{ id $self}; +} + +=method set_analysis + + Usage : $pipeline->set_analysis($analysis); + Purpose : Setter for analysis attribute + Returns : undef + Parameters : DETCT::Analysis + Throws : No exceptions + Comments : None + +=cut + +sub set_analysis { + my ( $self, $arg ) = @_; + $analysis{ id $self} = _check_analysis($arg); + return; +} + +# Usage : $analysis = _check_analysis($analysis); +# Purpose : Check for valid analysis object +# Returns : DETCT::Analysis +# Parameters : DETCT::Analysis +# Throws : If analysis object is missing or invalid (i.e. not a +# DETCT::Analysis object) +# Comments : None +sub _check_analysis { + my ($analysis) = @_; + return $analysis if defined $analysis && $analysis->isa('DETCT::Analysis'); + confess 'No analysis specified' if !defined $analysis; + confess 'Class of analysis (', ref $analysis, ') not DETCT::Analysis'; +} + +=method cmd_line + + Usage : my $cmd_line = $pipeline->cmd_line; + Purpose : Getter for command line attribute + Returns : String (e.g. 
"run_de_pipeline.pl") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub cmd_line { + my ($self) = @_; + return $cmd_line{ id $self}; +} + +=method set_cmd_line + + Usage : $pipeline->set_cmd_line('run_de_pipeline.pl'); + Purpose : Setter for command line attribute + Returns : undef + Parameters : String (the command line) + Throws : No exceptions + Comments : None + +=cut + +sub set_cmd_line { + my ( $self, $arg ) = @_; + $cmd_line{ id $self} = _check_cmd_line($arg); + return; +} + +# Usage : $cmd_line = _check_cmd_line($cmd_line); +# Purpose : Check for valid command line +# Returns : String (the valid command line) +# Parameters : String (the command line) +# Throws : If command line is missing +# Comments : None +sub _check_cmd_line { + my ($cmd_line) = @_; + + confess 'No command line specified' if !defined $cmd_line || !$cmd_line; + + return $cmd_line; +} + +=method max_retries + + Usage : my $max_retries = $pipeline->max_retries; + Purpose : Getter for max retries attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub max_retries { + my ($self) = @_; + return $max_retries{ id $self}; +} + +=method set_max_retries + + Usage : $pipeline->set_max_retries(10); + Purpose : Setter for max retries attribute + Returns : undef + Parameters : +ve Int (the max retries) + Throws : No exceptions + Comments : None + +=cut + +sub set_max_retries { + my ( $self, $arg ) = @_; + $max_retries{ id $self} = _check_max_retries($arg); + return; +} + +# Usage : $max_retries = _check_max_retries($max_retries); +# Purpose : Check for valid max retries +# Returns : +ve Int (the valid max retries) +# Parameters : +ve Int (the max retries) +# Throws : If max retries is missing or not a positive integer +# Comments : None +sub _check_max_retries { + my ($max_retries) = @_; + return $max_retries + if defined $max_retries && $max_retries =~ m/\A \d+ \z/xms; + confess 'No max retries specified' if !defined $max_retries; + confess "Invalid max retries ($max_retries) specified"; +} + +=method sleep_time + + Usage : my $sleep_time = $pipeline->sleep_time; + Purpose : Getter for sleep time attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub sleep_time { + my ($self) = @_; + return $sleep_time{ id $self}; +} + +=method set_sleep_time + + Usage : $pipeline->set_sleep_time(600); + Purpose : Setter for sleep time attribute + Returns : undef + Parameters : +ve Int (the sleep time) + Throws : No exceptions + Comments : None + +=cut + +sub set_sleep_time { + my ( $self, $arg ) = @_; + $sleep_time{ id $self} = _check_sleep_time($arg); + return; +} + +# Usage : $sleep_time = _check_sleep_time($sleep_time); +# Purpose : Check for valid sleep time +# Returns : +ve Int (the valid sleep time) +# Parameters : +ve Int (the sleep time) +# Throws : If sleep time is missing or not a positive integer +# Comments : None +sub _check_sleep_time { + my ($sleep_time) = @_; + return $sleep_time + if defined $sleep_time && $sleep_time =~ m/\A \d+ \z/xms; + confess 'No sleep time specified' if !defined $sleep_time; + confess "Invalid sleep time ($sleep_time) specified"; +} + +=method stage_to_run + + Usage : my $stage = $pipeline->stage_to_run; + Purpose : Getter for stage to be run attribute + Returns : DETCT::Pipeline::Stage + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub stage_to_run { + my ($self) = @_; + return $stage_to_run{ id $self}; +} + +=method set_stage_to_run + + Usage 
: $pipeline->set_stage_to_run($stage); + Purpose : Setter for stage to be run attribute + Returns : undef + Parameters : DETCT::Pipeline::Stage + Throws : No exceptions + Comments : None + +=cut + +sub set_stage_to_run { + my ( $self, $arg ) = @_; + $stage_to_run{ id $self} = _check_stage_to_run($arg); + return; +} + +# Usage : $stage = _check_stage_to_run($stage); +# Purpose : Check for valid stage to be run object +# Returns : DETCT::Pipeline::Stage +# Parameters : DETCT::Pipeline::Stage +# Throws : If stage to be run object is missing or invalid (i.e. not a +# DETCT::Pipeline::Stage object) +# Comments : None +sub _check_stage_to_run { + my ($stage_to_run) = @_; + return $stage_to_run + if defined $stage_to_run && $stage_to_run->isa('DETCT::Pipeline::Stage'); + confess 'No stage to be run specified' if !defined $stage_to_run; + confess 'Class of stage to be run (', ref $stage_to_run, + ') not DETCT::Pipeline::Stage'; +} + +=method component_to_run + + Usage : my $component = $pipeline->component_to_run; + Purpose : Getter for component to be run attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub component_to_run { + my ($self) = @_; + return $component_to_run{ id $self}; +} + +=method set_component_to_run + + Usage : $pipeline->set_component_to_run(5); + Purpose : Setter for component to be run attribute + Returns : undef + Parameters : +ve Int (the component to be run) + Throws : No exceptions + Comments : None + +=cut + +sub set_component_to_run { + my ( $self, $arg ) = @_; + $component_to_run{ id $self} = _check_component_to_run($arg); + return; +} + +# Usage : $component = _check_component_to_run($component); +# Purpose : Check for valid component to be run +# Returns : +ve Int (the valid component to be run) +# Parameters : +ve Int (the component to be run) +# Throws : If component to be run is missing or not a positive integer +# Comments : None +sub _check_component_to_run { + my ($component_to_run) = @_; + return $component_to_run + if defined $component_to_run && $component_to_run =~ m/\A \d+ \z/xms; + confess 'No component to be run specified' if !defined $component_to_run; + confess "Invalid component to be run ($component_to_run) specified"; +} + +=method verbose + + Usage : my $verbose = $pipeline->verbose; + Purpose : Getter for verbose flag + Returns : Boolean + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub verbose { + my ($self) = @_; + return $verbose{ id $self} || 0; +} + +=method set_verbose + + Usage : $pipeline->set_verbose(1); + Purpose : Setter for verbose flag + Returns : undef + Parameters : Boolean + Throws : No exceptions + Comments : None + +=cut + +sub set_verbose { + my ( $self, $arg ) = @_; + $verbose{ id $self} = $arg ? 
1 : 0; + return; +} + +=method hash_merge + + Usage : %chunk_hmm + = %{ $pipeline->hash_merge->merge(\%chunk_hmm, $seq_hmm) }; + Purpose : Return a Hash::Merge object for merging job output + Returns : Hash::Merge + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub hash_merge { + my ($self) = @_; + + if ( !exists $hash_merge{ id $self} ) { + ## no critic (ProtectPrivateSubs) + Hash::Merge::specify_behavior( + { + SCALAR => { + SCALAR => sub { $_[0] + $_[1] }, # Add scalars + ARRAY => sub { undef }, + HASH => sub { undef }, + }, + ARRAY => { + SCALAR => sub { undef }, + ARRAY => sub { [ @{ $_[0] }, @{ $_[1] } ] }, # Join arrays + HASH => sub { undef }, + }, + HASH => { + SCALAR => sub { undef }, + ARRAY => sub { undef }, + HASH => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) }, + }, + }, + 'detct', + ); + ## use critic + $hash_merge{ id $self} = Hash::Merge->new('detct'); + } + + return $hash_merge{ id $self}; +} + +=method add_stages_from_yaml + + Usage : $pipeline->add_stages_from_yaml( 'detct.yaml' ); + Purpose : Add stages from a YAML file + Returns : undef + Parameters : String (the YAML file) + Throws : If YAML file is missing or not readable or invalid + Comments : None + +=cut + +sub add_stages_from_yaml { + my ( $self, $yaml_file ) = @_; + + confess "YAML file ($yaml_file) does not exist or cannot be read" + if !-r $yaml_file; + + my $yaml = YAML::Tiny->read($yaml_file); + + if ( !$yaml ) { + confess sprintf 'YAML file (%s) is invalid: %s', $yaml_file, + YAML::Tiny->errstr; + } + + my %tmp_cache; # Temporarily store stages by name + + foreach my $stage_hash ( @{ $yaml->[0] } ) { + my $stage = DETCT::Pipeline::Stage->new( + { + name => $stage_hash->{name}, + default_memory => $stage_hash->{default_memory}, + } + ); + foreach my $prerequisite_name ( @{ $stage_hash->{prerequisites} } ) { + $stage->add_prerequisite( $tmp_cache{$prerequisite_name} ); + } + $self->add_stage($stage); + + $tmp_cache{ $stage_hash->{name} } = $stage; + } + + return; +} + +=method add_stage + + Usage : $pipeline->add_stage($stage); + Purpose : Add a stage to a pipeline + Returns : undef + Parameters : DETCT::Pipeline::Stage + Throws : If stage is missing or invalid (i.e. 
not a + DETCT::Pipeline::Stage object) + Comments : None + +=cut + +sub add_stage { + my ( $self, $stage ) = @_; + + confess 'No stage specified' if !defined $stage; + confess 'Class of stage (', ref $stage, ') not DETCT::Pipeline::Stage' + if !$stage->isa('DETCT::Pipeline::Stage'); + + if ( !exists $stage{ id $self} ) { + $stage{ id $self} = [$stage]; + } + else { + push @{ $stage{ id $self} }, $stage; + } + + return; +} + +=method get_all_stages + + Usage : $stages = $pipeline->get_all_stages(); + Purpose : Get all stages of a pipeline + Returns : Arrayref of DETCT::Pipeline::Stage objects + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub get_all_stages { + my ($self) = @_; + + return $stage{ id $self} || []; +} + +=method get_stage_by_name + + Usage : $stage = $pipeline->get_stage_by_name('run_deseq'); + Purpose : Get a named stage of a pipeline + Returns : DETCT::Pipeline::Stage + Parameters : String (the stage name) + Throws : If stage with specified name does not exist + Comments : None + +=cut + +sub get_stage_by_name { + my ( $self, $name ) = @_; + + foreach my $stage ( @{ $stage{ id $self} } ) { + return $stage if $stage->name eq $name; + } + + confess "Invalid stage name ($name)"; +} + +=method run + + Usage : $pipeline->run(); + Purpose : Run pipeline + Returns : undef + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub run { + my ($self) = @_; + + $self->init_run(); + + my $all_stages_run = 0; + + while ( !$all_stages_run ) { + $all_stages_run = 1; + + my $jobs_running_or_run = 0; + + # Iterate over all stages of pipeline + STAGE: foreach my $stage ( @{ $self->get_all_stages() } ) { + + # Check prerequisites have already run and skip this stage if not + foreach my $prereq_stage ( @{ $stage->get_all_prerequisites() } ) { + if ( !$prereq_stage->all_jobs_run ) { + $self->say_if_verbose( + sprintf 'Skipping %s because %s not run', + $stage->name, $prereq_stage->name ); + next STAGE; + } + } + + # Create directory for current stage of analysis + my $dir = $self->get_and_create_stage_dir($stage); + + # Assume all jobs have run OK until we know otherwise + $stage->set_all_jobs_run(1); + + # All jobs marked as having run OK? + my $done_marker_file = $dir . 
'.done'; + if ( -e $done_marker_file ) { + $self->say_if_verbose( sprintf 'Stage %s has finished', + $stage->name ); + next STAGE; + } + + # Running a specific stage, but not this one + if ( $self->stage_to_run + && refaddr( $self->stage_to_run ) != refaddr($stage) ) + { + next STAGE; + } + + # Get all parameters for all components of current stage + my @all_parameters = $self->all_parameters($stage); + + $self->say_if_verbose( sprintf 'Stage %s has %d components', + $stage->name, scalar @all_parameters ); + + my $component = 0; # Index for current component of current stage + foreach my $parameters (@all_parameters) { + $component++; + + # Running a specific component, but not this one + if ( + $self->stage_to_run + && $self->component_to_run + && ( refaddr( $self->stage_to_run ) != refaddr($stage) + || $self->component_to_run != $component ) + ) + { + next; + } + + my $job = DETCT::Pipeline::Job->new( + { + stage => $stage, + component => $component, + scheduler => $self->scheduler, + base_filename => + File::Spec->catfile( $dir, $component ), + parameters => $parameters, + } + ); + + # Run job if running a specific component of a specific stage + if ( $self->stage_to_run && $self->component_to_run ) { + $self->run_job($job); + return; + } + + $jobs_running_or_run += $self->process_job($job); + } + + if ( $stage->all_jobs_run ) { + write_file( $done_marker_file, '1' ); + } + else { + $all_stages_run = 0; + } + } + + if ( !$all_stages_run && !$jobs_running_or_run ) { + $self->_delete_lock(); + die 'Stopping pipeline - no jobs to run' . "\n"; + } + + if ( !$all_stages_run ) { + $self->say_if_verbose( sprintf 'Sleeping for %d seconds', + $self->sleep_time ); + sleep $self->sleep_time; + } + } + + print 'Pipeline finished - all jobs run' . "\n"; + + $self->clean_up(); + + $self->_delete_lock(); + + return; +} + +=method init_run + + Usage : $self->init_run(); + Purpose : Initialise a pipeline run + Returns : undef + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub init_run { + my ($self) = @_; + + if ( !$self->stage_to_run && !$self->component_to_run ) { + ## no critic (RequireLocalizedPunctuationVars) + $SIG{INT} = sub { + $self->_delete_lock(); + die "\n" . 'Interrupted' . "\n"; + }; + ## use critic + $self->_create_lock(); + } + + return; +} + +=method get_and_create_stage_dir + + Usage : my $dir = $pipeline->get_and_create_stage_dir( $stage ); + Purpose : Get (and create if necessary) a directory for the current stage + Returns : String (the directory) + Parameters : DETCT::Pipeline::Stage + Throws : No exceptions + Comments : None + +=cut + +sub get_and_create_stage_dir { + my ( $self, $stage ) = @_; + + my $stage_dir = File::Spec->catdir( $self->analysis_dir, $stage->name ); + if ( !-d $stage_dir ) { + make_path($stage_dir); + } + + return $stage_dir; +} + +=method get_and_check_output_file + + Usage : my $file = $pipeline->get_and_check_output_file('run_deseq', 1); + Purpose : Get an output file for a particular component of a stage + Returns : String (the file) + Parameters : String (the stage) + Int (the component) + Throws : If output file doesn't exist + Comments : None + +=cut + +sub get_and_check_output_file { + my ( $self, $stage_name, $component ) = @_; + + my $output_file = File::Spec->catfile( $self->analysis_dir, $stage_name, + $component . 
'.out' ); + if ( !-e $output_file ) { + confess "$output_file doesn't exist, but should"; + } + + return $output_file; +} + +=method process_job + + Usage : $jobs_running_or_run += $pipeline->process_job($job); + Purpose : Process a job to see if needs to be submitted + Returns : Boolean + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : Returns whether or not job has been run or was already running + +=cut + +sub process_job { + my ( $self, $job ) = @_; + + my $job_running_or_run = 0; + + if ( $job->status_code eq 'NOT_RUN' ) { + + # Job not yet run so submit it + $job->stage->set_all_jobs_run(0); + $self->say_if_verbose( sprintf ' Running component %d of %s', + $job->component, $job->stage->name ); + $job_running_or_run = 1; + $self->submit_job($job); + } + elsif ( $job->status_code eq 'RUNNING' ) { + + # Job is running + $job->stage->set_all_jobs_run(0); + $self->say_if_verbose( sprintf ' Component %d of %s is still running', + $job->component, $job->stage->name ); + $job_running_or_run = 1; + } + elsif ( $job->status_code eq 'FAILED' ) { + + # Job has failed, so submit again + $job->stage->set_all_jobs_run(0); + $self->say_if_verbose( sprintf ' Component %d of %s has FAILED: %s', + $job->component, $job->stage->name, $job->status_text ); + if ( $job->retries < $self->max_retries ) { + $self->say_if_verbose( sprintf ' Running component %d of %s', + $job->component, $job->stage->name ); + $job_running_or_run = 1; + $self->submit_job($job); + } + else { + $self->say_if_verbose( + sprintf + ' Not running component %d of %s because retried %d times', + $job->component, $job->stage->name, $job->retries ); + } + } + + return $job_running_or_run; +} + +=method submit_job + + Usage : $pipeline->submit_job($job); + Purpose : Submit a job + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : If job or bsub can't be run + If job id can't be extracted from bsub output + Comments : None + +=cut + +sub submit_job { + my ( $self, $job ) = @_; + + my $stdout_file = $job->base_filename . '.o'; + my $stderr_file = $job->base_filename . '.e'; + + my $cmd = + $self->cmd_line + . ' --stage ' + . $job->stage->name + . ' --component ' + . $job->component; + + if ( $job->scheduler eq 'local' ) { + + # Just run job + $cmd .= ' 1>' . $stdout_file; + $cmd .= ' 2>' . $stderr_file; + my $cmd_status = system $cmd; + + # Die if the command was interrupted + if ( WIFSIGNALED($cmd_status) && WTERMSIG($cmd_status) == 2 ) { + $self->_delete_lock(); + die "\n" . 'Interrupted' . "\n"; + } + + # Die if the command couldn't be run + confess "Couldn't run $cmd ($OS_ERROR)" if !WIFEXITED($cmd_status); + + if ( defined $job->retries ) { + $job->set_retries( $job->retries + 1 ); + } + else { + $job->set_retries(0); + } + my $dump = { retries => $job->retries, }; + my $job_file = $job->base_filename . '.job'; + DumpFile( $job_file, $dump ); + } + elsif ( $job->scheduler eq 'lsf' ) { + + # Either use default memory or increase by 50% (if retrying failed job) + if ( !$job->memory ) { + $job->set_memory( $job->stage->default_memory ); + } + elsif ( $job->status_text =~ m/\A MEMLIMIT /xms ) { + ## no critic (ProhibitMagicNumbers) + $job->set_memory( int( $job->memory * 1.5 ) ); + ## use critic + } + + # bsub job + my $bsub_stdout_file = $job->base_filename . '.bsub.o'; + my $bsub_stderr_file = $job->base_filename . 
'.bsub.e'; + ## no critic (ProhibitMagicNumbers) + my $memory_clause = sprintf q{ -R'select[mem>%d] rusage[mem=%d]' -M%d }, + $job->memory, $job->memory, $job->memory * 1000; + ## use critic + $cmd = + 'bsub' . ' -oo ' + . $stdout_file . ' -eo ' + . $stderr_file + . $memory_clause + . $cmd . ' 1>' + . $bsub_stdout_file . ' 2>' + . $bsub_stderr_file; + WIFEXITED( system $cmd) or confess "Couldn't run $cmd ($OS_ERROR)"; + + # Extract job id from bsub output and store along with other parameters + my $bsub_stdout = read_file($bsub_stdout_file); + if ( $bsub_stdout =~ m/Job \s <(\d+)> \s is \s submitted/xms ) { + my $id = $1; + if ( defined $job->retries ) { + $job->set_retries( $job->retries + 1 ); + } + else { + $job->set_retries(0); + } + my $dump = { + id => $id, + retries => $job->retries, + memory => $job->memory, + }; + my $job_file = $job->base_filename . '.job'; + DumpFile( $job_file, $dump ); + } + else { + confess "Couldn't get job id from $bsub_stdout_file"; + } + } + + return; +} + +=method all_parameters + + Usage : @all_parameters = $pipeline->all_parameters( $stage ); + Purpose : Get all the parameters for a stage + Returns : Array + Parameters : DETCT::Pipeline::Stage + Throws : No exceptions + Comments : This function calls the all_parameters_for_ method associated + with the current stage and gets all the parameters for that + stage as an array of arbitrary data (e.g. arrayref or scalar) + +=cut + +sub all_parameters { + my ( $self, $stage ) = @_; + + my $sub_name = 'all_parameters_for_' . $stage->name; + + return $self->$sub_name(); +} + +=method run_job + + Usage : $pipeline->run_job($job); + Purpose : Run a job + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : This function calls the run_ method associated with the current + stage and passes along the parameters for the current component, + which are arbitrary (e.g. arrayref or scalar) + +=cut + +sub run_job { + my ( $self, $job ) = @_; + + my $sub_name = 'run_' . $job->stage->name; + + $self->$sub_name($job); + + return; +} + +=method input_overview + + Usage : $pipeline->say_if_verbose($pipeline->input_overview); + Purpose : Return textual overview of pipeline's input + Returns : Array of Strings + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub input_overview { + my ($self) = @_; + + my @output; + + push @output, 'Command line:', $self->cmd_line; + if ( defined $DETCT::VERSION ) { + push @output, 'DETCT version:' . $DETCT::VERSION; + } + push @output, 'Working directory: ' . $self->analysis_dir; + + push @output, 'BAM files: ' . 
join q{ }, + $self->analysis->list_all_bam_files(); + + push @output, sprintf 'Number of samples: %d', + scalar @{ $self->analysis->get_all_samples }; + push @output, sprintf 'Number of sequences: %d', + scalar @{ $self->analysis->get_all_sequences }; + push @output, sprintf 'Number of chunks: %d', $self->analysis->chunk_total; + + push @output, 'Number of sequences per chunk:'; + my $chunk_component = 0; + foreach my $chunk ( @{ $self->analysis->get_all_chunks } ) { + $chunk_component++; + push @output, sprintf ' Chunk %d: %d sequences', + $chunk_component, scalar @{$chunk}; + } + + return @output; +} + +=method say_if_verbose + + Usage : $pipeline->say_if_verbose( 'Command line:', $cmd_line ); + Purpose : Print output if pipeline is set to verbose + Returns : undef + Parameters : Array of Strings + Throws : No exceptions + Comments : Each string is a line without carriage returns or newlines + +=cut + +sub say_if_verbose { + my ( $self, @output ) = @_; + if ( $self->verbose ) { + print join "\n", @output; + print "\n"; + } + return; +} + +=method write_log_file + + Usage : $pipeline->write_log_file( @output ); + Purpose : Write data to a specified log file + Returns : undef + Parameters : String (the filename) + Array of Strings + Throws : No exceptions + Comments : None + +=cut + +sub write_log_file { + my ( $self, $filename, @output ) = @_; + + my $log_file = File::Spec->catfile( $self->analysis_dir, $filename ); + write_file( $log_file, @output ); + + return; +} + +# Usage : $self->_create_lock(); +# Purpose : Create lock file +# Returns : undef +# Parameters : None +# Throws : If lock file already exists +# Comments : None +sub _create_lock { + my ($self) = @_; + + my $lock_file = File::Spec->catfile( $self->analysis_dir, 'pipeline.lock' ); + + if ( -e $lock_file ) { + my $message = + "\nERROR: Is another pipeline running?\n" + . "Make sure it is not still running before deleting $lock_file and restarting.\n" + . "Lock file contains:\n\n"; + $message .= read_file($lock_file); + die $message . "\n"; + } + + my $hostname = hostname(); + my $timestamp = localtime; + write_file( $lock_file, $hostname . "\n" . $timestamp . "\n" ); + + return; +} + +# Usage : $self->_delete_lock(); +# Purpose : Delete lock file +# Returns : undef +# Parameters : None +# Throws : No exceptions +# Comments : None +sub _delete_lock { + my ($self) = @_; + + my $lock_file = File::Spec->catfile( $self->analysis_dir, 'pipeline.lock' ); + + unlink $lock_file; + + return; +} + +=method clean_up + + Usage : $self->clean_up(); + Purpose : Move results, archive stages and delete data + Returns : undef + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub clean_up { + my ($self) = @_; + + # Already cleaned up? + my $done_marker_file = + File::Spec->catfile( $self->analysis_dir, 'cleanup.done' ); + return if -e $done_marker_file; + + print 'Cleaning up...' . "\n"; + + # Tar all stages + my @stage_dirs = + map { $self->get_and_create_stage_dir($_) } @{ $self->get_all_stages() }; + my $tarball_file = + File::Spec->catfile( $self->analysis_dir, 'archive.tar.gz' ); + my $cmd = join q{ }, 'tar', 'cf', q{-}, @stage_dirs, q{|}, 'gzip', '-9', + '-c', q{>}, $tarball_file; + WIFEXITED( system $cmd) or confess "Couldn't run $cmd ($OS_ERROR)"; + + # Delete or move files + my $wanted = \&_move_or_delete; + find( + { + wanted => sub { $wanted->( $self->analysis_dir ) }, + postprocess => sub { rmdir $File::Find::dir }, + no_chdir => 1, + }, + @stage_dirs + ); + + write_file( $done_marker_file, '1' ); + + print 'Done' . 
"\n"; + + return; +} + +# Usage : find(\&_move_or_delete, $dir); +# Purpose : Move results files and delete other files +# Returns : undef +# Parameters : None +# Throws : No exceptions +# Comments : None +sub _move_or_delete { + my ($archive_dir) = @_; + + return if -d; # Ignore directories + + my ( $filename, undef, $extension ) = fileparse($File::Find::name); + + # Move or delete? + if ( $EXTENSION_TO_KEEP{$extension} ) { + rename $File::Find::name, + File::Spec->catfile( $archive_dir, $filename ); + } + else { + unlink $File::Find::name; + } + + return; +} + +1; diff --git a/lib/DETCT/Pipeline/Job.pm b/lib/DETCT/Pipeline/Job.pm new file mode 100644 index 0000000..ce8f348 --- /dev/null +++ b/lib/DETCT/Pipeline/Job.pm @@ -0,0 +1,697 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Pipeline::Job; +## use critic + +# ABSTRACT: Object representing a pipeline job + +## Author : is1 +## Maintainer : is1 +## Created : 2013-01-17 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use Class::InsideOut qw( private register id ); +use English qw( -no_match_vars ); +use File::ReadBackwards; +use YAML::Tiny qw( LoadFile ); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private stage => my %stage; # DETCT::Pipeline::Stage object +private component => my %component; # e.g. 2 +private scheduler => my %scheduler; # e.g. lsf +private base_filename => my %base_filename; # e.g. ./run_deseq/1 +private parameters => my %parameters; # e.g. arrayref or scalar +private retries => my %retries; # e.g. 5 +private memory => my %memory; # e.g. 3000 +private status_code => my %status_code; # e.g. DONE +private status_text => my %status_text; # e.g. 
Job killed by owner + +# Constants +Readonly our %STATUS_FOR => ( + PEND => 'RUNNING', + PSUSP => 'RUNNING', + RUN => 'RUNNING', + USUSP => 'RUNNING', + SSUSP => 'RUNNING', + WAIT => 'RUNNING', + EXIT => 'FAILED', + UNKWN => 'FAILED', + ZOMBI => 'FAILED', + DONE => 'DONE', +); + +=method new + + Usage : my $job = DETCT::Pipeline::Job->new( { + stage => $stage, + component => 2, + scheduler => 'lsf', + base_filename => './run_deseq/1', + parameters => $parameters, + } ); + Purpose : Constructor for job objects + Returns : DETCT::Pipeline::Job + Parameters : Hashref { + stage => DETCT::Pipeline::Stage, + component => Int, + scheduler => String, + base_filename => String, + parameters => Any (probably arrayref or scalar) + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_stage( $arg_ref->{stage} ); + $self->set_component( $arg_ref->{component} ); + $self->set_scheduler( $arg_ref->{scheduler} ); + $self->set_base_filename( $arg_ref->{base_filename} ); + $self->set_parameters( $arg_ref->{parameters} ); + $self->set_state_from_filesystem(); + return $self; +} + +=method stage + + Usage : my $stage = $job->stage; + Purpose : Getter for stage attribute + Returns : DETCT::Pipeline::Stage + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub stage { + my ($self) = @_; + return $stage{ id $self}; +} + +=method set_stage + + Usage : $job->set_stage($stage); + Purpose : Setter for stage attribute + Returns : undef + Parameters : DETCT::Pipeline::Stage + Throws : No exceptions + Comments : None + +=cut + +sub set_stage { + my ( $self, $arg ) = @_; + $stage{ id $self} = _check_stage($arg); + return; +} + +# Usage : $stage = _check_stage($stage); +# Purpose : Check for valid stage object +# Returns : DETCT::Pipeline::Stage +# Parameters : DETCT::Pipeline::Stage +# Throws : If stage object is missing or invalid (i.e. not a +# DETCT::Pipeline::Stage object) +# Comments : None +sub _check_stage { + my ($stage) = @_; + return $stage if defined $stage && $stage->isa('DETCT::Pipeline::Stage'); + confess 'No stage specified' if !defined $stage; + confess 'Class of stage (', ref $stage, ') not DETCT::Pipeline::Stage'; +} + +=method component + + Usage : my $component = $job->component; + Purpose : Getter for component attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub component { + my ($self) = @_; + return $component{ id $self}; +} + +=method set_component + + Usage : $job->set_component(2); + Purpose : Setter for component attribute + Returns : undef + Parameters : +ve Int (the component) + Throws : No exceptions + Comments : None + +=cut + +sub set_component { + my ( $self, $arg ) = @_; + $component{ id $self} = _check_component($arg); + return; +} + +# Usage : $component = _check_component($component); +# Purpose : Check for valid component +# Returns : +ve Int (the valid component) +# Parameters : +ve Int (the component) +# Throws : If component is missing or not a positive integer +# Comments : None +sub _check_component { + my ($component) = @_; + return $component if defined $component && $component =~ m/\A \d+ \z/xms; + confess 'No component specified' if !defined $component; + confess "Invalid component ($component) specified"; +} + +=method scheduler + + Usage : my $scheduler = $job->scheduler; + Purpose : Getter for scheduler attribute + Returns : String (e.g. 
"lsf") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub scheduler { + my ($self) = @_; + return $scheduler{ id $self}; +} + +=method set_scheduler + + Usage : $job->set_scheduler('lsf'); + Purpose : Setter for scheduler attribute + Returns : undef + Parameters : String (the scheduler) + Throws : No exceptions + Comments : None + +=cut + +sub set_scheduler { + my ( $self, $arg ) = @_; + $scheduler{ id $self} = _check_scheduler($arg); + return; +} + +# Usage : $scheduler = _check_scheduler($scheduler); +# Purpose : Check for valid scheduler +# Returns : String (the valid scheduler) +# Parameters : String (the scheduler) +# Throws : If scheduler is not lsf or local +# Comments : None +sub _check_scheduler { + my ($scheduler) = @_; + + confess 'Invalid scheduler specified' + if !defined $scheduler + || ( $scheduler ne 'lsf' && $scheduler ne 'local' ); + + return $scheduler; +} + +=method base_filename + + Usage : my $base_filename = $job->base_filename; + Purpose : Getter for the base filename attribute + Returns : String (e.g. "./run_deseq/1") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub base_filename { + my ($self) = @_; + return $base_filename{ id $self}; +} + +=method set_base_filename + + Usage : $job->set_base_filename('./run_deseq/1'); + Purpose : Setter for the base filename attribute + Returns : undef + Parameters : String (the base filename) + Throws : No exceptions + Comments : None + +=cut + +sub set_base_filename { + my ( $self, $arg ) = @_; + $base_filename{ id $self} = _check_base_filename($arg); + return; +} + +# Usage : $base_filename = _check_base_filename($base_filename); +# Purpose : Check for valid base filename +# Returns : String (the valid base filename) or undef +# Parameters : String (the base filename) +# Throws : If base filename is missing +# Comments : None +sub _check_base_filename { + my ($base_filename) = @_; + + confess 'No base filename specified' + if !defined $base_filename || !$base_filename; + + return $base_filename; +} + +=method parameters + + Usage : my $parameters = $job->parameters; + Purpose : Getter for parameters attribute + Returns : Any (usually arrayref or scalar) + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub parameters { + my ($self) = @_; + return $parameters{ id $self}; +} + +=method set_parameters + + Usage : $job->set_parameters($parameters); + Purpose : Setter for parameters attribute + Returns : undef + Parameters : Any (the parameters; usually arrayref or scalar) + Throws : No exceptions + Comments : None + +=cut + +sub set_parameters { + my ( $self, $arg ) = @_; + $parameters{ id $self} = $arg; + return; +} + +=method retries + + Usage : my $retries = $job->retries; + Purpose : Getter for retries attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub retries { + my ($self) = @_; + return $retries{ id $self}; +} + +=method set_retries + + Usage : $job->set_retries(5); + Purpose : Setter for retries attribute + Returns : undef + Parameters : +ve Int (the retries) + Throws : No exceptions + Comments : None + +=cut + +sub set_retries { + my ( $self, $arg ) = @_; + $retries{ id $self} = _check_retries($arg); + return; +} + +# Usage : $retries = _check_retries($retries); +# Purpose : Check for valid retries +# Returns : +ve Int (the valid retries) +# Parameters : +ve Int (the retries) +# Throws : If retries is not a positive integer +# Comments : None +sub _check_retries { + my ($retries) = @_; + 
+ confess "Invalid retries ($retries) specified" + if defined $retries && $retries !~ m/\A \d+ \z/xms; + + return $retries; +} + +=method memory + + Usage : my $memory = $job->memory; + Purpose : Getter for memory attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub memory { + my ($self) = @_; + return $memory{ id $self}; +} + +=method set_memory + + Usage : $job->set_memory(1000); + Purpose : Setter for memory attribute + Returns : undef + Parameters : +ve Int (the memory) + Throws : No exceptions + Comments : None + +=cut + +sub set_memory { + my ( $self, $arg ) = @_; + $memory{ id $self} = _check_memory($arg); + return; +} + +# Usage : $memory = _check_memory($memory); +# Purpose : Check for valid memory +# Returns : +ve Int (the valid memory) +# Parameters : +ve Int (the memory) +# Throws : If memory is not a positive integer +# Comments : None +sub _check_memory { + my ($memory) = @_; + + confess "Invalid memory ($memory) specified" + if defined $memory && $memory !~ m/\A \d+ \z/xms; + + return $memory; +} + +=method status_code + + Usage : my $status_code = $job->status_code; + Purpose : Getter for the status code attribute + Returns : String (e.g. "DONE") + Parameters : None + Throws : No exceptions + Comments : Status code can be RUNNING, FAILED, DONE or NOT_RUN + +=cut + +sub status_code { + my ($self) = @_; + return $status_code{ id $self}; +} + +=method set_status_code + + Usage : $job->set_status_code('DONE'); + Purpose : Setter for the status code attribute + Returns : undef + Parameters : String (the status code) + Throws : No exceptions + Comments : None + +=cut + +sub set_status_code { + my ( $self, $arg ) = @_; + $status_code{ id $self} = _check_status_code($arg); + return; +} + +# Usage : $status_code = _check_status_code($status_code); +# Purpose : Check for valid status code +# Returns : String (the valid status code) +# Parameters : String (the status code) +# Throws : If status code is not valid +# Comments : None +sub _check_status_code { + my ($status_code) = @_; + + return $status_code + if defined $status_code + && ( $status_code eq 'RUNNING' + || $status_code eq 'FAILED' + || $status_code eq 'DONE' + || $status_code eq 'NOT_RUN' ); + confess 'No status code specified' if !defined $status_code; + confess "Invalid status code ($status_code) specified"; +} + +=method status_text + + Usage : my $status_text = $job->status_text; + Purpose : Getter for status text attribute + Returns : String (e.g. 
"Job killed by owner") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub status_text { + my ($self) = @_; + return $status_text{ id $self}; +} + +=method set_status_text + + Usage : $job->set_status_text('Job killed by owner'); + Purpose : Setter for status text attribute + Returns : undef + Parameters : String (the status text) + Throws : No exceptions + Comments : None + +=cut + +sub set_status_text { + my ( $self, $arg ) = @_; + $status_text{ id $self} = $arg; + return; +} + +=method set_state_from_filesystem + + Usage : $job->set_state_from_filesystem(); + Purpose : Set state-related attributes of a job from filesystem + Returns : undef + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub set_state_from_filesystem { + my ($self) = @_; + + if ( $self->scheduler eq 'local' ) { + $self->_set_state_from_filesystem_for_local(); + } + elsif ( $self->scheduler eq 'lsf' ) { + $self->_set_state_from_filesystem_for_lsf(); + } + + return; +} + +# Usage : $self->_set_state_from_filesystem_for_local(); +# Purpose : Set state-related attributes of a job run locally (probably for +# testing) +# Returns : None +# Parameters : None +# Throws : No exceptions +# Comments : None +sub _set_state_from_filesystem_for_local { + my ($self) = @_; + + my $output_file = $self->base_filename . '.out'; + + my $job_file = $self->base_filename . '.job'; + + # Check if job has even been run yet + if ( !-e $job_file ) { + $self->set_status_code('NOT_RUN'); + return; + } + + # Get number of retries + my $yaml = LoadFile($job_file); + my $retries = $yaml->{retries}; + $self->set_retries($retries); + + if ( -e $output_file && !-z $output_file ) { + $self->set_status_code('DONE'); + } + elsif ( !-e $output_file || -z $output_file ) { + $self->set_status_code('FAILED'); + $self->set_status_text( 'Enpty output file: ' . $output_file ); + } + + return; +} + +# Usage : $self->_set_state_from_filesystem_for_lsf(); +# Purpose : Set state-related attributes of a job submitted to LSF +# Returns : None +# Parameters : None +# Throws : If job id in job file is not an integer +# If the status returned by bjobs is not recognised +# Comments : None +sub _set_state_from_filesystem_for_lsf { + my ($self) = @_; + + my $output_file = $self->base_filename . '.out'; + + my $job_file = $self->base_filename . '.job'; + + # Check if job has even been run yet + if ( !-e $job_file ) { + $self->set_status_code('NOT_RUN'); + return; + } + + # Get job id + my $yaml = LoadFile($job_file); + my $job_id = $yaml->{id}; + if ( $job_id !~ /\A \d+ \z/xms ) { + confess "Job ID ($job_id) not valid"; + } + + # Get number of retries + my $retries = $yaml->{retries}; + $self->set_retries($retries); + + # Get memory requested + my $memory = $yaml->{memory}; + $self->set_memory($memory); + + my ( $status_code, $status_text ); + + # Get job status for job id from bjobs command + my $lsf_status; + open my $pipe, q{-|}, 'bjobs ' . $job_id . ' 2>/dev/null'; # Hide STDERR + while ( my $job_line = <$pipe> ) { + if ( $job_line =~ m/\A $job_id \s+ \S+ \s+ (\S+)/xms ) { + $lsf_status = $1; + } + } + close $pipe; + if ($lsf_status) { + + # Got job status from bjobs + if ( !exists $STATUS_FOR{$lsf_status} ) { + confess "Unknown LSF status ($lsf_status)"; + } + $status_code = $STATUS_FOR{$lsf_status}; + $status_text = 'LSF status: ' . 
$lsf_status; + } + if ( !$status_code || $status_code eq 'FAILED' ) { + + # If bjobs doesn't return status or failed then check job's STDOUT + ( $status_code, $status_text ) = $self->_parse_lsf_stdout($job_id); + } + + $self->set_status_code($status_code); + $self->set_status_text($status_text); + + return; +} + +# Usage : ($status_code, $status_text) +# = $pipeline->_parse_lsf_stdout($job_id); +# Purpose : Parses LSF's STDOUT to get a job's status +# Returns : String (status code: DONE or FAILED) +# String (status info) or undef +# Parameters : Int (the job id) +# Throws : If STDOUT file can't be read +# Comments : Based on +# https://github.com/VertebrateResequencing/vr-pipe/blob/master/modules/VRPipe/Parser/lsf.pm +sub _parse_lsf_stdout { + my ( $self, $job_id ) = @_; + + # Check STDOUT file exists at all (in case job was killed whilst pending) + my $stdout_file = $self->base_filename . '.o'; + if ( !-e $stdout_file ) { + return 'FAILED', 'Job did not run'; + } + + # STDOUT file is overwritten so no need to read backwards to get last job + my ( $status_code, $status_text, $stdout_job_id ); + my $found_start = 0; + my $found_end = 0; + my $bw = File::ReadBackwards->new($stdout_file) + or confess "Can't read $stdout_file: $OS_ERROR"; + while ( defined( my $line = $bw->readline ) ) { + if ( $line =~ m/\A Resource \s usage \s summary: /xms ) { + $found_end = 1; + next; + } + elsif ( $line =~ m/\A Sender: \s LSF \s System/xms ) { + $found_start = 1; + last; # Will find start after end + } + elsif ($found_end) { + + # Get job id + if ( $line =~ m/\A Subject: \s Job \s (\d+):/xms ) { + $stdout_job_id = $1; + } + + # Get job's status code + if ( $line =~ m/\A Successfully \s completed[.] /xms ) { + $status_code = 'DONE'; + } + elsif ( !$status_code + && $line =~ + m/\A Exited \s with \s exit \s code \s (\d+) [.] /xms ) + { + $status_code = 'FAILED'; + $status_text = "Exit code: $1"; + } + elsif ( $line =~ m/\A TERM_ (\w+: .*) [.] /xms ) { + $status_code = 'FAILED'; + $status_text = $1; + } + } + } + + # Ensure correct job + if ( defined $stdout_job_id && $job_id != $stdout_job_id ) { + $status_code = 'FAILED'; + $status_text = "Wrong job id (expecting $job_id, got $stdout_job_id)"; + } + + # If no status then STDOUT could not be parsed + if ( !defined $status_code ) { + $status_code = 'FAILED'; + $status_text = "Could not parse job's STDOUT: $stdout_file"; + } + + return $status_code, $status_text; +} + +1; diff --git a/lib/DETCT/Pipeline/Stage.pm b/lib/DETCT/Pipeline/Stage.pm new file mode 100644 index 0000000..f626279 --- /dev/null +++ b/lib/DETCT/Pipeline/Stage.pm @@ -0,0 +1,236 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Pipeline::Stage; +## use critic + +# ABSTRACT: Object representing a pipeline stage + +## Author : is1 +## Maintainer : is1 +## Created : 2013-01-09 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Class::InsideOut qw( private register id ); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private name => my %name; # e.g. count_tags +private default_memory => my %default_memory; # e.g. 3000 +private all_jobs_run => my %all_jobs_run; # e.g. 
1 +private prerequisite => my %prerequisite; # arrayref of stages + +=method new + + Usage : my $stage = DETCT::Pipeline::Stage->new( { + name => 'count_tags', + default_memory => 3000, + } ); + Purpose : Constructor for stage objects + Returns : DETCT::Pipeline::Stage + Parameters : Hashref { + name => String, + default_memory => Int, + all_jobs_run => Boolean or undef, + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_name( $arg_ref->{name} ); + $self->set_default_memory( $arg_ref->{default_memory} ); + $self->set_all_jobs_run( $arg_ref->{all_jobs_run} ); + return $self; +} + +=method name + + Usage : my $name = $stage->name; + Purpose : Getter for name attribute + Returns : String (e.g. "count_tags") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub name { + my ($self) = @_; + return $name{ id $self}; +} + +=method set_name + + Usage : $stage->set_name('count_tags'); + Purpose : Setter for name attribute + Returns : undef + Parameters : String (the name) + Throws : No exceptions + Comments : None + +=cut + +sub set_name { + my ( $self, $arg ) = @_; + $name{ id $self} = _check_name($arg); + return; +} + +# Usage : $name = _check_name($name); +# Purpose : Check for valid name +# Returns : String (the valid name) +# Parameters : String (the name) +# Throws : If name is missing or invalid (i.e. not alphanumeric) +# Comments : None +sub _check_name { + my ($name) = @_; + + return $name if defined $name && $name =~ m/\A \w+ \z/xms; + confess 'No name specified' if !defined $name; + confess "Invalid name ($name) specified"; +} + +=method default_memory + + Usage : my $default_memory = $stage->default_memory; + Purpose : Getter for default memory attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub default_memory { + my ($self) = @_; + return $default_memory{ id $self}; +} + +=method set_default_memory + + Usage : $stage->set_default_memory(3000); + Purpose : Setter for default memory attribute + Returns : undef + Parameters : +ve Int (the default memory) + Throws : No exceptions + Comments : None + +=cut + +sub set_default_memory { + my ( $self, $arg ) = @_; + $default_memory{ id $self} = _check_default_memory($arg); + return; +} + +# Usage : $default_memory = _check_default_memory($default_memory); +# Purpose : Check for valid default memory +# Returns : +ve Int (the valid default memory) +# Parameters : +ve Int (the default memory) +# Throws : If default memory is missing or not a positive integer +# Comments : None +sub _check_default_memory { + my ($default_memory) = @_; + return $default_memory + if defined $default_memory && $default_memory =~ m/\A \d+ \z/xms; + confess 'No default memory specified' if !defined $default_memory; + confess "Invalid default memory ($default_memory) specified"; +} + +=method all_jobs_run + + Usage : my $all_jobs_run = $stage->all_jobs_run; + Purpose : Getter for all jobs run flag + Returns : Boolean + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_jobs_run { + my ($self) = @_; + return $all_jobs_run{ id $self} || 0; +} + +=method set_all_jobs_run + + Usage : $stage->set_all_jobs_run(1); + Purpose : Setter for all jobs run flag + Returns : undef + Parameters : Boolean + Throws : No exceptions + Comments : None + +=cut + +sub set_all_jobs_run { + my ( $self, $arg ) = @_; + $all_jobs_run{ id $self} = $arg ? 
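+ # Normalise any truthy value to 1 and anything else (including undef) to 0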
1 : 0; + return; +} + +=method add_prerequisite + + Usage : $stage->add_prerequisite($prerequisite); + Purpose : Add a prerequisite to a stage + Returns : undef + Parameters : DETCT::Pipeline::Stage + Throws : If prerequisite is missing or invalid (i.e. not a + DETCT::Pipeline::Stage object) + Comments : None + +=cut + +sub add_prerequisite { + my ( $self, $prerequisite ) = @_; + + confess 'No prerequisite specified' if !defined $prerequisite; + confess 'Class of prerequisite (', ref $prerequisite, + ') not DETCT::Pipeline::Stage' + if !$prerequisite->isa('DETCT::Pipeline::Stage'); + + if ( !exists $prerequisite{ id $self} ) { + $prerequisite{ id $self} = [$prerequisite]; + } + else { + push @{ $prerequisite{ id $self} }, $prerequisite; + } + + return; +} + +=method get_all_prerequisites + + Usage : $prerequisites = $stage->get_all_prerequisites(); + Purpose : Get all prerequisites of a stage + Returns : Arrayref of DETCT::Pipeline::Stage objects + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub get_all_prerequisites { + my ($self) = @_; + + return $prerequisite{ id $self} || []; +} + +1; diff --git a/lib/DETCT/Pipeline/WithDiffExprStages.pm b/lib/DETCT/Pipeline/WithDiffExprStages.pm new file mode 100644 index 0000000..5f2161d --- /dev/null +++ b/lib/DETCT/Pipeline/WithDiffExprStages.pm @@ -0,0 +1,1236 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Pipeline::WithDiffExprStages; +## use critic + +# ABSTRACT: Object representing a differential expression pipeline + +## Author : is1 +## Maintainer : is1 +## Created : 2013-01-16 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use parent qw(DETCT::Pipeline); + +use Class::InsideOut qw( private register id ); +use Scalar::Util qw( refaddr ); +use YAML::Tiny qw( DumpFile LoadFile ); +use DETCT::GeneFinder; +use DETCT::Misc::BAM qw( + count_tags + bin_reads + get_read_peaks + get_three_prime_ends + merge_three_prime_ends + filter_three_prime_ends + choose_three_prime_end + count_reads + merge_read_counts +); +use DETCT::Misc::PeakHMM qw( + merge_read_peaks + summarise_read_peaks + run_peak_hmm + join_hmm_bins +); +use DETCT::Misc::R qw( + run_deseq +); +use DETCT::Misc::Output qw( + dump_as_table +); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +=method all_parameters_by_bam_file_then_chunk + + Usage : all_parameters_by_bam_file_then_chunk(); + Purpose : Get all parameters for stage that requires jobs split up by BAM + file then by chunk + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_by_bam_file_then_chunk { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + foreach my $bam_file ( $self->analysis->list_all_bam_files() ) { + my @tags = $self->analysis->list_all_tags_by_bam_file($bam_file); + foreach my $chunk ( @{$chunks} ) { + push @all_parameters, [ $bam_file, $chunk, @tags ]; + } + } + + return @all_parameters; +} + +=method all_parameters_for_count_tags + + Usage : all_parameters_for_count_tags(); + Purpose : Get all parameters for count_tags stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_count_tags { + my ($self) = @_; + + return $self->all_parameters_by_bam_file_then_chunk(); +} + +=method run_count_tags + + Usage : 
run_count_tags(); + Purpose : Run function for count_tags stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_count_tags { + my ( $self, $job ) = @_; + + my ( $bam_file, $chunk, @tags ) = @{ $job->parameters }; + + my %chunk_count; + + # Get count for each sequence of a chunk separately and then merge + foreach my $seq ( @{$chunk} ) { + my $seq_count = count_tags( + { + bam_file => $bam_file, + mismatch_threshold => $self->analysis->mismatch_threshold, + seq_name => $seq->name, + tags => \@tags, + } + ); + %chunk_count = + %{ $self->hash_merge->merge( \%chunk_count, $seq_count ) }; + } + + my $output_file = $job->base_filename . '.out'; + + DumpFile( $output_file, \%chunk_count ); + + return; +} + +=method all_parameters_for_bin_reads + + Usage : all_parameters_for_bin_reads(); + Purpose : Get all parameters for bin_reads stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_bin_reads { + my ($self) = @_; + + return $self->all_parameters_by_bam_file_then_chunk(); +} + +=method run_bin_reads + + Usage : run_bin_reads(); + Purpose : Run function for bin_reads stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_bin_reads { + my ( $self, $job ) = @_; + + my ( $bam_file, $chunk, @tags ) = @{ $job->parameters }; + + my %chunk_bins; + + # Get bins for each sequence of a chunk separately and then merge + foreach my $seq ( @{$chunk} ) { + my $seq_bins = bin_reads( + { + bam_file => $bam_file, + mismatch_threshold => $self->analysis->mismatch_threshold, + bin_size => $self->analysis->bin_size, + seq_name => $seq->name, + tags => \@tags, + } + ); + %chunk_bins = %{ $self->hash_merge->merge( \%chunk_bins, $seq_bins ) }; + } + + my $output_file = $job->base_filename . '.out'; + + DumpFile( $output_file, \%chunk_bins ); + + return; +} + +=method all_parameters_for_get_read_peaks + + Usage : all_parameters_for_get_read_peaks(); + Purpose : Get all parameters for get_read_peaks stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_get_read_peaks { + my ($self) = @_; + + return $self->all_parameters_by_bam_file_then_chunk(); +} + +=method run_get_read_peaks + + Usage : run_get_read_peaks(); + Purpose : Run function for get_read_peaks stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_get_read_peaks { + my ( $self, $job ) = @_; + + my ( $bam_file, $chunk, @tags ) = @{ $job->parameters }; + + my %chunk_peaks; + + # Get read peaks for each sequence of a chunk separately and then merge + foreach my $seq ( @{$chunk} ) { + my $seq_peaks = get_read_peaks( + { + bam_file => $bam_file, + mismatch_threshold => $self->analysis->mismatch_threshold, + peak_buffer_width => $self->analysis->peak_buffer_width, + seq_name => $seq->name, + tags => \@tags, + } + ); + %chunk_peaks = + %{ $self->hash_merge->merge( \%chunk_peaks, $seq_peaks ) }; + } + + my $output_file = $job->base_filename . 
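+ # Each stage component serialises its results as YAML to <base_filename>.out; later stages reload these files with LoadFile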
'.out'; + + DumpFile( $output_file, \%chunk_peaks ); + + return; +} + +=method all_parameters_for_merge_read_peaks + + Usage : all_parameters_for_merge_read_peaks(); + Purpose : Get all parameters for merge_read_peaks stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_merge_read_peaks { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + # Work out which get_read_peaks stage files need to be combined + foreach my $merge_chunk ( @{$chunks} ) { + my @get_read_peaks_output_files; + my $component = 0; + foreach my $bam_file ( $self->analysis->list_all_bam_files() ) { + foreach my $get_chunk ( @{$chunks} ) { + $component++; + if ( refaddr($merge_chunk) == refaddr($get_chunk) ) { + my $output_file = + $self->get_and_check_output_file( 'get_read_peaks', + $component ); + push @get_read_peaks_output_files, $output_file; + } + } + } + push @all_parameters, [ $merge_chunk, @get_read_peaks_output_files ]; + } + + return @all_parameters; +} + +=method run_merge_read_peaks + + Usage : run_merge_read_peaks(); + Purpose : Run function for merge_read_peaks stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_merge_read_peaks { + my ( $self, $job ) = @_; + + my ( $chunk, @get_read_peaks_output_files ) = @{ $job->parameters }; + + # Join lists of peaks + my %unmerged_peaks; + foreach my $output_file (@get_read_peaks_output_files) { + %unmerged_peaks = %{ + $self->hash_merge->merge( + \%unmerged_peaks, LoadFile($output_file) + ) + }; + } + + my %chunk_peaks; + + # Merge read peaks for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my $seq_peaks = merge_read_peaks( + { + peak_buffer_width => $self->analysis->peak_buffer_width, + seq_name => $seq->name, + peaks => $unmerged_peaks{ $seq->name }, + } + ); + %chunk_peaks = + %{ $self->hash_merge->merge( \%chunk_peaks, $seq_peaks ) }; + } + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, \%chunk_peaks ); + + return; +} + +=method all_parameters_for_summarise_read_peaks + + Usage : all_parameters_for_summarise_read_peaks(); + Purpose : Get all parameters for summarise_read_peaks stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_summarise_read_peaks { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + my $component = 0; + foreach my $chunk ( @{$chunks} ) { + $component++; + my $merge_read_peaks_output_file = + $self->get_and_check_output_file( 'merge_read_peaks', $component ); + push @all_parameters, [ $chunk, $merge_read_peaks_output_file ]; + } + + return @all_parameters; +} + +=method run_summarise_read_peaks + + Usage : run_summarise_read_peaks(); + Purpose : Run function for summarise_read_peaks stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_summarise_read_peaks { + my ( $self, $job ) = @_; + + my ( $chunk, $merge_read_peaks_output_file ) = @{ $job->parameters }; + + # Get merged peaks + my %merged_peaks = %{ LoadFile($merge_read_peaks_output_file) }; + + my %chunk_summary; + + # Summarise read peaks for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my $seq_summary = summarise_read_peaks( + { + bin_size => $self->analysis->bin_size, + peak_buffer_width => $self->analysis->peak_buffer_width, + hmm_sig_level => $self->analysis->hmm_sig_level, + seq_name => $seq->name, + seq_bp => $seq->bp, + read_length => $self->analysis->read2_length, + peaks => $merged_peaks{ $seq->name }, + } + ); + %chunk_summary = + %{ $self->hash_merge->merge( \%chunk_summary, $seq_summary ) }; + } + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, \%chunk_summary ); + + return; +} + +=method all_parameters_for_run_peak_hmm + + Usage : all_parameters_for_run_peak_hmm(); + Purpose : Get all parameters for run_peak_hmm stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_run_peak_hmm { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + # Work out which bin_reads stage files need to be combined + my $component = 0; + foreach my $hmm_chunk ( @{$chunks} ) { + $component++; + my @bin_reads_output_files; + my $bin_component = 0; + foreach my $bam_file ( $self->analysis->list_all_bam_files() ) { + foreach my $bin_chunk ( @{$chunks} ) { + $bin_component++; + if ( refaddr($hmm_chunk) == refaddr($bin_chunk) ) { + my $bin_output_file = + $self->get_and_check_output_file( 'bin_reads', + $bin_component ); + push @bin_reads_output_files, $bin_output_file; + } + } + } + my $summary_output_file = + $self->get_and_check_output_file( 'summarise_read_peaks', + $component ); + push @all_parameters, + [ $hmm_chunk, $summary_output_file, @bin_reads_output_files ]; + } + + return @all_parameters; +} + +=method run_run_peak_hmm + + Usage : run_run_peak_hmm(); + Purpose : Run function for run_peak_hmm stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_run_peak_hmm { + my ( $self, $job ) = @_; + + my ( $chunk, $summary_output_file, @bin_reads_output_files ) = + @{ $job->parameters }; + + # Join read bins + my %read_bins; + foreach my $output_file (@bin_reads_output_files) { + %read_bins = + %{ $self->hash_merge->merge( \%read_bins, LoadFile($output_file) ) }; + } + + # Load summary + my $summary = LoadFile($summary_output_file); + + my %chunk_hmm; + + # Run peak HMM for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my $seq_hmm = run_peak_hmm( + { + dir => $job->base_filename, + hmm_sig_level => $self->analysis->hmm_sig_level, + seq_name => $seq->name, + read_bins => $read_bins{ $seq->name }, + summary => $summary->{ $seq->name }, + hmm_binary => $self->analysis->hmm_binary, + } + ); + %chunk_hmm = %{ $self->hash_merge->merge( \%chunk_hmm, $seq_hmm ) }; + } + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, \%chunk_hmm ); + + return; +} + +=method all_parameters_for_join_hmm_bins + + Usage : all_parameters_for_join_hmm_bins(); + Purpose : Get all parameters for join_hmm_bins stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_join_hmm_bins { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + my $component = 0; + foreach my $chunk ( @{$chunks} ) { + $component++; + my $run_peak_hmm_output_file = + $self->get_and_check_output_file( 'run_peak_hmm', $component ); + push @all_parameters, [ $chunk, $run_peak_hmm_output_file ]; + } + + return @all_parameters; +} + +=method run_join_hmm_bins + + Usage : run_join_hmm_bins(); + Purpose : Run function for join_hmm_bins stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_join_hmm_bins { + my ( $self, $job ) = @_; + + my ( $chunk, $run_peak_hmm_output_file ) = @{ $job->parameters }; + + # Get HMM bins + my $hmm_bins = LoadFile($run_peak_hmm_output_file); + + my %chunk_regions; + + # Join HMM bins for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my $seq_regions = join_hmm_bins( + { + bin_size => $self->analysis->bin_size, + seq_name => $seq->name, + hmm_bins => $hmm_bins->{ $seq->name }, + } + ); + %chunk_regions = + %{ $self->hash_merge->merge( \%chunk_regions, $seq_regions ) }; + } + + my $output_file = $job->base_filename . '.out'; + + DumpFile( $output_file, \%chunk_regions ); + + return; +} + +=method all_parameters_for_get_three_prime_ends + + Usage : all_parameters_for_get_three_prime_ends(); + Purpose : Get all parameters for get_three_prime_ends stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_get_three_prime_ends { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + foreach my $bam_file ( $self->analysis->list_all_bam_files() ) { + my @tags = $self->analysis->list_all_tags_by_bam_file($bam_file); + my $component = 0; + foreach my $chunk ( @{$chunks} ) { + $component++; + my $join_hmm_bins_output_file = + $self->get_and_check_output_file( 'join_hmm_bins', $component ); + push @all_parameters, + [ $chunk, $bam_file, $join_hmm_bins_output_file, @tags ]; + } + } + + return @all_parameters; +} + +=method run_get_three_prime_ends + + Usage : run_get_three_prime_ends(); + Purpose : Run function for get_three_prime_ends stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_get_three_prime_ends { + my ( $self, $job ) = @_; + + my ( $chunk, $bam_file, $join_hmm_bins_output_file, @tags ) = + @{ $job->parameters }; + + # Get regions + my $regions = LoadFile($join_hmm_bins_output_file); + + my %chunk_regions; + + # Get 3' ends for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my $seq_regions = get_three_prime_ends( + { + bam_file => $bam_file, + mismatch_threshold => $self->analysis->mismatch_threshold, + seq_name => $seq->name, + tags => \@tags, + regions => $regions->{ $seq->name }, + } + ); + %chunk_regions = + %{ $self->hash_merge->merge( \%chunk_regions, $seq_regions ) }; + } + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, \%chunk_regions ); + + return; +} + +=method all_parameters_for_merge_three_prime_ends + + Usage : all_parameters_for_merge_three_prime_ends(); + Purpose : Get all parameters for merge_three_prime_ends stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_merge_three_prime_ends { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + # Work out which get_three_prime_ends stage files need to be merged + foreach my $merge_chunk ( @{$chunks} ) { + my @get_three_prime_ends_output_files; + my $component = 0; + foreach my $bam_file ( $self->analysis->list_all_bam_files() ) { + foreach my $run_chunk ( @{$chunks} ) { + $component++; + if ( refaddr($merge_chunk) == refaddr($run_chunk) ) { + my $output_file = + $self->get_and_check_output_file( 'get_three_prime_ends', + $component ); + push @get_three_prime_ends_output_files, $output_file; + } + } + } + push @all_parameters, + [ $merge_chunk, @get_three_prime_ends_output_files ]; + } + + return @all_parameters; +} + +=method run_merge_three_prime_ends + + Usage : run_merge_three_prime_ends(); + Purpose : Run function for merge_three_prime_ends stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_merge_three_prime_ends { + my ( $self, $job ) = @_; + + my ( $chunk, @get_three_prime_ends_output_files ) = @{ $job->parameters }; + + # Load all regions + my @list_of_lists_of_regions; + foreach my $output_file (@get_three_prime_ends_output_files) { + my $regions = LoadFile($output_file); + push @list_of_lists_of_regions, $regions; + } + + my %chunk_regions; + + # Merge 3' ends for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my @regions = map { $_->{ $seq->name } } @list_of_lists_of_regions; + my $seq_regions = merge_three_prime_ends( + { + seq_name => $seq->name, + regions => \@regions, + } + ); + %chunk_regions = + %{ $self->hash_merge->merge( \%chunk_regions, $seq_regions ) }; + } + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, \%chunk_regions ); + + return; +} + +=method all_parameters_for_filter_three_prime_ends + + Usage : all_parameters_for_filter_three_prime_ends(); + Purpose : Get all parameters for filter_three_prime_ends stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_filter_three_prime_ends { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + my $component = 0; + foreach my $chunk ( @{$chunks} ) { + $component++; + my $merge_three_prime_ends_output_file = + $self->get_and_check_output_file( 'merge_three_prime_ends', + $component ); + push @all_parameters, [ $chunk, $merge_three_prime_ends_output_file ]; + } + + return @all_parameters; +} + +=method run_filter_three_prime_ends + + Usage : run_filter_three_prime_ends(); + Purpose : Run function for filter_three_prime_ends stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_filter_three_prime_ends { + my ( $self, $job ) = @_; + + my ( $chunk, $merge_three_prime_ends_output_file ) = @{ $job->parameters }; + + # Get regions + my $regions = LoadFile($merge_three_prime_ends_output_file); + + my %chunk_regions; + + # Filter 3' ends for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my $seq_regions = filter_three_prime_ends( + { + analysis => $self->analysis, + seq_name => $seq->name, + regions => $regions->{ $seq->name }, + } + ); + %chunk_regions = + %{ $self->hash_merge->merge( \%chunk_regions, $seq_regions ) }; + } + + my $output_file = $job->base_filename . '.out'; + + DumpFile( $output_file, \%chunk_regions ); + + return; +} + +=method all_parameters_for_choose_three_prime_end + + Usage : all_parameters_for_choose_three_prime_end(); + Purpose : Get all parameters for choose_three_prime_end stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_choose_three_prime_end { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + my $component = 0; + foreach my $chunk ( @{$chunks} ) { + $component++; + my $filter_three_prime_ends_output_file = + $self->get_and_check_output_file( 'filter_three_prime_ends', + $component ); + push @all_parameters, [ $chunk, $filter_three_prime_ends_output_file ]; + } + + return @all_parameters; +} + +=method run_choose_three_prime_end + + Usage : run_choose_three_prime_end(); + Purpose : Run function for choose_three_prime_end stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_choose_three_prime_end { + my ( $self, $job ) = @_; + + my ( $chunk, $filter_three_prime_ends_output_file ) = @{ $job->parameters }; + + # Get regions + my $regions = LoadFile($filter_three_prime_ends_output_file); + + my %chunk_regions; + + # Choose 3' ends for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my $seq_regions = choose_three_prime_end( + { + seq_name => $seq->name, + regions => $regions->{ $seq->name }, + } + ); + %chunk_regions = + %{ $self->hash_merge->merge( \%chunk_regions, $seq_regions ) }; + } + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, \%chunk_regions ); + + return; +} + +=method all_parameters_for_count_reads + + Usage : all_parameters_for_count_reads(); + Purpose : Get all parameters for count_reads stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_count_reads { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + foreach my $bam_file ( $self->analysis->list_all_bam_files() ) { + my @tags = $self->analysis->list_all_tags_by_bam_file($bam_file); + my $component = 0; + foreach my $chunk ( @{$chunks} ) { + $component++; + my $choose_three_prime_end_output_file = + $self->get_and_check_output_file( 'choose_three_prime_end', + $component ); + push @all_parameters, + [ $chunk, $bam_file, $choose_three_prime_end_output_file, @tags ]; + } + } + + return @all_parameters; +} + +=method run_count_reads + + Usage : run_count_reads(); + Purpose : Run function for count_reads stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_count_reads { + my ( $self, $job ) = @_; + + my ( $chunk, $bam_file, $choose_three_prime_end_output_file, @tags ) = + @{ $job->parameters }; + + # Get regions + my $regions = LoadFile($choose_three_prime_end_output_file); + + my %chunk_regions; + + # Count reads for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + my $seq_regions = count_reads( + { + bam_file => $bam_file, + mismatch_threshold => $self->analysis->mismatch_threshold, + seq_name => $seq->name, + regions => $regions->{ $seq->name }, + tags => \@tags, + } + ); + %chunk_regions = + %{ $self->hash_merge->merge( \%chunk_regions, $seq_regions ) }; + } + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, \%chunk_regions ); + + return; +} + +=method all_parameters_for_merge_read_counts + + Usage : all_parameters_for_merge_read_counts(); + Purpose : Get all parameters for merge_read_counts stage + Returns : Array of arrayrefs + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_merge_read_counts { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + # Work out which count_reads stage files need to be merged + foreach my $merge_chunk ( @{$chunks} ) { + my %output_file_for; + my $component = 0; + foreach my $bam_file ( $self->analysis->list_all_bam_files() ) { + foreach my $run_chunk ( @{$chunks} ) { + $component++; + if ( refaddr($merge_chunk) == refaddr($run_chunk) ) { + my $output_file = + $self->get_and_check_output_file( 'count_reads', + $component ); + $output_file_for{$bam_file} = $output_file; + } + } + } + push @all_parameters, [ $merge_chunk, %output_file_for ]; + } + + return @all_parameters; +} + +=method run_merge_read_counts + + Usage : run_merge_read_counts(); + Purpose : Run function for merge_read_counts stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_merge_read_counts { + my ( $self, $job ) = @_; + + my ( $chunk, %output_file_for ) = @{ $job->parameters }; + + # Load all regions + my %hash_of_lists_of_regions; + foreach my $bam_file ( keys %output_file_for ) { + my $regions = LoadFile( $output_file_for{$bam_file} ); + $hash_of_lists_of_regions{$bam_file} = $regions; + } + + my %chunk_regions; + + # Merge read counts for each sequence of a chunk separately + foreach my $seq ( @{$chunk} ) { + + # Hash keyed by BAM file + my %regions = + map { $_ => $hash_of_lists_of_regions{$_}->{ $seq->name } } + keys %hash_of_lists_of_regions; + my $seq_regions = merge_read_counts( + { + seq_name => $seq->name, + regions => \%regions, + samples => $self->analysis->get_all_samples(), + } + ); + %chunk_regions = + %{ $self->hash_merge->merge( \%chunk_regions, $seq_regions ) }; + } + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, \%chunk_regions ); + + return; +} + +=method all_parameters_for_run_deseq + + Usage : all_parameters_for_run_deseq(); + Purpose : Get all parameters for run_deseq stage + Returns : Arrayref + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_run_deseq { + my ($self) = @_; + + my @all_parameters; + + my $chunks = $self->analysis->get_all_chunks(); + + my @merge_read_counts_output_files; + my $component = 0; + foreach my $chunk ( @{$chunks} ) { + $component++; + push @merge_read_counts_output_files, + $self->get_and_check_output_file( 'merge_read_counts', $component ); + } + push @all_parameters, \@merge_read_counts_output_files; + + return @all_parameters; +} + +=method run_run_deseq + + Usage : run_run_deseq(); + Purpose : Run function for run_deseq stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_run_deseq { + my ( $self, $job ) = @_; + + my (@merge_read_counts_output_files) = @{ $job->parameters }; + + # Join regions + my %regions; + foreach my $output_file (@merge_read_counts_output_files) { + %regions = + %{ $self->hash_merge->merge( \%regions, LoadFile($output_file) ) }; + } + + my $regions_ref = run_deseq( + { + dir => $job->base_filename, + regions => \%regions, + samples => $self->analysis->get_all_samples(), + r_binary => $self->analysis->r_binary, + deseq_script => $self->analysis->deseq_script, + } + ); + + my $output_file = $job->base_filename . '.out'; + + DumpFile( $output_file, $regions_ref ); + + return; +} + +=method all_parameters_for_add_gene_annotation + + Usage : all_parameters_for_add_gene_annotation(); + Purpose : Get all parameters for add_gene_annotation stage + Returns : Arrayref + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_add_gene_annotation { + my ($self) = @_; + + my @all_parameters; + + my $run_deseq_output_file = + $self->get_and_check_output_file( 'run_deseq', 1 ); + + push @all_parameters, [$run_deseq_output_file]; + + return @all_parameters; +} + +=method run_add_gene_annotation + + Usage : run_add_gene_annotation(); + Purpose : Run function for add_gene_annotation stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_add_gene_annotation { + my ( $self, $job ) = @_; + + my ($run_deseq_output_file) = @{ $job->parameters }; + + # Get regions + my $regions = LoadFile($run_deseq_output_file); + + # Annotate 3' ends with genes + # Could split regions by chunk if slow + my $gene_finder = DETCT::GeneFinder->new( + { slice_adaptor => $self->analysis->slice_adaptor, } ); + my $annotated_regions_ref = $gene_finder->add_gene_annotation($regions); + + my $output_file = $job->base_filename . 
'.out'; + + DumpFile( $output_file, $annotated_regions_ref ); + + return; +} + +=method all_parameters_for_dump_as_table + + Usage : all_parameters_for_dump_as_table(); + Purpose : Get all parameters for dump_as_table stage + Returns : Arrayref + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub all_parameters_for_dump_as_table { + my ($self) = @_; + + my @all_parameters; + + my $add_gene_annotation_output_file = + $self->get_and_check_output_file( 'add_gene_annotation', 1 ); + + push @all_parameters, [$add_gene_annotation_output_file]; + + return @all_parameters; +} + +=method run_dump_as_table + + Usage : run_dump_as_table(); + Purpose : Run function for dump_as_table stage + Returns : undef + Parameters : DETCT::Pipeline::Job + Throws : No exceptions + Comments : None + +=cut + +sub run_dump_as_table { + my ( $self, $job ) = @_; + + my ($add_gene_annotation_output_file) = @{ $job->parameters }; + + # Get regions + my $regions = LoadFile($add_gene_annotation_output_file); + + DETCT::Misc::Output::dump_as_table( + { + analysis => $self->analysis, + dir => $job->base_filename, + regions => $regions, + } + ); + + my $output_file = $job->base_filename . '.out'; + + DumpFile( $output_file, 1 ); + + return; +} + +1; diff --git a/lib/DETCT/Sample.pm b/lib/DETCT/Sample.pm new file mode 100644 index 0000000..042a235 --- /dev/null +++ b/lib/DETCT/Sample.pm @@ -0,0 +1,367 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Sample; +## use critic + +# ABSTRACT: Object representing a sample + +## Author : is1 +## Maintainer : is1 +## Created : 2012-09-19 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use Class::InsideOut qw( private register id ); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private name => my %name; # e.g. zmp_ph1_1m +private description => my %description; # e.g. ZMP phenotype 1.1 mutant +private condition => my %condition; # e.g. mutant +private group => my %group; # e.g. 1 +private tag => my %tag; # e.g. NNNNBGAGGC +private bam_file => my %bam_file; # e.g. 8295_6#1.bam + +# Constants +Readonly our $MAX_NAME_LENGTH => 128; +Readonly our $MAX_CONDITION_LENGTH => 128; +Readonly our $MAX_GROUP_LENGTH => 128; + +=method new + + Usage : my $sample = DETCT::Sample->new( { + name => 'zmp_ph1_1m', + condition => 'mutant', + group => '1', + tag => 'NNNNBGAGGC', + bam_file => '8295_6#1.bam', + } ); + Purpose : Constructor for sample objects + Returns : DETCT::Sample + Parameters : Hashref { + name => String, + description => String or undef, + condition => String, + group => String or undef, + tag => String, + bam_file => String, + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_name( $arg_ref->{name} ); + $self->set_description( $arg_ref->{description} ); + $self->set_condition( $arg_ref->{condition} ); + $self->set_group( $arg_ref->{group} ); + $self->set_tag( $arg_ref->{tag} ); + $self->set_bam_file( $arg_ref->{bam_file} ); + return $self; +} + +=method name + + Usage : my $name = $sample->name; + Purpose : Getter for name attribute + Returns : String (e.g. 
"zmp_ph1_1m") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub name { + my ($self) = @_; + return $name{ id $self}; +} + +=method set_name + + Usage : $sample->set_name('zmp_ph1_1m'); + Purpose : Setter for name attribute + Returns : undef + Parameters : String (the name) + Throws : No exceptions + Comments : None + +=cut + +sub set_name { + my ( $self, $arg ) = @_; + $name{ id $self} = _check_name($arg); + return; +} + +# Usage : $name = _check_name($name); +# Purpose : Check for valid name +# Returns : String (the valid name) +# Parameters : String (the name) +# Throws : If name is missing +# If name is invalid (i.e. not alphanumeric) +# If name is empty +# If name > $MAX_NAME_LENGTH characters +# Comments : None +sub _check_name { + my ($name) = @_; + + confess 'No name specified' if !defined $name; + confess 'Empty name specified' if !length $name; + confess 'Invalid name specified' if $name !~ m/\A [\w.-]+ \z/xms; + confess "Name ($name) longer than $MAX_NAME_LENGTH characters" + if length $name > $MAX_NAME_LENGTH; + + return $name; +} + +=method description + + Usage : my $description = $sample->description; + Purpose : Getter for description attribute + Returns : String (e.g. "ZMP phenotype 1.1 mutant") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub description { + my ($self) = @_; + return $description{ id $self}; +} + +=method set_description + + Usage : $sample->set_description('ZMP phenotype 1.1 mutant'); + Purpose : Setter for description attribute + Returns : undef + Parameters : String (the description) + Throws : No exceptions + Comments : None + +=cut + +sub set_description { + my ( $self, $arg ) = @_; + $description{ id $self} = $arg; + return; +} + +=method condition + + Usage : my $condition = $sample->condition; + Purpose : Getter for condition attribute + Returns : String (e.g. "mutant") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub condition { + my ($self) = @_; + return $condition{ id $self}; +} + +=method set_condition + + Usage : $sample->set_condition('mutant'); + Purpose : Setter for condition attribute + Returns : undef + Parameters : String (the condition) + Throws : No exceptions + Comments : None + +=cut + +sub set_condition { + my ( $self, $arg ) = @_; + $condition{ id $self} = _check_condition($arg); + return; +} + +# Usage : $condition = _check_condition($condition); +# Purpose : Check for valid condition +# Returns : String (the valid condition) +# Parameters : String (the condition) +# Throws : If condition is missing +# If condition is empty +# If condition > $MAX_GROUP_LENGTH characters +# Comments : None +sub _check_condition { + my ($condition) = @_; + + confess 'No condition specified' if !defined $condition; + confess 'Empty condition specified' if !length $condition; + confess + "Condition ($condition) longer than $MAX_CONDITION_LENGTH characters" + if length $condition > $MAX_CONDITION_LENGTH; + + return $condition; +} + +=method group + + Usage : my $group = $sample->group; + Purpose : Getter for group attribute + Returns : String (e.g. 
"1") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub group { + my ($self) = @_; + return $group{ id $self}; +} + +=method set_group + + Usage : $sample->set_group('1'); + Purpose : Setter for group attribute + Returns : undef + Parameters : String (the group) + Throws : No exceptions + Comments : None + +=cut + +sub set_group { + my ( $self, $arg ) = @_; + $group{ id $self} = _check_group($arg); + return; +} + +# Usage : $group = _check_group($group); +# Purpose : Check for valid group +# Returns : String (the valid group) +# Parameters : String (the group) +# Throws : If group is empty +# If group > $MAX_GROUP_LENGTH characters +# Comments : None +sub _check_group { + my ($group) = @_; + + confess 'Empty group specified' if defined $group && !length $group; + confess "Group ($group) longer than $MAX_GROUP_LENGTH characters" + if defined $group && length $group > $MAX_GROUP_LENGTH; + + return $group; +} + +=method tag + + Usage : my $tag = $sample->tag; + Purpose : Getter for tag attribute + Returns : String (e.g. "NNNNBGAGGC") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub tag { + my ($self) = @_; + return $tag{ id $self}; +} + +=method set_tag + + Usage : $sample->set_tag('NNNNBGAGGC'); + Purpose : Setter for tag attribute + Returns : undef + Parameters : String (the tag) + Throws : No exceptions + Comments : None + +=cut + +sub set_tag { + my ( $self, $arg ) = @_; + $tag{ id $self} = _check_tag($arg); + return; +} + +# Usage : $tag = _check_tag($tag); +# Purpose : Check for valid tag +# Returns : String (the valid tag) +# Parameters : String (the tag) +# Throws : If tag is missing or invalid +# Comments : None +sub _check_tag { + my ($tag) = @_; + return $tag + if defined $tag && $tag =~ m/\A [NRYKMSWBDHV]+ [AGCT]+ \z/xms; + confess 'No tag specified' if !defined $tag; + confess "Invalid tag ($tag) specified"; +} + +=method bam_file + + Usage : my $bam_file = $sample->bam_file; + Purpose : Getter for BAM file attribute + Returns : String (e.g. 
"8295_6#1.bam") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub bam_file { + my ($self) = @_; + return $bam_file{ id $self}; +} + +=method set_bam_file + + Usage : $sample->set_bam('8295_6#1.bam'); + Purpose : Setter for BAM file attribute + Returns : undef + Parameters : String (the BAM file) + Throws : No exceptions + Comments : None + +=cut + +sub set_bam_file { + my ( $self, $arg ) = @_; + $bam_file{ id $self} = check_bam_file($arg); + return; +} + +=method check_bam_file + + Usage : $bam_file = check_bam_file($bam_file); + Purpose : Check for valid BAM file + Returns : String (the valid BAM file) + Parameters : String (the BAM file) + Throws : If BAM file is missing or not readable + Comments : None + +=cut + +sub check_bam_file { + my ($bam_file) = @_; + return $bam_file if defined $bam_file && -r $bam_file; + confess 'No BAM file specified' if !defined $bam_file; + confess "BAM file ($bam_file) does not exist or cannot be read"; +} + +1; diff --git a/lib/DETCT/Sequence.pm b/lib/DETCT/Sequence.pm new file mode 100644 index 0000000..ff80064 --- /dev/null +++ b/lib/DETCT/Sequence.pm @@ -0,0 +1,161 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Sequence; +## use critic + +# ABSTRACT: Object representing a sequence (a component of a reference sequence) + +## Author : is1 +## Maintainer : is1 +## Created : 2012-09-21 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use Class::InsideOut qw( private register id ); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private name => my %name; # e.g. 1 +private bp => my %bp; # e.g. 60348388 + +# Constants +Readonly our $MAX_NAME_LENGTH => 128; + +=method new + + Usage : my $sequence = DETCT::Sequence->new( { + name => '1', + bp => 60_348_388, + } ); + Purpose : Constructor for sequence objects + Returns : DETCT::Sequence + Parameters : Hashref { + name => String, + bp => Int, + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_name( $arg_ref->{name} ); + $self->set_bp( $arg_ref->{bp} ); + return $self; +} + +=method name + + Usage : my $name = $sequence->name; + Purpose : Getter for name attribute + Returns : String (e.g. 
"1") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub name { + my ($self) = @_; + return $name{ id $self}; +} + +=method set_name + + Usage : $sequence->set_name('1'); + Purpose : Setter for name attribute + Returns : undef + Parameters : String (the name) + Throws : No exceptions + Comments : None + +=cut + +sub set_name { + my ( $self, $arg ) = @_; + $name{ id $self} = _check_name($arg); + return; +} + +# Usage : $name = _check_name($name); +# Purpose : Check for valid name +# Returns : String (the valid name) +# Parameters : String (the name) +# Throws : If name is missing +# If name is empty +# If name > $MAX_NAME_LENGTH characters +# Comments : None +sub _check_name { + my ($name) = @_; + + confess 'No name specified' if !defined $name; + confess 'Empty name specified' if !length $name; + confess "Name ($name) longer than $MAX_NAME_LENGTH characters" + if length $name > $MAX_NAME_LENGTH; + + return $name; +} + +=method bp + + Usage : my $bp = $sequence->bp; + Purpose : Getter for bp attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub bp { + my ($self) = @_; + return $bp{ id $self}; +} + +=method set_bp + + Usage : $sequence->set_bp(40352744); + Purpose : Setter for bp attribute + Returns : undef + Parameters : +ve Int (bp) + Throws : No exceptions + Comments : None + +=cut + +sub set_bp { + my ( $self, $arg ) = @_; + $bp{ id $self} = _check_bp($arg); + return; +} + +# Usage : $bp = _check_bp($bp); +# Purpose : Check for valid bp +# Returns : +ve Int (valid bp) +# Parameters : +ve Int (bp) +# Throws : If bp is missing or not a positive integer +# Comments : None +sub _check_bp { + my ($bp) = @_; + return $bp + if defined $bp && $bp =~ m/\A \d+ \z/xms; + confess 'No bp specified' if !defined $bp; + confess "Invalid bp ($bp) specified"; +} + +1; diff --git a/lib/DETCT/Transcript.pm b/lib/DETCT/Transcript.pm new file mode 100644 index 0000000..b7a406e --- /dev/null +++ b/lib/DETCT/Transcript.pm @@ -0,0 +1,516 @@ +## no critic (RequireUseStrict, RequireUseWarnings, RequireTidyCode) +package DETCT::Transcript; +## use critic + +# ABSTRACT: Object representing a transcript + +## Author : is1 +## Maintainer : is1 +## Created : 2013-01-28 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Readonly; +use Class::InsideOut qw( private register id ); + +=head1 SYNOPSIS + + # Brief code examples + +=cut + +# Attributes: +private stable_id => my %stable_id; # e.g. ENSDART00000133571 +private name => my %name; # e.g. cxc64-001 +private description => my %description; # e.g. CXC chemokine 64... +private biotype => my %biotype; # e.g. protein_coding +private seq_name => my %seq_name; # e.g. 5 +private start => my %start; # e.g. 40352744 +private end => my %end; # e.g. 40354399 +private strand => my %strand; # e.g. 
1 +private gene => my %gene; # DETCT::Gene + +# Constants +Readonly our $MAX_NAME_LENGTH => 128; + +=method new + + Usage : my $transcript = DETCT::Transcript->new( { + stable_id => 'ENSDART00000133571', + biotype => 'protein_coding', + seq_name => '5', + start => 40352744, + end => 40354399, + strand => 1, + } ); + Purpose : Constructor for transcript objects + Returns : DETCT::Transcript + Parameters : Hashref { + stable_id => String, + name => String or undef, + description => String or undef, + biotype => String, + seq_name => String, + start => +ve Int, + end => +ve Int, + strand => Int (1 or -1), + gene => DETCT::Gene, + } + Throws : No exceptions + Comments : None + +=cut + +sub new { + my ( $class, $arg_ref ) = @_; + my $self = register($class); + $self->set_stable_id( $arg_ref->{stable_id} ); + $self->set_name( $arg_ref->{name} ); + $self->set_description( $arg_ref->{description} ); + $self->set_biotype( $arg_ref->{biotype} ); + $self->set_seq_name( $arg_ref->{seq_name} ); + $self->set_start( $arg_ref->{start} ); + $self->set_end( $arg_ref->{end} ); + $self->set_strand( $arg_ref->{strand} ); + $self->set_gene( $arg_ref->{gene} ); + return $self; +} + +=method stable_id + + Usage : my $stable_id = $transcript->stable_id; + Purpose : Getter for stable id attribute + Returns : String (e.g. "ENSDART00000133571") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub stable_id { + my ($self) = @_; + return $stable_id{ id $self}; +} + +=method set_stable_id + + Usage : $transcript->set_stable_id('ENSDART00000133571'); + Purpose : Setter for stable id attribute + Returns : undef + Parameters : String (the stable id) + Throws : No exceptions + Comments : None + +=cut + +sub set_stable_id { + my ( $self, $arg ) = @_; + $stable_id{ id $self} = check_stable_id($arg); + return; +} + +=method check_stable_id + + Usage : $stable_id = check_stable_id($stable_id); + Purpose : Check for valid stable id + Returns : String (the valid stable id) + Parameters : String (the stable id) + Throws : If stable id is missing or invalid + Comments : None + +=cut + +sub check_stable_id { + my ($stable_id) = @_; + return $stable_id + if defined $stable_id && $stable_id =~ m/\A [[:upper:]]+ \d{11} \z/xms; + confess 'No stable id specified' if !defined $stable_id; + confess "Invalid stable id ($stable_id) specified"; +} + +=method name + + Usage : my $name = $transcript->name; + Purpose : Getter for name attribute + Returns : String (e.g. "cxc64-001") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub name { + my ($self) = @_; + return $name{ id $self}; +} + +=method set_name + + Usage : $transcript->set_name('cxc64-001'); + Purpose : Setter for name attribute + Returns : undef + Parameters : String (the name) + Throws : No exceptions + Comments : None + +=cut + +sub set_name { + my ( $self, $arg ) = @_; + $name{ id $self} = _check_name($arg); + return; +} + +# Usage : $name = _check_name($name); +# Purpose : Check for valid name +# Returns : String (the valid name) +# Parameters : String (the name) +# Throws : If name > $MAX_NAME_LENGTH characters +# Comments : None +sub _check_name { + my ($name) = @_; + return $name + if !defined $name + || ( length $name > 0 && length $name <= $MAX_NAME_LENGTH ); + confess 'Name is empty' if !length $name; + confess "Name ($name) longer than $MAX_NAME_LENGTH characters"; +} + +=method description + + Usage : my $description = $transcript->description; + Purpose : Getter for description attribute + Returns : String (e.g. 
"CXC chemokine 64") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub description { + my ($self) = @_; + return $description{ id $self}; +} + +=method set_description + + Usage : $transcript->set_description('CXC chemokine 64'); + Purpose : Setter for description attribute + Returns : undef + Parameters : String (the description) + Throws : No exceptions + Comments : None + +=cut + +sub set_description { + my ( $self, $arg ) = @_; + $description{ id $self} = $arg; + return; +} + +=method biotype + + Usage : my $biotype = $transcript->biotype; + Purpose : Getter for biotype attribute + Returns : String (e.g. "protein_coding") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub biotype { + my ($self) = @_; + return $biotype{ id $self}; +} + +=method set_biotype + + Usage : $transcript->set_biotype('protein_coding'); + Purpose : Setter for biotype attribute + Returns : undef + Parameters : String (the biotype) + Throws : No exceptions + Comments : None + +=cut + +sub set_biotype { + my ( $self, $arg ) = @_; + $biotype{ id $self} = check_biotype($arg); + return; +} + +=method check_biotype + + Usage : $biotype = check_biotype($biotype); + Purpose : Check for valid biotype + Returns : String (the valid biotype) + Parameters : String (the biotype) + Throws : If biotype is missing or invalid (i.e. not alphanumeric) + Comments : None + +=cut + +sub check_biotype { + my ($biotype) = @_; + return $biotype if defined $biotype && $biotype =~ m/\A \w+ \z/xms; + confess 'No biotype specified' if !defined $biotype; + confess "Invalid biotype ($biotype) specified"; +} + +=method seq_name + + Usage : my $seq_name = $transcript->seq_name; + Purpose : Getter for sequence name attribute + Returns : String (e.g. "5") + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub seq_name { + my ($self) = @_; + return $seq_name{ id $self}; +} + +=method set_seq_name + + Usage : $transcript->set_seq_name('5'); + Purpose : Setter for sequence name attribute + Returns : undef + Parameters : String (the sequence name) + Throws : No exceptions + Comments : None + +=cut + +sub set_seq_name { + my ( $self, $arg ) = @_; + $seq_name{ id $self} = check_seq_name($arg); + return; +} + +=method check_seq_name + + Usage : $seq_name = check_seq_name($seq_name); + Purpose : Check for valid sequence name + Returns : String (the valid sequence name) + Parameters : String (the sequence name) + Throws : If sequence name is missing or invalid (i.e. 
not alphanumeric) + Comments : None + +=cut + +sub check_seq_name { + my ($seq_name) = @_; + return $seq_name if defined $seq_name && $seq_name =~ m/\A \w+ \z/xms; + confess 'No sequence name specified' if !defined $seq_name; + confess "Invalid sequence name ($seq_name) specified"; +} + +=method start + + Usage : my $start = $transcript->start; + Purpose : Getter for start attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub start { + my ($self) = @_; + return $start{ id $self}; +} + +=method set_start + + Usage : $transcript->set_start(40352744); + Purpose : Setter for start attribute + Returns : undef + Parameters : +ve Int (the start) + Throws : No exceptions + Comments : None + +=cut + +sub set_start { + my ( $self, $arg ) = @_; + $start{ id $self} = check_start($arg); + return; +} + +=method check_start + + Usage : $start = check_start($start); + Purpose : Check for valid start + Returns : +ve Int (the valid start) + Parameters : +ve Int (the start) + Throws : If start is missing or not a positive integer + Comments : None + +=cut + +sub check_start { + my ($start) = @_; + return $start if defined $start && $start =~ m/\A \d+ \z/xms; + confess 'No start specified' if !defined $start; + confess "Invalid start ($start) specified"; +} + +=method end + + Usage : my $end = $transcript->end; + Purpose : Getter for end attribute + Returns : +ve Int + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub end { + my ($self) = @_; + return $end{ id $self}; +} + +=method set_end + + Usage : $transcript->set_end(40352744); + Purpose : Setter for end attribute + Returns : undef + Parameters : +ve Int (the end) + Throws : No exceptions + Comments : None + +=cut + +sub set_end { + my ( $self, $arg ) = @_; + $end{ id $self} = check_end($arg); + return; +} + +=method check_end + + Usage : $end = check_end($end); + Purpose : Check for valid end + Returns : +ve Int (the valid end) + Parameters : +ve Int (the end) + Throws : If end is missing or not a positive integer + Comments : None + +=cut + +sub check_end { + my ($end) = @_; + return $end if defined $end && $end =~ m/\A \d+ \z/xms; + confess 'No end specified' if !defined $end; + confess "Invalid end ($end) specified"; +} + +=method strand + + Usage : my $strand = $transcript->strand; + Purpose : Getter for strand attribute + Returns : Int (1 or -1) + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub strand { + my ($self) = @_; + return $strand{ id $self}; +} + +=method set_strand + + Usage : $transcript->set_strand(1); + Purpose : Setter for strand attribute + Returns : undef + Parameters : Int (the strand) + Throws : No exceptions + Comments : None + +=cut + +sub set_strand { + my ( $self, $arg ) = @_; + $strand{ id $self} = _check_strand($arg); + return; +} + +# Usage : $strand = _check_strand($strand); +# Purpose : Check for valid strand +# Returns : Int (1 or -1) (the valid strand) +# Parameters : Int (1 or -1) (the strand) +# Throws : If strand is missing or not 1 or -1 +# Comments : None +sub _check_strand { + my ($strand) = @_; + return $strand if defined $strand && $strand =~ m/\A \-? 
1 \z/xms; + confess 'No strand specified' if !defined $strand; + confess "Invalid strand ($strand) specified"; +} + +=method gene + + Usage : my $gene = $transcript->gene; + Purpose : Getter for gene attribute + Returns : DETCT::Gene + Parameters : None + Throws : No exceptions + Comments : None + +=cut + +sub gene { + my ($self) = @_; + return $gene{ id $self}; +} + +=method set_gene + + Usage : $transcript->set_gene($gene); + Purpose : Setter for gene attribute + Returns : undef + Parameters : DETCT::Gene + Throws : No exceptions + Comments : None + +=cut + +sub set_gene { + my ( $self, $arg ) = @_; + $gene{ id $self} = _check_gene($arg); + return; +} + +# Usage : $gene = _check_gene($gene); +# Purpose : Check for valid gene +# Returns : DETCT::Gene +# Parameters : DETCT::Gene +# Throws : If gene is invalid (i.e. not a DETCT::Gene object) +# Comments : None +sub _check_gene { + my ($gene) = @_; + confess 'Class of gene (', ref $gene, ') not DETCT::Gene' + if defined $gene && !$gene->isa('DETCT::Gene'); + return $gene; +} + +1; diff --git a/perlcritic.rc b/perlcritic.rc new file mode 100644 index 0000000..30963fa --- /dev/null +++ b/perlcritic.rc @@ -0,0 +1,12 @@ +severity = 1 +exclude = RequirePodSections RequireVersionVar + +[Documentation::PodSpelling] +stop_words_file = pod-stop-words.txt + +[Perl::Critic::Policy::BuiltinFunctions::ProhibitStringyEval] +allow_includes = 1 + +[InputOutput::RequireCheckedSyscalls] +functions = :builtins +exclude_functions = print sleep diff --git a/pod-stop-words.txt b/pod-stop-words.txt new file mode 100644 index 0000000..e69de29 diff --git a/script/detag_fastq.pl b/script/detag_fastq.pl new file mode 100644 index 0000000..cd53089 --- /dev/null +++ b/script/detag_fastq.pl @@ -0,0 +1,163 @@ +#!/usr/bin/env perl + +# PODNAME: detag_fastq.pl +# ABSTRACT: Extract tags from transcript counting FASTQ files and process files + +## Author : is1 +## Maintainer : is1 +## Created : 2012-12-15 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Getopt::Long; +use Pod::Usage; +use DETCT::Misc::Tag; + +=head1 DESCRIPTION + + +=head1 EXAMPLES + + +=cut + +# Default options +## no critic (ProhibitMagicNumbers) +my $fastq_read1_input; +my $fastq_read2_input; +my $fastq_output_prefix; +my $pre_detag_trim_length = 54; +my $polyt_trim_length = 14; +my $polyt_min_length = 10; +my @read_tags; +my $no_pair_suffix = 0; +my ( $help, $man ); +## use critic + +# Get and check command line options +get_and_check_options(); + +DETCT::Misc::Tag::detag_trim_fastq( + { + fastq_read1_input => $fastq_read1_input, + fastq_read2_input => $fastq_read2_input, + fastq_output_prefix => $fastq_output_prefix, + pre_detag_trim_length => $pre_detag_trim_length, + polyt_trim_length => $polyt_trim_length, + polyt_min_length => $polyt_min_length, + read_tags => \@read_tags, + no_pair_suffix => $no_pair_suffix, + } +); + +# Get and check command line options +sub get_and_check_options { + + # Get options + GetOptions( + 'fastq_read1_input=s' => \$fastq_read1_input, + 'fastq_read2_input=s' => \$fastq_read2_input, + 'fastq_output_prefix=s' => \$fastq_output_prefix, + 'pre_detag_trim_length=i' => \$pre_detag_trim_length, + 'polyt_trim_length=i' => \$polyt_trim_length, + 'polyt_min_length=i' => \$polyt_min_length, + 'read_tags=s@{1,}' => \@read_tags, + 'no_pair_suffix' => \$no_pair_suffix, + 'help' => \$help, + 'man' => \$man, + ) or pod2usage(2); + + # 
Documentation + if ($help) { + pod2usage(1); + } + elsif ($man) { + pod2usage( -verbose => 2 ); + } + + # Check options + if ( !$fastq_read1_input ) { + pod2usage("--fastq_read1_input must be specified\n"); + } + if ( !$fastq_read2_input ) { + pod2usage("--fastq_read2_input must be specified\n"); + } + if ( !$fastq_output_prefix ) { + pod2usage("--fastq_output_prefix must be specified\n"); + } + if ( !@read_tags ) { + pod2usage("--read_tags must be specified\n"); + } + + return; +} + +=head1 USAGE + + detag_fastq.pl + [--fastq_read1_input file] + [--fastq_read2_input file] + [--fastq_output_prefix prefix] + [--pre_detag_trim_length int] + [--polyt_trim_length int] + [--polyt_min_length int] + [--read_tags tags...] + [--no_pair_suffix] + [--help] + [--man] + +=head1 OPTIONS + +=over 8 + +=item B<--fastq_read1_input FILE> + +Input FASTQ file for read 1. + +=item B<--fastq_read2_input FILE> + +Input FASTQ file for read 2. + +=item B<--fastq_output_prefix FILE> + +Prefix for output FASTQ files. + +=item B<--pre_detag_trim_length INT> + +Length to trim reads to before detagging. + +=item B<--polyt_trim_length INT> + +Length of (largely) polyT to be trimmed. + +=item B<--polyt_min_length INT> + +Minimum number of consecutive Ts in length of polyT. + +=item B<--read_tags TAGS> + +Read tags. + +=item B<--no_pair_suffix> + +Input FASTQ file don't have pair suffixes. + +=item B<--help> + +Print a brief help message and exit. + +=item B<--man> + +Print this script's manual page and exit. + +=back + +=cut diff --git a/script/make_test_fasta.pl b/script/make_test_fasta.pl new file mode 100644 index 0000000..51f589b --- /dev/null +++ b/script/make_test_fasta.pl @@ -0,0 +1,164 @@ +#!/usr/bin/env perl + +# PODNAME: make_test_fasta.pl +# ABSTRACT: Make transcript counting test file in FASTA format + +## Author : is1 +## Maintainer : is1 +## Created : 2012-11-12 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Getopt::Long; +use Pod::Usage; + +=head1 DESCRIPTION + +This script generates test transcript counting FASTA files. The number and +maximum length of chromosomes can be varied. + +=head1 EXAMPLES + + # Generate random FASTA file using default values + perl script/make_test_fasta.pl > test.fa + + # Generate FASTA file with reproducible chromosomes using default values + perl script/make_test_fasta.pl --seed 1 > test.fa + + # Generate FASTA file with 25 chromosomes (each up to 50 Mbp long) + perl script/make_test_fasta.pl \ + --seq_region_count 25 \ + --seq_region_max_length 50_000_000 \ + > test.fa + +=cut + +# Default options +## no critic (ProhibitMagicNumbers) +my $seed; +my $seq_region_count = 1; +my $seq_region_max_length = 1_000_000; +my ( $help, $man ); +## use critic + +# Get and check command line options +get_and_check_options(); + +# Ensure reproducible chromosome lengths if seed set +if ( defined $seed ) { + srand $seed; +} + +# Make each chromosome of random length +my %length_of; +foreach my $seq_region ( 1 .. $seq_region_count ) { + my $length = int rand( $seq_region_max_length + 1 ); + $length_of{$seq_region} = $length; +} + +# Ensure sequences are always random +srand; + +# Generate sequence for each chromosome one by one +foreach my $seq_region ( 1 .. 
$seq_region_count ) { + printf ">%s\n", $seq_region; + my $length_required = $length_of{$seq_region}; + my $length_printed = 0; + while ($length_required) { + ## no critic (ProhibitMagicNumbers) + print qw( A G C T a g c t ) [ int rand 8 ]; + ## use critic + $length_required--; + $length_printed++; + + # Wrap every 80 bases + ## no critic (ProhibitMagicNumbers) + if ( !( $length_printed % 80 ) ) { + ## use critic + print "\n"; + } + } + + # Final new line if haven't just printed one + ## no critic (ProhibitMagicNumbers) + if ( $length_printed % 80 ) { + ## use critic + print "\n"; + } +} + +# Get and check command line options +sub get_and_check_options { + + # Get options + GetOptions( + 'seed=i' => \$seed, + 'seq_region_count=i' => \$seq_region_count, + 'seq_region_max_length=i' => \$seq_region_max_length, + 'help' => \$help, + 'man' => \$man, + ) or pod2usage(2); + + # Documentation + if ($help) { + pod2usage(1); + } + elsif ($man) { + pod2usage( -verbose => 2 ); + } + + # Check options + if ( !$seq_region_count ) { + pod2usage("--seq_region_count must be a positive integer\n"); + } + if ( !$seq_region_max_length ) { + pod2usage("--seq_region_max_length must be a positive integer\n"); + } + + return; +} + +=head1 USAGE + + make_test_fasta.pl + [--seed seed] + [--seq_region_count int] + [--seq_region_max_length int] + [--help] + [--man] + +=head1 OPTIONS + +=over 8 + +=item B<--seed INT> + +Random seed (to get reproducible chromosome lengths). + +=item B<--seq_region_count INT> + +Number of seq regions (default to 1). + +=item B<--seq_region_max_length INT> + +Maximum length of each seq region (defaults to 1,000,000 bp). + +=item B<--help> + +Print a brief help message and exit. + +=item B<--man> + +Print this script's manual page and exit. + +=back + +=cut diff --git a/script/make_test_fastq.pl b/script/make_test_fastq.pl new file mode 100644 index 0000000..a4ce378 --- /dev/null +++ b/script/make_test_fastq.pl @@ -0,0 +1,276 @@ +#!/usr/bin/env perl + +# PODNAME: make_test_fastq.pl +# ABSTRACT: Make transcript counting test files in FASTQ format + +## Author : is1 +## Maintainer : is1 +## Created : 2013-01-08 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Getopt::Long; +use Pod::Usage; + +=head1 DESCRIPTION + +This script generates test transcript counting FASTQ files. 
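+The number of read pairs, the read length, the length of the polyT tail and
+the read tags can all be varied. Read pairs are written to a pair of FASTQ
+files named using the output prefix (test_1.fastq and test_2.fastq by
+default) and a count of reads matching each tag is printed when the script
+finishes.
+
+The generated files can then be fed to detag_fastq.pl; a minimal sketch of
+that workflow is shown below (the tag and file names are only illustrative):
+
+    perl script/make_test_fastq.pl --output_prefix test --read_tags NNNNCTACCA
+    perl script/detag_fastq.pl \
+        --fastq_read1_input test_1.fastq \
+        --fastq_read2_input test_2.fastq \
+        --fastq_output_prefix detagged \
+        --read_tags NNNNCTACCA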
+ +=head1 EXAMPLES + + # Generate random FASTQ files using default values + perl script/make_test_fastq.pl --read_tags NNNNCTACCA + + # Generate FASTQ files with reproducible reads using default values + perl script/make_test_fastq.pl --seed 1 + + # Generate random FASTQ files with 1000 read pairs and 54 bp reads + perl script/make_test_fastq.pl \ + --read_tags NNNNCTACCA \ + --read_pair_count 1000 \ + --read_length 54 + +=cut + +# Default options +## no critic (ProhibitMagicNumbers) +my $seed; +my $output_prefix = 'test'; +my $read_pair_count = 100; +my @read_tags; +my $read_length = 75; +my $polyt_length = 14; +my ( $help, $man ); +## use critic + +# Get and check command line options +get_and_check_options(); + +# Assume all tags are same length +my $tag_length = length $read_tags[0]; + +# Add dummy tag for reads that don't match a real tag +push @read_tags, q{X} x $tag_length; + +# Ensure reproducible FASTQ files if seed set +if ( defined $seed ) { + srand $seed; +} + +# Generate start of each read name +## no critic (ProhibitMagicNumbers) +my $read_name_base = 'HS'; +$read_name_base .= ( int rand 50 ) + 1; # Instrument name +$read_name_base .= q{_}; +$read_name_base .= ( int rand 20_000 ) + 1; # Run +$read_name_base .= q{:}; +$read_name_base .= ( int rand 8 ) + 1; # Flowcell lane +$read_name_base .= q{:}; +## use critic + +my %tag_count; + +## no critic (RequireBriefOpen) +open my $fh1, '>', $output_prefix . '_1.fastq'; +open my $fh2, '>', $output_prefix . '_2.fastq'; +## use critic +foreach ( 1 .. $read_pair_count ) { + my $read_name = get_read_name($read_name_base); + + my $tag = @read_tags[ int rand scalar @read_tags ]; # Random tag + + # 20% of read 1s have no polyT + my $has_polyt = int rand 5 ? 1 : 0; ## no critic (ProhibitMagicNumbers) + + print {$fh1} q{@}, $read_name, '/1', "\n"; + print {$fh1} get_read1_seq( $read_length, $tag, $has_polyt ), "\n"; + print {$fh1} "+\n"; + print {$fh1} q{~} x $read_length, "\n"; + print {$fh2} q{@}, $read_name, '/2', "\n"; + print {$fh2} get_read2_seq($read_length), "\n"; + print {$fh2} "+\n"; + print {$fh2} q{~} x $read_length, "\n"; + + if ( !$has_polyt ) { + $tag = $read_tags[-1]; # Dummy tag + } + $tag_count{$tag}++; +} +close $fh1; +close $fh2; + +# Display tag counts +foreach my $read_tag (@read_tags) { + print $output_prefix, "\t", $read_tag, ":\t", + ( $tag_count{$read_tag} || 0 ), "\n"; +} + +# Construct read name +sub get_read_name { + my ( $read_name, ) = @_; + + ## no critic (ProhibitMagicNumbers) + $read_name .= ( int rand 3_000 ) + 1; # Tile number + $read_name .= q{:}; + $read_name .= ( int rand 20_000 ) + 1; # Cluster x coordinate + $read_name .= q{:}; + $read_name .= ( int rand 200_000 ) + 1; # Cluster y coordinate + ## use critic + + return $read_name; +} + +# Get read 1 sequence (just random but with tag) +sub get_read1_seq { + my ( $read_len, $tag, $has_polyt ) = @_; + + my $is_dummy_tag = $tag =~ m/X/xms ? 
1 : 0; # If tag is X then no tag + + # Replace IUPAC codes in tag with random bases + $tag =~ s/ N / qw( A G C T )[ int rand 4 ] /xmsge; + $tag =~ s/ B / qw( G C T )[ int rand 3 ] /xmsge; + $tag =~ s/ D / qw( A G T )[ int rand 3 ] /xmsge; + $tag =~ s/ H / qw( A C T )[ int rand 3 ] /xmsge; + $tag =~ s/ V / qw( A G C )[ int rand 3 ] /xmsge; + $tag =~ s/ R / qw( A G )[ int rand 2 ] /xmsge; + $tag =~ s/ Y / qw( C T )[ int rand 2 ] /xmsge; + $tag =~ s/ K / qw( G T )[ int rand 2 ] /xmsge; + $tag =~ s/ M / qw( A C )[ int rand 2 ] /xmsge; + $tag =~ s/ S / qw( G C )[ int rand 2 ] /xmsge; + $tag =~ s/ W / qw( A T )[ int rand 2 ] /xmsge; + $tag =~ s/ X / qw( A G C T )[ int rand 4 ] /xmsge; # Not actually IUPAC + + # Make the last two bases be Ns so should never match a real tag + if ( $is_dummy_tag && $has_polyt ) { # No need if not polyT + substr $tag, -2, 2, 'NN'; ## no critic (ProhibitMagicNumbers) + } + + # 20% of reads have a single mismatch somewhere in the tag + if ( int rand 5 ) { ## no critic (ProhibitMagicNumbers) + my $mismatch_base = int rand length $tag; + my $base = substr $tag, $mismatch_base, 1; + $base =~ tr/AGCT/TCGA/; + substr $tag, $mismatch_base, 1, $base; + } + + # Read begins with tag then polyT (or add polyA if doesn't have polyT) + my $seq = $tag; + $seq .= $has_polyt ? q{T} x $polyt_length : q{A} x $polyt_length; + $read_len -= length $seq; + + # Rest of read is random + ## no critic (ProhibitMagicNumbers) + $seq .= join q{}, map { qw( A G C T ) [ int rand 4 ] } 1 .. $read_len; + ## use critic + + return $seq; +} + +# Get read 2 sequence (just random) +sub get_read2_seq { + my ($read_len) = @_; + + ## no critic (ProhibitMagicNumbers) + return join q{}, map { qw( A G C T ) [ int rand 4 ] } 1 .. $read_len; + ## use critic +} + +# Get and check command line options +sub get_and_check_options { + + # Get options + GetOptions( + 'seed=i' => \$seed, + 'output_prefix=s' => \$output_prefix, + 'read_pair_count=i' => \$read_pair_count, + 'read_tags=s@{1,}' => \@read_tags, + 'read_length=i' => \$read_length, + 'polyt_length=i' => \$polyt_length, + 'help' => \$help, + 'man' => \$man, + ) or pod2usage(2); + + # Documentation + if ($help) { + pod2usage(1); + } + elsif ($man) { + pod2usage( -verbose => 2 ); + } + + # Check options + if ( !$output_prefix ) { + pod2usage("--output_prefix must be specified\n"); + } + if ( !$read_pair_count ) { + pod2usage("--read_pair_count must be a positive integer\n"); + } + if ( !$read_length ) { + pod2usage("--read_length must be a positive integer\n"); + } + if ( !@read_tags ) { + pod2usage("--read_tags must be specified\n"); + } + + return; +} + +=head1 USAGE + + make_test_fastq.pl + [--seed seed] + [--output_prefix prefix] + [--read_pair_count int] + [--read_tags tags...] + [--read_length int] + [--polyt_length int] + [--help] + [--man] + +=head1 OPTIONS + +=over 8 + +=item B<--seed INT> + +Random seed (to get reproducible chromosome lengths). + +=item B<--output_prefix FILE> + +Prefix for output FASTQ files. + +=item B<--read_pair_count INT> + +Number of read pairs aligned to each seq region (defaults to 100). + +=item B<--read_tags TAGS> + +Read tags. + +=item B<--read_length INT> + +Length of reads (defaults to 75 bp). + +=item B<--polyt_length INT> + +Length of polyT in read 1. + +=item B<--help> + +Print a brief help message and exit. + +=item B<--man> + +Print this script's manual page and exit. 
+ +=back + +=cut diff --git a/script/make_test_sam.pl b/script/make_test_sam.pl new file mode 100644 index 0000000..e88d2a1 --- /dev/null +++ b/script/make_test_sam.pl @@ -0,0 +1,630 @@ +#!/usr/bin/env perl + +# PODNAME: make_test_sam.pl +# ABSTRACT: Make transcript counting test file in SAM format + +## Author : is1 +## Maintainer : is1 +## Created : 2012-09-14 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Getopt::Long; +use Pod::Usage; +use Readonly; + +=head1 DESCRIPTION + +This script generates test transcript counting SAM files. The number and maximum +length of chromosomes can be varied along with the number and length of reads. +Read tags must be specified. + +=head1 EXAMPLES + + # Generate random BAM file using default values + perl script/make_test_sam.pl --read_tags NNNNCTACCA \ + | samtools view -bS - | samtools sort - test + + # Generate BAM file with reproducible chromosomes using default values + perl script/make_test_sam.pl --seed 1 --read_tags NNNNCTACCA \ + | samtools view -bS - | samtools sort - test + + # Generate BAM file with 25 chromosomes (each up to 50 Mbp long), 1000 + # alignments per chromosome and four 10mer tags + perl script/make_test_sam.pl \ + --seq_region_count 25 \ + --seq_region_max_length 50_000_000 \ + --read_pair_count 1000 \ + --read_tags NNNNCTACCA NNNNAAGTTA NNNNTTAATC NNNNTAGACA \ + | samtools view -bS - | samtools sort - test + +=cut + +# Constants from http://samtools.sourceforge.net/SAM1.pdf + +# Regexps for checking alignment line mandatory fields +Readonly our %ALIGNMENT_REGEXP_MANDATORY => ( + qname => qr/\A [!-?A-~]{1,255} \z/xms, + rname => qr/\A [*] | [!-()+-<>-~][!-~]* \z/xms, + cigar => qr/\A [*] | (\d+[MIDNSHPX=])+ \z/xms, + rnext => qr/\A [*] | = | [!-()+-<>-~][!-~]* \z/xms, + seq => qr/\A [*] | [[:alpha:]=.]+ \z/xms, + qual => qr/\A [!-~]+ \z/xms, +); + +# Ranges for checking alignment line mandatory fields +Readonly our %ALIGNMENT_RANGE_MANDATORY => ( + flag => [ 0, 2**16 - 1 ], + pos => [ 0, 2**29 - 1 ], + mapq => [ 0, 2**8 - 1 ], + pnext => [ 0, 2**29 - 1 ], + tlen => [ -2**29 + 1, 2**29 - 1 ], +); + +# Regexps for checking alignment line optional fields +Readonly our %ALIGNMENT_REGEXP_OPTIONAL => ( + A => qr/\A [!-~] \z/xms, + i => qr/\A [-+]?\d+ \z/xms, + f => qr/\A [-+]?\d*[.]?\d+([eE][-+]?\d+)? 
\z/xms, + Z => qr/\A [ !-~]+ \z/xms, + H => qr/\A [\dA-F]+ \z/xms, + B => qr/\A [cCsSiIf](,[-+]?\d*[.]?\d+([eE][-+]?\d+)?)+ \z/xms, +); + +# Bits of flag field +Readonly our $FLAG_READ_PAIRED => 1; +Readonly our $FLAG_PROPER_PAIR => 2; +Readonly our $FLAG_READ_UNMAPPED => 4; +Readonly our $FLAG_MATE_UNMAPPED => 8; +Readonly our $FLAG_READ_REVERSE_STRAND => 16; +Readonly our $FLAG_MATE_REVERSE_STRAND => 32; +Readonly our $FLAG_FIRST_IN_PAIR => 64; +Readonly our $FLAG_SECOND_IN_PAIR => 128; +Readonly our $FLAG_DUPLICATE => 1024; + +# Chance one read of a pair is unmapped +Readonly our $CHANCE_UNMAPPED => 0.1; + +# Default options +## no critic (ProhibitMagicNumbers) +my $seed; +my $seq_region_count = 1; +my $seq_region_max_length = 1_000_000; +my $read_pair_count = 100; +my @read_tags; +my $read1_length = 30; +my $read2_length = 54; +my ( $help, $man ); +## use critic + +# Get and check command line options +get_and_check_options(); + +# Ensure reproducible chromosome lengths if seed set +if ( defined $seed ) { + srand $seed; +} + +# Construct command line +my @cl = ('make_test_sam.pl'); +if ($seed) { + push @cl, '--seed', $seed; +} +push @cl, '--seq_region_count', $seq_region_count; +push @cl, '--seq_region_max_length', $seq_region_max_length; +push @cl, '--read_pair_count', $read_pair_count; +push @cl, '--read_tags', @read_tags; +push @cl, '--read1_length', $read1_length; +push @cl, '--read2_length', $read2_length; +my $cl = join q{ }, @cl; + +# Print HD and RG SAM header +print header_line( 'HD', [ 'VN', '1.4' ], [ 'SO', 'unsorted' ] ); +print header_line( 'RG', [ 'ID', q{1} ], [ 'SM', 'TC' ] ); +print header_line( + 'PG', + [ 'ID', q{1} ], + [ 'PN', 'make_test_sam.pl' ], + [ 'CL', $cl ] +); + +# Make each chromosome of random length and print SQ SAM headers +my %length_of; +foreach my $seq_region ( 1 .. $seq_region_count ) { + my $length = int rand( $seq_region_max_length + 1 ); + $length_of{$seq_region} = $length; + print header_line( 'SQ', [ 'SN', $seq_region ], [ 'LN', $length ] ); +} + +# Ensure alignments are always random +srand; + +# Generate start of each read name +## no critic (ProhibitMagicNumbers) +my $qname_base = 'HS'; +$qname_base .= ( int rand 50 ) + 1; # Instrument name +$qname_base .= q{_}; +$qname_base .= ( int rand 20_000 ) + 1; # Run +$qname_base .= q{:}; +$qname_base .= ( int rand 8 ) + 1; # Flowcell lane +$qname_base .= q{:}; +## use critic + +# Generate alignments for each chromosome one by one +foreach my $seq_region ( 1 .. $seq_region_count ) { + foreach ( 1 .. $read_pair_count ) { + my $read1_qname = get_qname( $qname_base, get_read_tag() ); + my $read2_qname = $read1_qname; # Always the same + my ( $read1_pos, $read2_pos ) = + get_pos( $length_of{$seq_region}, $read1_length, $read2_length ); + my ( $read1_flag, $read2_flag ) = get_flag( $read1_pos, $read2_pos ); + my ( $read1_tlen, $read2_tlen ) = + get_tlen( $read1_pos, $read2_pos, $read1_length, $read2_length ); + ( $read1_flag, $read2_flag, $read1_pos, $read2_pos ) = + get_unmapped( $read1_flag, $read2_flag, $read1_pos, $read2_pos ); + my ($read1_cigar) = get_cigar($read1_length); + my ($read2_cigar) = get_cigar($read2_length); + my ($read1_nm) = get_nm(); + my ($read2_nm) = get_nm(); + + # Rarely generate 50 to 99 real duplicates to simulate peaks + ## no critic (ProhibitMagicNumbers) + my $num_real_duplicates = 0; + if ( rand $read_pair_count < 2 ) { + $num_real_duplicates = int( rand 50 ) + 50; + } + ## use critic + + # Generate PCR duplicates (i.e. 
marked as duplicates) + ## no critic (ProhibitMagicNumbers) + my $num_pcr_duplicates = poisson_number(0.6); + ## use critic + + my $num_duplicates = $num_real_duplicates + $num_pcr_duplicates; + foreach my $read_pair_count ( 1 .. $num_duplicates + 1 ) { + + # First read + print alignment_line( + qname => $read1_qname, + flag => $read1_flag, + rname => $seq_region, + pos => $read1_pos, + mapq => 255, + cigar => $read1_cigar, + rnext => q{=}, + pnext => $read2_pos, + tlen => $read1_tlen, + seq => get_seq($read1_length), + qual => get_qual($read1_length), + opt => { + 'NM:i' => $read1_nm, + 'RG:Z' => q{1}, + }, + ); + + # Second read + print alignment_line( + qname => $read2_qname, + flag => $read2_flag, + rname => $seq_region, + pos => $read2_pos, + mapq => 255, + cigar => $read2_cigar, + rnext => q{=}, + pnext => $read1_pos, + tlen => $read2_tlen, + seq => get_seq($read2_length), + qual => get_qual($read2_length), + opt => { + 'NM:i' => $read2_nm, + 'RG:Z' => q{1}, + }, + ); + + if ( $read_pair_count == $num_real_duplicates + 1 ) { + + # Mark rest of reads as duplicates + $read1_flag = $read1_flag | $FLAG_DUPLICATE; + $read2_flag = $read2_flag | $FLAG_DUPLICATE; + } + } + } +} + +# Generate SAM header line +sub header_line { + my ( $record_type, @data ) = @_; + + my $header_line = q{}; + + if ( $record_type !~ m/\A [[:alpha:]][[:alpha:]] \z/xms ) { + confess 'Invalid record type (', $record_type, q{)}; + } + + $header_line .= q{@} . $record_type; + + foreach my $datum (@data) { + if ( ref $datum ne 'ARRAY' ) { + confess 'Arrayref of tag / value pairs is required (not ', + ref $datum, q{)}; + } + my ( $tag, $value ) = @{$datum}; + if ( $tag !~ m/\A [[:alpha:]][[:alpha:]\d] \z/xms ) { + confess 'Invalid tag (', $tag, q{)}; + } + if ( $value !~ m/\A [ -~]+ \z/xms ) { + confess 'Invalid value (', $value, q{)}; + } + + $header_line .= "\t" . $tag . q{:} . 
$value; + } + + $header_line .= "\n"; + + return $header_line; +} + +# Generate SAM alignment line +sub alignment_line { + my (%data) = @_; + + # Check string fields + foreach my $field ( sort keys %ALIGNMENT_REGEXP_MANDATORY ) { + if ( $data{$field} !~ $ALIGNMENT_REGEXP_MANDATORY{$field} ) { + confess 'Invalid ', uc $field, ' (', $data{$field}, q{)}; + } + } + + # Check int fields + foreach my $field ( sort keys %ALIGNMENT_RANGE_MANDATORY ) { + if ( $data{$field} < $ALIGNMENT_RANGE_MANDATORY{$field}->[0] + || $data{$field} > $ALIGNMENT_RANGE_MANDATORY{$field}->[1] ) + { + confess 'Invalid ', uc $field, ' (', $data{$field}, q{)}; + } + } + + # Mandatory fields + my $alignment_line = join "\t", $data{qname}, $data{flag}, $data{rname}, + $data{pos}, $data{mapq}, $data{cigar}, $data{rnext}, $data{pnext}, + $data{tlen}, $data{seq}, $data{qual}; + + # Optional fields + if ( exists $data{opt} ) { + foreach my $tag_type ( keys %{ $data{opt} } ) { + my $value = $data{opt}->{$tag_type}; + my ( $tag, $type ) = split /:/xms, $tag_type; + + # Validate tag + if ( $tag !~ /\A [[:alpha:]][[:alpha:]\d] \z/xms ) { + confess 'Invalid tag (', $tag, q{)}; + } + + # Validate type + if ( !exists $ALIGNMENT_REGEXP_OPTIONAL{$type} ) { + confess 'Invalid type (', $type, q{)}; + } + + # Validate value + if ( $value !~ $ALIGNMENT_REGEXP_OPTIONAL{$type} ) { + confess 'Invalid ', $tag, ' (', $value, q{)}; + } + + $alignment_line .= "\t"; + $alignment_line .= join q{:}, $tag, $type, $value; + } + } + + $alignment_line .= "\n"; + + return $alignment_line; +} + +# Get a random read tag and substitute random bases +sub get_read_tag { + my $tag = $read_tags[ int rand $#read_tags + 1 ]; + + # Replace IUPAC code with random bases + $tag =~ s/ N / qw( A G C T )[ int rand 4 ] /xmsge; + $tag =~ s/ B / qw( G C T )[ int rand 3 ] /xmsge; + $tag =~ s/ D / qw( A G T )[ int rand 3 ] /xmsge; + $tag =~ s/ H / qw( A C T )[ int rand 3 ] /xmsge; + $tag =~ s/ V / qw( A G C )[ int rand 3 ] /xmsge; + $tag =~ s/ R / qw( A G )[ int rand 2 ] /xmsge; + $tag =~ s/ Y / qw( C T )[ int rand 2 ] /xmsge; + $tag =~ s/ K / qw( G T )[ int rand 2 ] /xmsge; + $tag =~ s/ M / qw( A C )[ int rand 2 ] /xmsge; + $tag =~ s/ S / qw( G C )[ int rand 2 ] /xmsge; + $tag =~ s/ W / qw( A T )[ int rand 2 ] /xmsge; + + return $tag; +} + +# Construct read name +sub get_qname { + my ( $qname, $read_tag ) = @_; + + ## no critic (ProhibitMagicNumbers) + $qname .= ( int rand 3_000 ) + 1; # Tile number + $qname .= q{:}; + $qname .= ( int rand 20_000 ) + 1; # Cluster x coordinate + $qname .= q{:}; + $qname .= ( int rand 200_000 ) + 1; # Cluster y coordinate + $qname .= q{#}; + $qname .= $read_tag; + ## use critic + + return $qname; +} + +# Get position for both reads +sub get_pos { + my ( $seq_region_len, $read1_len, $read2_len ) = @_; + + my ( $read1_pos, $read2_pos ); + + my $pair_ok = 0; + + while ( !$pair_ok ) { + $read1_pos = ( int rand $seq_region_len ) + 1; + $read2_pos = ( int rand $seq_region_len ) + 1; + $pair_ok = 1; + + my $read1_end = $read1_pos + $read1_len - 1; + my $read2_end = $read2_pos + $read2_len - 1; + + # Check reads are within seq region + if ( $read1_end > $seq_region_len ) { + $pair_ok = 0; + } + if ( $read2_end > $seq_region_len ) { + $pair_ok = 0; + } + + # Check reads don't overlap + if ( $read1_pos <= $read2_end && $read1_end >= $read2_pos ) { + $pair_ok = 0; + } + } + + return $read1_pos, $read2_pos; +} + +# Get flags for both reads (http://picard.sourceforge.net/explain-flags.html) +sub get_flag { + my ( $read1_pos, $read2_pos ) = @_; + + my 
$read1_flag = $FLAG_READ_PAIRED | $FLAG_PROPER_PAIR; + my $read2_flag = $FLAG_READ_PAIRED | $FLAG_PROPER_PAIR; + + if ( $read1_pos < $read2_pos ) { + $read1_flag = $read1_flag | $FLAG_MATE_REVERSE_STRAND; + $read2_flag = $read2_flag | $FLAG_READ_REVERSE_STRAND; + } + else { + $read1_flag = $read1_flag | $FLAG_READ_REVERSE_STRAND; + $read2_flag = $read2_flag | $FLAG_MATE_REVERSE_STRAND; + } + + $read1_flag = $read1_flag | $FLAG_FIRST_IN_PAIR; + $read2_flag = $read2_flag | $FLAG_SECOND_IN_PAIR; + + return $read1_flag, $read2_flag; +} + +# Get template length for both reads +sub get_tlen { + my ( $read1_pos, $read2_pos, $read1_len, $read2_len ) = @_; + + my ( $read1_tlen, $read2_tlen ); + + if ( $read1_pos < $read2_pos ) { + $read1_tlen = $read2_pos - $read1_pos + $read2_len; + $read2_tlen = -$read1_tlen; + } + else { + $read2_tlen = $read1_pos - $read2_pos + $read1_len; + $read1_tlen = -$read2_tlen; + } + + return $read1_tlen, $read2_tlen; +} + +# Adjust flags and positions if a read is unmapped +sub get_unmapped { + my ( $read1_flag, $read2_flag, $read1_pos, $read2_pos ) = @_; + + if ( rand() < $CHANCE_UNMAPPED ) { + if ( rand() < 0.5 ) { ## no critic (ProhibitMagicNumbers) + # Read 1 unmapped + $read1_flag = $read1_flag | $FLAG_READ_UNMAPPED; + $read2_flag = $read2_flag | $FLAG_MATE_UNMAPPED; + $read1_pos = $read2_pos; + } + else { + # Read 2 unmapped + $read2_flag = $read2_flag | $FLAG_READ_UNMAPPED; + $read1_flag = $read1_flag | $FLAG_MATE_UNMAPPED; + $read2_pos = $read1_pos; + } + } + + return $read1_flag, $read2_flag, $read1_pos, $read2_pos; +} + +# Get sequence (just random) +sub get_seq { + my ($read_len) = @_; + + ## no critic (ProhibitMagicNumbers) + return join q{}, map { qw( A G C T ) [ int rand 4 ] } 1 .. $read_len; + ## use critic +} + +# Get CIGAR string containing random soft clipping +sub get_cigar { + my ($read_len) = @_; + + my $m = $read_len; # Length of alignment match + + ## no critic (ProhibitMagicNumbers) + my $s1 = poisson_number(0.7); # Soft clipping at start of alignment + my $s2 = poisson_number(0.7); # Soft clipping at end of alignment + ## use critic + + $m = $m - $s1 - $s2; + + # Construct CIGAR + + my $cigar = $m . q{M}; + + if ($s1) { + $cigar = $s1 . q{S} . $cigar; + } + if ($s2) { + $cigar = $cigar . $s2 . 
q{S}; + } + + return $cigar; +} + +# Get quality +sub get_qual { + my ($read_len) = @_; + + return q{~} x $read_len; +} + +# Get random number of mismatches for a read +sub get_nm { + ## no critic (ProhibitMagicNumbers) + return poisson_number(0.6); # ~ e^-0.5, so skewed towards 0 and 1 + ## use critic +} + +# Generate random Poisson-distributed number using Knuth's algorithm +sub poisson_number { + my ($l) = @_; # e^-lambda + + my $k = 0; + my $p = 1; + + while ( $p > $l ) { + $k++; + $p = $p * rand; + } + + return $k - 1; +} + +# Get and check command line options +sub get_and_check_options { + + # Get options + GetOptions( + 'seed=i' => \$seed, + 'seq_region_count=i' => \$seq_region_count, + 'seq_region_max_length=i' => \$seq_region_max_length, + 'read_pair_count=i' => \$read_pair_count, + 'read_tags=s@{1,}' => \@read_tags, + 'read1_length=i' => \$read1_length, + 'read2_length=i' => \$read2_length, + 'help' => \$help, + 'man' => \$man, + ) or pod2usage(2); + + # Documentation + if ($help) { + pod2usage(1); + } + elsif ($man) { + pod2usage( -verbose => 2 ); + } + + # Check options + if ( !$seq_region_count ) { + pod2usage("--seq_region_count must be a positive integer\n"); + } + if ( !$seq_region_max_length ) { + pod2usage("--seq_region_max_length must be a positive integer\n"); + } + if ( !$read_pair_count ) { + pod2usage("--read_pair_count must be a positive integer\n"); + } + if ( !$read1_length ) { + pod2usage("--read1_length must be a positive integer\n"); + } + if ( !$read2_length ) { + pod2usage("--read2_length must be a positive integer\n"); + } + if ( !@read_tags ) { + pod2usage("--read_tags must be specified\n"); + } + + return; +} + +=head1 USAGE + + make_test_sam.pl + [--seed seed] + [--seq_region_count int] + [--seq_region_max_length int] + [--read_pair_count int] + [--read_tags tags...] + [--read1_length int] + [--read2_length int] + [--help] + [--man] + +=head1 OPTIONS + +=over 8 + +=item B<--seed INT> + +Random seed (to get reproducible chromosome lengths). + +=item B<--seq_region_count INT> + +Number of seq regions (default to 1). + +=item B<--seq_region_max_length INT> + +Maximum length of each seq region (defaults to 1,000,000 bp). + +=item B<--read_pair_count INT> + +Number of read pairs aligned to each seq region (defaults to 100). + +=item B<--read_tags TAGS> + +Read tags. + +=item B<--read1_length INT> + +Length of read 1 after trimming (defaults to 30 bp). + +=item B<--read2_length INT> + +Length of read 2 (defaults to 54 bp). + +=item B<--help> + +Print a brief help message and exit. + +=item B<--man> + +Print this script's manual page and exit. 
+ +=back + +=cut diff --git a/script/run_de_pipeline.pl b/script/run_de_pipeline.pl new file mode 100644 index 0000000..2f61ed6 --- /dev/null +++ b/script/run_de_pipeline.pl @@ -0,0 +1,236 @@ +#!/usr/bin/env perl + +# PODNAME: run_de_pipeline.pl +# ABSTRACT: Run DETCT differential expression pipeline + +## Author : is1 +## Maintainer : is1 +## Created : 2012-09-26 +## Last commit by : $Author$ +## Last modified : $Date$ +## Revision : $Revision$ +## Repository URL : $HeadURL$ + +use warnings; +use strict; +use autodie; +use Carp; +use Try::Tiny; + +use Probe::Perl; +use Getopt::Long; +use Pod::Usage; +use English qw( -no_match_vars ); +use File::Spec; +use File::Slurp; +use DETCT::Pipeline::WithDiffExprStages; +use DETCT::Analysis; + +=head1 DESCRIPTION + + + +=head1 EXAMPLES + + + +=cut + +# Default options +my $scheduler = 'lsf'; +my $analysis_dir = q{.}; +my $analysis_yaml = File::Spec->catfile( $analysis_dir, 'analysis.yaml' ); +my $stages_yaml = File::Spec->catfile( $analysis_dir, 'stages.yaml' ); +## no critic (ProhibitMagicNumbers) +my $max_retries = 10; +my $sleep_time = 600; # 10 minutes +## use critic +my $stage_to_run; +my $component_to_run; +my $verbose; +my ( $help, $man ); + +# Get command line (including interpreter and options) +my $cmd_line = get_cmd_line(); + +# Get and check command line options +get_and_check_options(); + +# Create analysis +my $analysis = DETCT::Analysis->new_from_yaml($analysis_yaml); + +# Create pipeline +my $pipeline = DETCT::Pipeline::WithDiffExprStages->new( + { + scheduler => $scheduler, + analysis_dir => $analysis_dir, + analysis => $analysis, + cmd_line => $cmd_line, + max_retries => $max_retries, + sleep_time => $sleep_time, + verbose => $verbose, + } +); + +# Add stages to pipeline +$pipeline->add_stages_from_yaml($stages_yaml); + +# Are we running the main pipeline or running a specific component of a specific +# stage (i.e. a job to be run under LSF or locally)? +if ($stage_to_run) { + $pipeline->set_stage_to_run( $pipeline->get_stage_by_name($stage_to_run) ); +} +if ($component_to_run) { + $pipeline->set_component_to_run($component_to_run); +} + +# Turn off verbose output when running specific components +if ( $pipeline->stage_to_run && $pipeline->component_to_run ) { + $pipeline->set_verbose(0); +} + +# Write overview of pipeline input and config files to log file +if ( !$pipeline->stage_to_run && !$pipeline->component_to_run ) { + my @log = map { "$_\n" } $pipeline->input_overview; + push @log, "\nYAML analysis config file:\n\n", read_file($analysis_yaml); + push @log, "\nYAML stages config file:\n\n", read_file($stages_yaml); + $pipeline->write_log_file( 'de.log', @log ); +} + +# Print overview of pipeline input +$pipeline->say_if_verbose( $pipeline->input_overview ); + +# Run pipeline +$pipeline->run(); + +# Get entire command line +sub get_cmd_line { + + # Get all lib directories + my %lib = map { $_ => 1 } @INC; + + # Remove default lib directories + foreach my $lib ( Probe::Perl->perl_inc() ) { + delete $lib{$lib}; + } + + # Remove PERL5LIB lib directories + foreach my $lib ( split /:/xms, $ENV{PERL5LIB} ) { + delete $lib{$lib}; + } + + # Reconstruct -I lib directories + my @libs; + foreach my $lib ( keys %lib ) { + push @libs, '-I' . 
$lib; + } + + return join q{ }, Probe::Perl->find_perl_interpreter(), @libs, + $PROGRAM_NAME, @ARGV; +} + +# Get and check command line options +sub get_and_check_options { + + # Get options + GetOptions( + 'scheduler=s' => \$scheduler, + 'dir=s' => \$analysis_dir, + 'analysis_yaml=s' => \$analysis_yaml, + 'stages_yaml=s' => \$stages_yaml, + 'max_retries=i' => \$max_retries, + 'sleep_time=i' => \$sleep_time, + 'stage=s' => \$stage_to_run, + 'component=i' => \$component_to_run, + 'verbose' => \$verbose, + 'help' => \$help, + 'man' => \$man, + ) or pod2usage(2); + + # Documentation + if ($help) { + pod2usage(1); + } + elsif ($man) { + pod2usage( -verbose => 2 ); + } + + # Check options + if ( $scheduler ne 'lsf' && $scheduler ne 'local' ) { + pod2usage("--scheduler must be 'lsf' or 'local'\n"); + } + if ( $stage_to_run && !$component_to_run + || !$stage_to_run && $component_to_run ) + { + pod2usage("--stage and --component must be specified together\n"); + } + + return; +} + +=head1 USAGE + + run_de_pipeline.pl + [--scheduler lsf|local] + [--dir directory] + [--analysis_yaml file] + [--stages_yaml file] + [--max_retries int] + [--sleep_time int] + [--stage stage] + [--component int] + [--verbose] + [--help] + [--man] + +=head1 OPTIONS + +=over 8 + +=item B<--scheduler lsf|local> + +Job scheduler - lsf (default) or local (for testing). + +=item B<--dir DIRECTORY> + +Working directory for analysis. + +=item B<--analysis_yaml FILE> + +YAML analysis configuration file. + +=item B<--stages_yaml FILE> + +YAML stages configuration file. + +=item B<--max_retries INT> + +Maximum number of times to retry a failing job. + +=item B<--sleep_time INT> + +Time to sleep, in seconds, between each iteration of the pipeline. + +=item B<--stage STAGE> + +The specific stage of the pipeline to be run. + +=item B<--component INT> + +The index of the component of the specified stage of the pipeline to be run. + +=item B<--verbose> + +Print information about the pipeline as it runs. + +=item B<--help> + +Print a brief help message and exit. + +=item B<--man> + +Print this script's manual page and exit. 
+ +=back + +=cut diff --git a/script/run_deseq.R b/script/run_deseq.R new file mode 100644 index 0000000..920f79b --- /dev/null +++ b/script/run_deseq.R @@ -0,0 +1,124 @@ +library(DESeq) +library(RColorBrewer) +library(gplots) + +Args <- commandArgs(); +countFile <- Args[4] +designFile <- Args[5] +outputFile <- Args[6] +sizeFactorsFile <- Args[7] +qcPdfFile <- Args[8] + +# Get data and design +countTable <- read.table( countFile, header=TRUE, row.names=1 ) +design <- read.table( designFile, header=TRUE, row.names=1 ) +numFactors <- ncol(design) +numConditions <- nlevels(design$condition) + +# Check design +if (numFactors > 2) { + stop("Too many factors") +} +if (numConditions != 2) { + stop("Must be two conditions") +} + +# Write QC graphs to PDF +pdf(qcPdfFile) + +# Create CountDataSets +cdsOneFactFull <- newCountDataSet( countTable, design$condition ) +if (numFactors == 2) { + cdsTwoFactFull <- newCountDataSet( countTable, design ) +} + +# Remove regions with sum of counts below the 40th quantile +# See "5 Independent filtering and multiple testing" of +# http://bioconductor.org/packages/devel/bioc/vignettes/DESeq/inst/doc/DESeq.pdf +rs <- rowSums ( counts ( cdsOneFactFull )) +use <- (rs > quantile(rs, probs=0.4)) +cdsOneFactFilt <- cdsOneFactFull[ use, ] +if (numFactors == 2) { + cdsTwoFactFilt <- cdsTwoFactFull[ use, ] +} + +# Normalise +cdsOneFactFull <- estimateSizeFactors( cdsOneFactFull ) +cdsOneFactFilt <- estimateSizeFactors( cdsOneFactFilt ) +if (numFactors == 2) { + cdsTwoFactFull <- estimateSizeFactors( cdsTwoFactFull ) + cdsTwoFactFilt <- estimateSizeFactors( cdsTwoFactFilt ) +} +write.table( sizeFactors( cdsOneFactFull ), file=sizeFactorsFile, + col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t" ) + +# Estimate variance +cdsOneFactFiltPooled <- tryCatch({ + estimateDispersions( cdsOneFactFilt ) +}, error = function(e) { + estimateDispersions( cdsOneFactFilt, fitType="local" ) +}) +cdsOneFactFullBlind <- tryCatch({ + estimateDispersions( cdsOneFactFull, method="blind" ) +}, error = function(e) { + estimateDispersions( cdsOneFactFull, method="blind", fitType="local" ) +}) +if (numFactors == 1) { + plotDispEsts( cdsOneFactFiltPooled ) +} else if (numFactors == 2) { + cdsTwoFactFiltPooledCR <- tryCatch({ + estimateDispersions( cdsTwoFactFilt, method="pooled-CR" ) + }, error = function(e) { + estimateDispersions( cdsTwoFactFilt, method="pooled-CR", fitType="local" ) + }) + cdsTwoFactFullBlind <- tryCatch({ + estimateDispersions( cdsTwoFactFull, method="blind" ) + }, error = function(e) { + estimateDispersions( cdsTwoFactFull, method="blind", fitType="local" ) + }) + plotDispEsts( cdsTwoFactFiltPooledCR ) +} + +# Compare conditions +conditions <- levels(design$condition) +res <- nbinomTest( cdsOneFactFiltPooled, conditions[1], conditions[2] ) +if (numFactors == 2) { + fit1 <- fitNbinomGLMs( cdsTwoFactFiltPooledCR, count ~ group + condition ) + fit0 <- fitNbinomGLMs( cdsTwoFactFiltPooledCR, count ~ group ) + res$pval <- nbinomGLMTest( fit1, fit0 ) + res$padj <- p.adjust( res$pval, method="BH" ) +} +plotMA(res) +hist(res$pval, breaks=100, col="skyblue", border="slateblue", + main="Histogram of p values") + +# Write output +res = data.frame(id=res$id, pval=res$pval, padj=res$padj) +write.table( res, file=outputFile, col.names=FALSE, row.names=FALSE, + quote=FALSE, sep="\t" ) + +# Variance stabilising transformation +vsdOneFactFull <- varianceStabilizingTransformation( cdsOneFactFullBlind ) +if (numFactors == 2) { + vsdTwoFactFull <- varianceStabilizingTransformation( 
cdsTwoFactFullBlind )
+}
+
+# Plot heatmap of counts
+select <- order(rowMeans(counts(cdsOneFactFull)), decreasing=TRUE)[1:30]
+hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
+heatmap.2(exprs(vsdOneFactFull)[select,], col=hmcol, trace="none",
+    margin=c(10, 6))
+
+# Plot heatmap of sample to sample distances
+dists <- dist( t( exprs(vsdOneFactFull) ) )
+mat <- as.matrix( dists )
+heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(13, 13))
+
+# Plot PCA of samples
+print(plotPCA(vsdOneFactFull, intgroup=c("condition")))
+if (numFactors == 2) {
+    print(plotPCA(vsdTwoFactFull, intgroup=c("group")))
+    print(plotPCA(vsdTwoFactFull, intgroup=c("condition", "group")))
+}
+
+dev.off()
diff --git a/src/quince_chiphmmnew.cpp b/src/quince_chiphmmnew.cpp
new file mode 100644
index 0000000..d8a86ff
--- /dev/null
+++ b/src/quince_chiphmmnew.cpp
@@ -0,0 +1,596 @@
+// steve qin.
+// 07/01/08
+//#include "stdafx.h"
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <cstdio>
+#include <ctime>
+using namespace std;
+
+#define MAX_LENGTH 500
+#define PI 3.14159265
+#define LIMIT 100
+#define MIN(a,b) ((a) < (b) ? (a):(b))
+#define MAX(a,b) ((a) > (b) ? (a):(b))
+
+void readData(const char dataFileName[], vector<int> &order,
+        vector<double> &data, int *nRow, double *totalcount);
+
+double logIntPoisson(const int k, const double lambda);
+
+double logPoisson(const double x, const double lambda);
+
+double genPoisson(const double y, const double mu, const double alpha);
+
+double logGenPoisson(const double y, const double mu, const double alpha);
+
+double logIntGenPoisson(const int y, const double mu, const double alpha);
+
+double logIntTrunc0GenPoisson(const int y, const double mu, const double alpha);
+
+double logTrunc0GenPoisson(const double y, const double mu, const double alpha);
+
+void pathFinder(const int count, const vector<int> &order,
+        const vector<double> &data, const int chromosomeLengthInBins,
+        int *&path, double *&proba, double *&logproba, double *&hits,
+        const double totalMapReads, const double totalPeakReads,
+        const double totalPeakArea, const double medianPeakBinCount,
+        const int numPeaks, const int readcoverage, const int threshold);
+
+void parameterEstimate(const double *peaksdata, const int peakscount,
+        const int threshold, const double mu, const double alpha,
+        double *mufore, double *alphafore);
+
+double likelihood(const double *y, const int chromosomeLengthInBins,
+        const int threshold, const double mu, const double alpha);
+
+double unran(int *na, int *nb, int *nc);
+
+double gammaln(double xx);
+
+int main(int argc, char **argv) {
+    int j, numCycles = 1;
+    int count, chromosomeLengthInBins, binSize;
+    int readcoverage = 0;//added 04/13/08
+    vector<int> order;
+    vector<double> data;
+    int *path;
+    double *hits;
+    double *proba, *logproba;
+    char inputname[MAX_LENGTH] = "10.select.chr21.txt";//"jy9.10.select.chr22.txt";//"sle.txt";
+    char parasname[MAX_LENGTH] = "jy10.paras.txt";
+    char outputname[MAX_LENGTH] = "out.txt";
+    ofstream outPutFile;
+    double totalMapReads = 0;
+    double totalPeakReads = 0;
+    double totalPeakArea = 0;
+    double medianPeakWidth = 0;
+    double medianPeakBinCount = 0;
+    int numPeaks = 0;
+    istringstream iss;
+    string lineString;
+    double temval;
+    vector<double> datas;
+    double totalcount;
+    int threshold;
+
+    if (argc != 4) {
+        printf(
+                "3 options need to be specified:\n\tinput file name,\n\tinformation file name,\n\toutputfile name.\n");
+        exit(0);
+    }
+    for (j = 0; j < MAX_LENGTH; j++) {
+        inputname[j] = argv[1][j];
+        parasname[j] = argv[2][j];
+        outputname[j] = argv[3][j];
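+        // The three arguments are the binned read-count file, the parameters
+        // ("information") file and the output file. The parameters file is
+        // read immediately after this loop and is expected to supply nine
+        // values, one per line: total mapped reads, total peak reads, total
+        // peak area, median peak width, number of peaks, read coverage, bin
+        // count threshold, bin size and chromosome length in bins.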
+ } + + ifstream inFile(parasname); + if (!inFile) { + cout << "Error opening input parameter file" << parasname << endl; + exit(0); + } + for (j = 0; j < 9; j++) { + getline(inFile, lineString); + iss.clear(); + iss.str(lineString + " "); + iss >> temval; + datas.push_back(temval); + } + totalMapReads = datas[0]; + totalPeakReads = datas[1]; + totalPeakArea = datas[2]; + medianPeakWidth = datas[3]; + // if(medianPeakWidth >800) + // medianPeakWidth = 800; + numPeaks = (int) datas[4]; + readcoverage = (int) datas[5]; + threshold = (int) datas[6]; + binSize = (int) datas[7]; + chromosomeLengthInBins = (int) datas[8]; + + medianPeakBinCount = medianPeakWidth / (double) binSize; + path = new int[chromosomeLengthInBins]; + proba = new double[chromosomeLengthInBins]; + logproba = new double[chromosomeLengthInBins]; + srand((unsigned) time(NULL)); + readData(inputname, order, data, &count, &totalcount); + for (j = 0; j < numCycles; j++) { + pathFinder(count, order, data, chromosomeLengthInBins, path, proba, + logproba, hits, totalMapReads, totalPeakReads, totalPeakArea, + medianPeakBinCount, numPeaks, readcoverage, threshold); + } + outPutFile.open(outputname); + if (!outPutFile) { + cout << "ERROR: Unable to open file: " << outputname << endl; + exit(30); + }//end of if + for (j = 0; j < chromosomeLengthInBins; j++) { + if (proba[j] > 0.5)//0.01 + { + outPutFile << j << " " << path[j] << " " << proba[j] << " " + << hits[j] << " " << logproba[j] << endl; + } + } + delete[] path; + delete[] proba; + delete[] hits; + outPutFile.close(); + return 0; +}//end of main + +void pathFinder(const int count, const vector &order, + const vector &data, const int chromosomeLengthInBins, + int *&path, double *&proba, double *&logproba, double *&hits, + const double totalMapReads, const double totalPeakReads, + const double totalPeakArea, const double medianPeakBinCount, + const int numPeaks, const int readcoverage, const int threshold) { + int j; + double (*logfnh)[2], trp[2][2]; + double p[2], p0, p1; + double ratio, compa; + double dif = 0, inside1, inside2, inside3, inside4; + int na, nb, nc; + //double lambdaback, lambdafore; + double sum = 0, sum2 = 0, mean, var, nsize; + double muback, alphaback; + double mufore, alphafore; + //double mufore,mualpha; + double mu, alpha; + int bgtotal, number; + double *peaksdata; + //- int threshold; + //ofstream outParameterFile; + + //outParameterFile.open("poisson.out"); + //if (!outParameterFile) { + // cout << "ERROR: Unable to open file poisson.out." << endl; + // exit(30); + //}//end of if + na = rand() + 1; + nb = rand() - 1; + nc = rand(); + // hits are for all 25bp window on the genome. + // only some of them are non-zero. 
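+    //
+    // Outline of the approach implemented below: each bin's read count is
+    // modelled with a two-state HMM whose emissions are generalised Poisson
+    // distributions, one state for background bins and one for enriched
+    // (peak) bins. Background mu/alpha are estimated by method of moments
+    // from bins below the count threshold (including empty bins), with
+    // mu = mean and alpha = (sqrt(var/mean) - 1) / mean; foreground
+    // estimates start from the remaining bins and are then refined by
+    // parameterEstimate(). A forward pass fills in a state path and a
+    // backward pass converts it into a per-bin probability of the enriched
+    // state (proba/logproba); main() reports bins whose probability
+    // exceeds 0.5.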
+ hits = new double[chromosomeLengthInBins]; + sum = 0; + sum2 = 0; + bgtotal = 0; + //- threshold = 6; + for (j = 0; j < chromosomeLengthInBins; j++) { + hits[j] = 0; + } + for (j = 0; j < count; j++) { + if (order[j] > chromosomeLengthInBins) { + cout << "read bins extend further than the chromosome size in bins " << order[j] << " "<< chromosomeLengthInBins << endl; + exit(1); + } + hits[order[j]] = data[j]; + if (data[j] < threshold) { + // sum = sum + (double) floor(data[j]); + // sum2 = sum2 + (double) floor(data[j])*floor(data[j]); + sum = sum + data[j]; + sum2 = sum2 + data[j] * data[j]; + bgtotal++; + } + } + nsize = (double) chromosomeLengthInBins - count + bgtotal; + mean = sum / nsize; + var = (sum2 - (double) nsize * mean * mean) / (nsize - 1); + muback = mean; + alphaback = (sqrt(var / mean) - 1) / mean; + cout << "background: mu = " << muback << " alpha = " << alphaback << endl; + //outParameterFile << muback << " " << alphaback << endl; + // double aa = logGenPoisson(5,5,2); + peaksdata = new double[count - bgtotal]; + number = 0; + sum = 0; + sum2 = 0; + for (j = 0; j < count; j++) { + if (data[j] >= threshold) { + peaksdata[number] = data[j]; + // sum = sum + (double) floor(data[j]); + // sum2 = sum2 + (double) floor(data[j])*floor(data[j]); + sum = sum + data[j]; + sum2 = sum2 + data[j] * data[j]; + number++; + } + } + mean = sum / number; + var = (sum2 - (double) number * mean * mean) / (number - 1); + mu = mean; + alpha = (sqrt(var / mean) - 1) / mean; + cout << "foreground (raw): mu = " << mu << " alpha = " << alpha << endl; + //outParameterFile << mu << " " << alpha << endl; + //double ff = likelihood(peaksdata,number,threshold,6,0.3); + parameterEstimate(peaksdata, number, threshold, mu, alpha, &mufore, + &alphafore); + cout << "foreground: mu = " << mufore << " alpha = " << alphafore << endl; + //outParameterFile << mufore << " " << alphafore << endl; + //outParameterFile.close(); + // exit(0); + + for (j = 0; j < 10; j++)//200 + { + double aa = logIntGenPoisson(j, muback, alphaback); + double bb = logIntGenPoisson(j, mufore, alphafore); + double cc = logIntTrunc0GenPoisson(j, mufore, alphafore); + cout << "j= " << j << " " << exp(aa) << " " << exp(bb) << " " + << exp(cc) << endl; + }//end of j + // exit(0); + + //+ lambdaback = readcoverage * totalMapReads /(3100*0.9); + // cout <<"lambda foreground = "<= 0; j--) { + ratio = 1 / (1 + (trp[1][path[j + 1]] / trp[0][path[j + 1]]) * exp( + logfnh[j][1] - logfnh[j][0])); + logproba[j] = log(ratio); + /* + if((j>=143890)&&(j<143895)) + { + cout << "path= "< 0) { + logyfac = 0; + for (j = 2; j <= y; j++) + logyfac = logyfac + log((double) j); + } else { + cout << "error, y is negative. 
" << y << endl; + exit(0); + } + result = log(mu) - log(1 + alpha * mu); + result = y * result + (y - 1) * log(1 + alpha * y) - logyfac; + result = result - mu * (1 + alpha * y) / (1 + alpha * mu); + return result; +}//end of logIntGenPoisson + +void readData(const char dataFileName[], vector &order, + vector &data, int *nRow, double *totalcount) { + int count = 0; + int temOrder; + double temVal; + istringstream iss; + string lineString; + double sum = 0; + + ifstream inFile(dataFileName); + if (!inFile) { + cout << "Error opening input file" << dataFileName << endl; + exit(0); + } + count = 0; + sum = 0; + while (inFile) { + if (inFile) { + getline(inFile, lineString); + iss.clear(); + iss.str(lineString + " "); + iss >> temOrder >> temVal; + if (iss) { + order.push_back(temOrder); + data.push_back(temVal); + //07/04/08 sum = sum + (double) floor(temVal); + sum = sum + temVal; + }//end of if + }//end of if + count++; + }//end of while + *nRow = count - 1; + *totalcount = sum; + cout << "There are " << *nRow << " nonzero counts." << endl; +}//end of readData + +double unran(int *na, int *nb, int *nc) { + double random; + *na = (171 * (*na)) % 30269; + *nb = (172 * (*nb)) % 30307; + *nc = (170 * (*nc)) % 30323; + random = (double) *na / 30269.0 + (double) *nb / 30307.0 + (double) *nc + / 30323.0; + random = random - floor(random); + return random; +} + +double gammaln(double xx) { + double ser, stp, tmp, x, y, cof[6], gam; + int j; + cof[0] = 76.18009172947146; + cof[1] = -86.50532032941677; + cof[2] = 24.01409824083091; + cof[3] = -1.231739572450155; + cof[4] = 0.1208650973866179 * 0.01; + cof[5] = -0.5395239384953 * 0.00001; + stp = 2.5066282746310005; + x = xx; + y = x; + tmp = x + 5.5; + tmp = (x + 0.5) * log(tmp) - tmp; + ser = 1.000000000190015; + for (j = 0; j < 6; j++) { + y = y + 1.0; + ser = ser + cof[j] / y; + } + gam = tmp + log(stp * ser / x); + return gam; +} diff --git a/t/analysis.t b/t/analysis.t new file mode 100644 index 0000000..c6a7112 --- /dev/null +++ b/t/analysis.t @@ -0,0 +1,490 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 146; + +use DETCT::Analysis; + +use File::Path qw( make_path ); +use POSIX qw( WIFEXITED); + +# Compile quince_chiphmmnew if necessary +if ( !-r 'bin/quince_chiphmmnew' ) { + make_path('bin'); + my $cmd = 'g++ -o bin/quince_chiphmmnew src/quince_chiphmmnew.cpp'; + WIFEXITED( system $cmd) or confess "Couldn't run $cmd"; +} + +my $is_ensembl_reachable = is_ensembl_reachable(); + +my $analysis = DETCT::Analysis->new( + { + name => 'zmp_ph1', + read1_length => 30, + read2_length => 54, + mismatch_threshold => 2, + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + hmm_binary => 'bin/quince_chiphmmnew', + r_binary => 'R', + deseq_script => 'script/run_deseq.R', + output_sig_level => 0.05, + chunk_total => 20, + } +); + +isa_ok( $analysis, 'DETCT::Analysis' ); + +# Test name attribute +is( $analysis->name, 'zmp_ph1', 'Get name' ); +is( $analysis->set_name('zmp_ph2'), undef, 'Set name' ); +is( $analysis->name, 'zmp_ph2', 'Get new name' ); +throws_ok { $analysis->set_name() } qr/No name specified/ms, 'No name'; +my $long_name = 'X' x ( $DETCT::Analysis::MAX_NAME_LENGTH + 1 ); +throws_ok { $analysis->set_name('') } qr/Empty name specified/ms, 'Empty name'; +throws_ok { $analysis->set_name($long_name) } qr/longer than \d+ characters/ms, + 'Long name'; + +# Test read 1 length attribute +is( $analysis->read1_length, 30, 'Get read 1 length' ); +is( 
$analysis->set_read1_length(40), undef, 'Set read 1 length' ); +is( $analysis->read1_length, 40, 'Get new read 1 length' ); +throws_ok { $analysis->set_read1_length() } qr/No read 1 length specified/ms, + 'No read 1 length'; +throws_ok { $analysis->set_read1_length(-1) } qr/Invalid read 1 length/ms, + 'Invalid read 1 length'; + +# Test read 2 length attribute +is( $analysis->read2_length, 54, 'Get read 2 length' ); +is( $analysis->set_read2_length(64), undef, 'Set read 2 length' ); +is( $analysis->read2_length, 64, 'Get new read 2 length' ); +throws_ok { $analysis->set_read2_length() } qr/No read 2 length specified/ms, + 'No read 2 length'; +throws_ok { $analysis->set_read2_length(-2) } qr/Invalid read 2 length/ms, + 'Invalid read 2 length'; + +# Test mismatch threshold attribute +is( $analysis->mismatch_threshold, 2, 'Get mismatch threshold' ); +is( $analysis->set_mismatch_threshold(3), undef, 'Set mismatch threshold' ); +is( $analysis->mismatch_threshold, 3, 'Get new mismatch threshold' ); +throws_ok { $analysis->set_mismatch_threshold() } +qr/No mismatch threshold specified/ms, 'No mismatch threshold'; +throws_ok { $analysis->set_mismatch_threshold(-1) } +qr/Invalid mismatch threshold/ms, 'Invalid mismatch threshold'; + +# Test bin size attribute +is( $analysis->bin_size, 100, 'Get bin size' ); +is( $analysis->set_bin_size(200), undef, 'Set bin size' ); +is( $analysis->bin_size, 200, 'Get new bin size' ); +throws_ok { $analysis->set_bin_size() } qr/No bin size specified/ms, + 'No bin size'; +throws_ok { $analysis->set_bin_size(-1) } qr/Invalid bin size/ms, + 'Invalid bin size'; + +# Test peak buffer width attribute +is( $analysis->peak_buffer_width, 100, 'Get peak buffer width' ); +is( $analysis->set_peak_buffer_width(200), undef, 'Set peak buffer width' ); +is( $analysis->peak_buffer_width, 200, 'Get new peak buffer width' ); +throws_ok { $analysis->set_peak_buffer_width() } +qr/No peak buffer width specified/ms, 'No peak buffer width'; +throws_ok { $analysis->set_peak_buffer_width(-1) } +qr/Invalid peak buffer width/ms, 'Invalid peak buffer width'; + +# Test HMM significance level attribute +is( $analysis->hmm_sig_level, 0.001, 'Get HMM significance level' ); +is( $analysis->set_hmm_sig_level(0.1), undef, 'Set HMM significance level' ); +is( $analysis->hmm_sig_level, 0.1, 'Get new HMM significance level' ); +throws_ok { $analysis->set_hmm_sig_level() } +qr/No HMM significance level specified/ms, 'No HMM significance level'; +throws_ok { $analysis->set_hmm_sig_level(1) } +qr/Invalid HMM significance level/ms, 'Invalid HMM significance level'; + +# Test HMM binary attribute +is( $analysis->hmm_binary, 'bin/quince_chiphmmnew', 'Get HMM binary' ); +is( $analysis->set_hmm_binary('bin'), undef, 'Set HMM binary' ); +is( $analysis->hmm_binary, 'bin', 'Get new HMM binary' ); +throws_ok { $analysis->set_hmm_binary() } qr/No HMM binary specified/ms, + 'No HMM binary'; +throws_ok { $analysis->set_hmm_binary('nonexistent') } +qr/does not exist or cannot be read/ms, 'Missing HMM binary'; + +# Test R binary attribute +is( $analysis->r_binary, 'R', 'Get R binary' ); +is( $analysis->set_r_binary('S'), undef, 'Set R binary' ); +is( $analysis->r_binary, 'S', 'Get new R binary' ); +throws_ok { $analysis->set_r_binary() } qr/No R binary specified/ms, + 'No R binary'; + +# Test DESeq script attribute +is( $analysis->deseq_script, 'script/run_deseq.R', 'Get DESeq script' ); +is( $analysis->set_deseq_script('script'), undef, 'Set DESeq script' ); +is( $analysis->deseq_script, 'script', 'Get new DESeq script' 
); +throws_ok { $analysis->set_deseq_script() } qr/No DESeq script specified/ms, + 'No DESeq script'; +throws_ok { $analysis->set_deseq_script('nonexistent') } +qr/does not exist or cannot be read/ms, 'Missing DESeq script'; + +# Test output significance level attribute +is( $analysis->output_sig_level, 0.05, 'Get output significance level' ); +is( $analysis->set_output_sig_level(0.01), + undef, 'Set output significance level' ); +is( $analysis->output_sig_level, 0.01, 'Get new output significance level' ); +throws_ok { $analysis->set_output_sig_level() } +qr/No output significance level specified/ms, 'No output significance level'; +throws_ok { $analysis->set_output_sig_level(1) } +qr/Invalid output significance level/ms, 'Invalid output significance level'; + +# Test reference FASTA attribute +is( $analysis->ref_fasta, undef, 'Get reference FASTA' ); +is( $analysis->set_ref_fasta('t/data/test12.fa'), undef, + 'Set reference FASTA' ); +is( $analysis->ref_fasta, 't/data/test12.fa', 'Get new reference FASTA' ); +throws_ok { $analysis->set_ref_fasta('nonexistent') } qr/cannot be read/ms, + 'Missing reference FASTA'; + +# Test Ensembl host attribute +is( $analysis->ensembl_host, undef, 'Get Ensembl host' ); +is( $analysis->set_ensembl_host('ensembldb.ensembl.org'), + undef, 'Set Ensembl host' ); +is( $analysis->ensembl_host, 'ensembldb.ensembl.org', 'Get new Ensembl host' ); + +# Test Ensembl port attribute +is( $analysis->ensembl_port, undef, 'Get Ensembl port' ); +is( $analysis->set_ensembl_port(3306), undef, 'Set Ensembl port' ); +is( $analysis->ensembl_port, 3306, 'Get new Ensembl port' ); +throws_ok { $analysis->set_ensembl_port(-1) } qr/Invalid Ensembl port/ms, + 'Invalid Ensembl port'; + +# Test Ensembl username attribute +is( $analysis->ensembl_user, undef, 'Get Ensembl username' ); +is( $analysis->set_ensembl_user('anonymous'), undef, 'Set Ensembl username' ); +is( $analysis->ensembl_user, 'anonymous', 'Get new Ensembl username' ); + +# Test Ensembl password attribute +is( $analysis->ensembl_pass, undef, 'Get Ensembl password' ); +is( $analysis->set_ensembl_pass('secret'), undef, 'Set Ensembl password' ); +is( $analysis->ensembl_pass, 'secret', 'Get new Ensembl password' ); + +# Test Ensembl database name attribute +is( $analysis->ensembl_name, undef, 'Get Ensembl database name' ); +is( $analysis->set_ensembl_name('zv9_core'), + undef, 'Set Ensembl database name' ); +is( $analysis->ensembl_name, 'zv9_core', 'Get new Ensembl database name' ); + +# Test Ensembl species attribute +is( $analysis->ensembl_species, undef, 'Get Ensembl species' ); +is( $analysis->set_ensembl_species('danio_rerio'), + undef, 'Set Ensembl species' ); +is( $analysis->ensembl_species, 'danio_rerio', 'Get new Ensembl species' ); + +# Test chunk total attribute +is( $analysis->chunk_total, 20, 'Get chunk total' ); +is( $analysis->set_chunk_total(30), undef, 'Set chunk total' ); +is( $analysis->chunk_total, 30, 'Get new chunk total' ); +throws_ok { $analysis->set_chunk_total() } qr/No chunk total specified/ms, + 'No chunk total'; +throws_ok { $analysis->set_chunk_total(-1) } qr/Invalid chunk total/ms, + 'Invalid chunk total'; + +# Test sequences and chunks before adding samples +my $sequences = $analysis->get_all_sequences(); +is( scalar @{$sequences}, 0, 'No sequences' ); +my $chunks = $analysis->get_all_chunks(); +is( scalar @{$chunks}, 0, 'No chunks' ); + +# Mock sample object +my $sample = Test::MockObject->new(); +$sample->set_isa('DETCT::Sample'); +$sample->set_always( 'bam_file', 't/data/test1.bam' ); + +# 
Mock sample object with different reference sequence +my $sample_diff = Test::MockObject->new(); +$sample_diff->set_isa('DETCT::Sample'); +$sample_diff->set_always( 'bam_file', 't/data/test3.bam' ); + +# Test adding and retrieving samples +my $samples; +$samples = $analysis->get_all_samples(); +is( scalar @{$samples}, 0, 'No samples' ); +is( $analysis->add_sample($sample), undef, 'Add sample' ); +$samples = $analysis->get_all_samples(); +is( scalar @{$samples}, 1, 'Get one sample' ); +$analysis->add_sample($sample); +is( scalar @{$samples}, 2, 'Get two samples' ); +throws_ok { $analysis->add_sample($sample_diff) } qr/use different reference/ms, + 'Different reference for sample'; +throws_ok { $analysis->add_sample() } qr/No sample specified/ms, + 'No sample specified'; +throws_ok { $analysis->add_sample('invalid') } qr/Class of sample/ms, + 'Invalid sample'; + +# Test sequences and chunks after adding samples +$sequences = $analysis->get_all_sequences(); +is( scalar @{$sequences}, 5, '5 sequences' ); +$chunks = $analysis->get_all_chunks(); +ok( scalar @{$chunks} > 0, 'Chunks' ); + +# Count sequence in chunks +my $sequence_total = 0; +foreach my $chunk ( @{$chunks} ) { + $sequence_total += scalar @{$chunk}; +} +is( $sequence_total, 5, '5 sequences in chunks' ); + +# Recalculate chunks so one sequence per chunk +$analysis->set_chunk_total(10000); +$chunks = $analysis->get_all_chunks(); +is( scalar @{$chunks}, 5, '5 chunks' ); + +# Recalculate chunks so 5/3 sequences per chunk on average +$analysis->set_chunk_total(3); +$chunks = $analysis->get_all_chunks(); +is( scalar @{$chunks}, 3, '3 chunks' ); + +# Count sequence in chunks +$sequence_total = 0; +foreach my $chunk ( @{$chunks} ) { + $sequence_total += scalar @{$chunk}; +} +is( $sequence_total, 5, '5 sequences in chunks' ); + +# Test test chunk attribute +is( $analysis->test_chunk, undef, 'Get test chunk' ); +is( $analysis->set_test_chunk(1), undef, 'Set test chunk' ); +is( $analysis->test_chunk, 1, 'Get new test chunk' ); +$chunks = $analysis->get_all_chunks(); +is( scalar @{$chunks}, 1, '1 chunk' ); +is( $analysis->set_test_chunk(4), undef, 'Set test chunk' ); +$chunks = $analysis->get_all_chunks(); +is( scalar @{$chunks}, 3, '3 chunks' ); + +# Test constructing from YAML +$analysis = DETCT::Analysis->new_from_yaml('t/data/test_analysis12.yaml'); +isa_ok( $analysis, 'DETCT::Analysis' ); +$samples = $analysis->get_all_samples(); +is( scalar @{$samples}, 2, 'Get two YAML samples' ); +throws_ok { $analysis = DETCT::Analysis->new_from_yaml('nonexistent.yaml') } +qr/does not exist or cannot be read/ms, 'Missing YAML file'; + +# Test validating analysis +throws_ok { + $analysis = DETCT::Analysis->new_from_yaml('t/data/test_analysis13.yaml'); +} +qr/use different reference/ms, 'Different reference'; + +# Test summary info +$analysis = DETCT::Analysis->new_from_yaml('t/data/test_analysis1122.yaml'); +my @bam_files = $analysis->list_all_bam_files(); +is( scalar @bam_files, 2, '2 BAM files' ); +is( $bam_files[0], 't/data/test1.bam', 'Got BAM file' ); +my @tags = $analysis->list_all_tags_by_bam_file('t/data/test1.bam'); +is( scalar @tags, 2, '2 tags' ); +is( $tags[0], 'NNNNBAGAAG', 'Got tag' ); + +my $seq; + +# Set FASTA index +$analysis = DETCT::Analysis->new_from_yaml('t/data/test_analysis12.yaml'); +throws_ok { $analysis->set_fasta_index(); } qr/No FASTA index specified/ms, + 'No FASTA index'; +throws_ok { $analysis->set_fasta_index('invalid'); } qr/Class of FASTA index/ms, + 'Invalid FASTA index'; + +# Set Ensembl slice adaptor +$analysis = 
DETCT::Analysis->new_from_yaml('t/data/test_analysis12.yaml'); +throws_ok { $analysis->set_slice_adaptor(); } +qr/No Ensembl slice adaptor specified/ms, 'No slice adaptor'; +throws_ok { $analysis->set_slice_adaptor('invalid'); } +qr/Class of Ensembl slice adaptor/ms, 'Invalid slice adaptor'; + +# Get subsequence with missing parameters +$analysis = DETCT::Analysis->new_from_yaml('t/data/test_analysis12.yaml'); +throws_ok { $analysis->get_subsequence(); } qr/No sequence name specified/ms, + 'No sequence name'; +throws_ok { $analysis->get_subsequence('1'); } +qr/No sequence start specified/ms, 'No sequence start'; +throws_ok { $analysis->get_subsequence( '1', 1 ); } +qr/No sequence end specified/ms, 'No sequence end'; +throws_ok { $analysis->get_subsequence( '1', 1, 10 ); } +qr/No sequence strand specified/ms, 'No sequence strand'; + +# Check getting sequence from test FASTA file +# First 10 bp of chromosome 1 should be CCAGGCGCGG according to: + +=for comment +head -2 t/data/test12.fa +=cut + +$analysis = DETCT::Analysis->new_from_yaml('t/data/test_analysis12.yaml'); +$seq = $analysis->get_subsequence( '1', 1, 10, 1 ); +is( length $seq, 10, 'FASTA subsequence length' ); +is( $seq, 'CCAGGCGCGG', 'FASTA subsequence' ); +$seq = $analysis->get_subsequence( '1', 1, 10, -1 ); +is( length $seq, 10, 'FASTA reverse complement subsequence length' ); +is( $seq, 'CCGCGCCTGG', 'FASTA reverse complement subsequence' ); + +# Check getting subsequence outside size of sequence +$seq = $analysis->get_subsequence( '1', -1, 10, 1 ); +is( length $seq, 10, 'Negative start FASTA subsequence length' ); +is( $seq, 'CCAGGCGCGG', 'Negative start FASTA subsequence' ); +$seq = $analysis->get_subsequence( '1', -1, -1, 1 ); +is( length $seq, 1, 'Negative start and end FASTA subsequence length' ); +is( $seq, 'C', 'Negative start and end FASTA subsequence' ); +$seq = $analysis->get_subsequence( '1', 1_000_000_001, 1_000_000_010, 1 ); +is( length $seq, 0, 'Large start and end FASTA subsequence length' ); +is( $seq, '', 'Large start and end FASTA subsequence' ); + +# Check getting sequence from Ensembl database +# First 10 bp of chromosome 1 should be TTCTTCTGGG according to: +# http://www.ensembl.org/Danio_rerio/Location/View?r=1%3A1-10 +SKIP: { + skip 'Ensembl not reachable', 4 if !$is_ensembl_reachable; + + $analysis = DETCT::Analysis->new( + { + name => 'zmp_ph1', + read1_length => 30, + read2_length => 54, + mismatch_threshold => 2, + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + hmm_binary => 'bin/quince_chiphmmnew', + r_binary => 'R', + deseq_script => 'script/run_deseq.R', + output_sig_level => 0.05, + chunk_total => 20, + ensembl_species => 'danio_rerio', + } + ); + $seq = $analysis->get_subsequence( '1', 1, 10, 1 ); + is( length $seq, 10, 'Ensembl subsequence length' ); + is( $seq, 'TTCTTCTGGG', 'Ensembl subsequence' ); + $seq = $analysis->get_subsequence( '1', 1, 10, -1 ); + is( length $seq, 10, 'Ensembl reverse complement subsequence length' ); + is( $seq, 'CCCAGAAGAA', 'Ensembl reverse complement subsequence' ); +} + +# Check getting sequence without FASTA file or Ensembl database +$analysis = DETCT::Analysis->new( + { + name => 'zmp_ph1', + read1_length => 30, + read2_length => 54, + mismatch_threshold => 2, + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + hmm_binary => 'bin/quince_chiphmmnew', + r_binary => 'R', + deseq_script => 'script/run_deseq.R', + output_sig_level => 0.05, + chunk_total => 20, + } +); +throws_ok { $analysis->get_subsequence( '1', 1, 
10, 1 ); } +qr/No reference FASTA or Ensembl database/ms, 'No FASTA or Ensembl'; + +# Check getting sequence from Ensembl database with explicit connection +SKIP: { + skip 'Ensembl not reachable', 2 if !$is_ensembl_reachable; + + $analysis = DETCT::Analysis->new( + { + name => 'zmp_ph1', + read1_length => 30, + read2_length => 54, + mismatch_threshold => 2, + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + hmm_binary => 'bin/quince_chiphmmnew', + r_binary => 'R', + deseq_script => 'script/run_deseq.R', + output_sig_level => 0.05, + chunk_total => 20, + ensembl_host => 'ensembldb.ensembl.org', + ensembl_port => 5306, + ensembl_user => 'anonymous', + ensembl_pass => '', + ensembl_species => 'danio_rerio', + } + ); + $seq = $analysis->get_subsequence( '1', 1, 10, 1 ); + is( length $seq, 10, 'Ensembl subsequence length' ); + is( $seq, 'TTCTTCTGGG', 'Ensembl subsequence' ); +} + +# Check getting sequence from specific Ensembl database +# Get database name via: + +=for comment +mysql -u anonymous -h ensembldb.ensembl.org -P 5306 -Bse \ +"SHOW DATABASES LIKE 'danio_rerio_core\_%'" | sort | tail -1 +=cut + +SKIP: { + skip 'Ensembl not reachable', 2 if !$is_ensembl_reachable; + + $analysis = DETCT::Analysis->new( + { + name => 'zmp_ph1', + read1_length => 30, + read2_length => 54, + mismatch_threshold => 2, + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + hmm_binary => 'bin/quince_chiphmmnew', + r_binary => 'R', + deseq_script => 'script/run_deseq.R', + output_sig_level => 0.05, + chunk_total => 20, + ensembl_host => 'ensembldb.ensembl.org', + ensembl_port => 5306, + ensembl_user => 'anonymous', + ensembl_pass => '', + ensembl_name => 'danio_rerio_core_69_9', + } + ); + $seq = $analysis->get_subsequence( '1', 1, 10, 1 ); + is( length $seq, 10, 'Ensembl subsequence length' ); + is( $seq, 'TTCTTCTGGG', 'Ensembl subsequence' ); +} + +# Check getting subsequence outside size of sequence +SKIP: { + skip 'Ensembl not reachable', 6 if !$is_ensembl_reachable; + + $seq = $analysis->get_subsequence( '1', -1, 10, 1 ); + is( length $seq, 10, 'Negative start Ensembl subsequence length' ); + is( $seq, 'TTCTTCTGGG', 'Negative start Ensembl subsequence' ); + $seq = $analysis->get_subsequence( '1', -1, -1, 1 ); + is( length $seq, 1, 'Negative start and end Ensembl subsequence length' ); + is( $seq, 'T', 'Negative start and end Ensembl subsequence' ); + $seq = $analysis->get_subsequence( '1', 1_000_000_001, 1_000_000_010, 1 ); + is( length $seq, 10, 'Large start and end Ensembl subsequence length' ); + is( $seq, 'NNNNNNNNNN', 'Large start and end Ensembl subsequence' ); +} + +# Check if Ensembl is reachable +sub is_ensembl_reachable { + my $handle = IO::Socket::INET->new( + PeerAddr => 'ensembldb.ensembl.org:5306', + Timeout => 1, + Proto => 'tcp', + ); + + if ( defined $handle && $handle ) { + $handle->close(); + return 1; + } + else { + return 0; + } +} diff --git a/t/data/test1.bam b/t/data/test1.bam new file mode 100644 index 0000000..9866c53 Binary files /dev/null and b/t/data/test1.bam differ diff --git a/t/data/test1.bam.bai b/t/data/test1.bam.bai new file mode 100644 index 0000000..0177392 Binary files /dev/null and b/t/data/test1.bam.bai differ diff --git a/t/data/test12.fa b/t/data/test12.fa new file mode 100644 index 0000000..b36ef7e --- /dev/null +++ b/t/data/test12.fa @@ -0,0 +1,400 @@ +>1 +ccAGGcgcggaAcGTcaGGGcGCttAAcgaAcaCattCAGTcatTtcccatcTTGTcaTaCtAaAGATTCtAGATCAcga +CcttCttCcGtctGAGGCcggcCGcAAataGGtgCGggcgCacTaagcTcCAtACCCGttctcgtgcGGCActaCAtgAg 
+ATCCaAcCgCGTCcAAGTAgAGAcCTgaCGTATgGaTatTCttAacATAcCtAtGaatAttaaTaATtaAGgaACCcTgG +ttTaTctcCtACTtCGCtCgCGTTGGctATgGGTGAggCcTagTGagatTaAtGgTtTAgGgcgTccCtGCAgAgCTGGg +GCcCaTGCcGaTgcTgaCgATgAgAtGaaAGCGaATgGCGCGcAAtTCcAcAAaTAgGCtGatgTgCcTAAcccCgtAcg +GaaGCtGgtcCTcTTTaGctGtCGCacTacTCacCAggCTggcTATtcaagAcCgTaTGttcTGcGtgcGATcGatCtaG +agtgAGCttCCtCTtTAcCacACgCgGacagaCtgGGTAAatAGcTAcGtAACtGagAtAtgCgTgcATaggaTCtGCgC +agtACGtggtagGaGaGaTACAGttTccTACTTGAATCatACgcgAGCGatCGGcCGTACAcGgTtgtgcGAcTatgCCa +gaCCATgaGCTCCCtGAgCcCCtcaTGGTAaAgAtCCgctcAGGTataAgCttACgCaGAGacCctgacaggtAAcGgaG +ctcTAaAAaCTACTgCcCTCGgCTAcAccTatCGTgAaTcaAGAaTGAaaacAaAagCaaGgcgGCCGTaATATTGCGGg +GTATgTagGTAcGGtGgcaTcTgTTCCAaaCCacCgcTgtCggcTGTaTgAAgaaTcGGAaTaTGAgtAGctgTgGaTaC +ggcATcgTacTGcTTgcTtGgAGCCGtCCggAATGGCCAcAgCcaCctCTGcGccaggTGtcTaTatgCAccGgtTatcg +CCTagGctCaCcgCgGAaGGGtTTTGcagcGaCcTTtgcccAgTcgctCATGaTCggtgtGTGACTCAcCCtTatGCacg +gatagatAcaTGATtaaGtcccgcGtcatAGCGTAggaTggAcGGgAgcTtaCgACCaGGTCGCtGTAgCaGCATTTaCT +gccgCgAtAAcAcGGATtTtTATaCTGgGAgCcaGCcAgaCggCtGcgGggagTGCctGtcTAgTGAGAtAcaAgtGttg +ACcTgGCTGagAagtgcctAAAAtCCcgatTCAcCGGtcgcaaCTatAtTGgCTTactgCActtaCTctaAtGGGgcATt +AcACcTTTTgTtCtAtTCccTgctcTCtCctGtatAaCtactTtacgtTtGcGatAgAGAttCccgtaaGCcgcTGCtcC +gACtCCcgCcTGaTgaAcAATgTcAGtaTTGGcaggtaaTCcCCGaaaGtgCTcaaggacggCCatatGagaaaAgGTaA +caAATCCGtatCcCtCTaaGTCgcAgtgggCccCcTAAATAcgCgAAAAaCAgaaAAGtTCCcgCGaTAacTCACtctaT +GgcgATGCttggAGCGAAgGACCCCattCCAGcCGtAcaTcGcGCAgAgggAaGAcCCgggAgtcgcTCAtgccCtCTgG +TtAagAcaCTcTCtTccttTCCAAatAACTCtgaaCTGacagCGTaagGttacCtacGTGtCCaACGTGAcATCagagTt +aacctAGatTAcGcTTGGaaGcAGgGCCtttccCCcGAaAGccTacTcgtGAcGcGTATAggATgACAtAtTtaAaaaCa +GcaCAccAcGTTCtcttgtaAgTTTGCCagTgtTGAtTtCTCgGTtAcCgGtgAcgcCAccActaAcGAtGgcaccgcGG +TGaAaGaGtaTCcgTgGgCAgAGAGtAggTTTTaTCaAtgAgcGtaAccCCcgCCTcGGTctattGaAacaTCCCCcGAc +cCgCCAtggagccCcAGcCaaGtACGcgTAcACTGgAaGAGtACGgTGgAaTaCaTGAgTTaaGTacttcGtgGcTccTt +CGCacgCCaTGacatgAcTTAtgatcGGaAgGGCTaCGgTtATtCagGtaGggtTGTtTGaatcgGaATTCGGCgtTccc +GccgcgcgcAGTgCCGCcgcAaGTCgGGcaagTTTaagACTGCAtGcGggAcCTTgaaaGtctGGttatTAAAaTaacCA +cTcCccccTCCcCCGAaaAATCTgTGgAaGACCcCAcaAggtaTgGActgGtCGggGGgaGgaTAtcaAcacAttaTCaC +AgGGTtcAAAtcGTcggTGCtcCaTAcACtgGGGTGgTtagcaaccgtGcgGTCCCtcgtAagcTTTaccgcTAgCcgTT +tgtTCcggaACcgcTcaCCGCtCgTttAAGgactggAaAaCAgTgCgAGtgAtaGgCaaGttcccTgTacaTTAgAaGTa +GAAtaagTCTTGCaTtaGGgTGtGgGCaGcacgAtgcggAaTcTAcGgacCctAgcatggCcTgTCAaAcAAaGgcgaTG +CCAtaTGAAAgTcCAaaCTGTATGGTctTTGAGGtCCAgTgGCaAcaGcAgcgaGACaAcCggCTaacTTcCcTgtAATg +gaCggtATaaCgAGAtggGgAtAaCCCtcGCgCgcCAaTcGctTtttTgtCTaggatTaggTGcAaTtCCcGCCgtaCgT +GtTTcCCGgAgATaCAAAtcGGatCcaagTGcAAGTaTttcACcCtCCCGcaaCCCgttAGtCtCgAgcGtCcaacgaAG +TgCatCCctGAtTGtCCagCTAGtcaTCgTCGcGAtaCGcaACACCCccACTcCGAtgCTGaTtAccTTAcCGcgtGCat +gTCtTTtcTTTCctGCGCGTtGaTATgATttAatCAacccgGtcccAGtGAAcCtaCTaCAtaCtgTCCTCGACtTtaca +gTcaGAAAAgCTAtaTtaAgaaGCcAttcgaGtATGCcTaaGttAATgcGTcccagaATaggTCagCaAgttaGGAGatt +tCAaCgCCGGacgCggccCaaTTTtgaATCtACACAagCtAgAatgCCCCTTggaTGGAacgccctAGtCtgtGaGgGgg +ccCAaaACGGtTTcaCtTAccCcTCtCgaagCGtTtAAgTctcAatACggcaGgCGatCCCCgTCcTTCcGgGgtgtAAc +CGtgGTaATAGaCcTCCGCATTtGCgcCAtTtgggCCtTtCtGtTaaAcaCAtTCtTaAggacAtcTATTTtcTGCatTC +taTtGgCtgGgttaagCTactgtAgccTCtTTCGgctcACTCCCaTAttAcTCACcaGctTaTGATaAtTCGgTCTttaT +aTtGaCcTGaTcctggctTtTTGACGTGctgTAgGtGTaaGTgTAttCATtcctgAcAACgTtAAaAggCCaTAGtGctt +cacctcTCGtAtGATCgCtCaAatccgCGcGgtccGCcCacGGCAgaTTcgGcacacgcacatTTCAGgtttcGctcCAt +tatTcAtcTTTgCaTgCGaGaaCAgaTcgAGCCgTCCcAGctCAtAtGtcgTcaTaACTTAtcACGCGTCAgTccCAaCa +CGcatTagTcagCaGTGcacTTCaaACgctgGttAagcaGtCTCGAAaattaTacGAAAGGacAAttTtGCTaCagTcAT 
+gCaTGtttgAaAaaaTcaGTacAttaAagatGcGtcaCttCTaCAAGGACgtTAgAaATctGtcGgAttGcTcCtcacTT +cGtTgcaCaGCtCgGCgAgAgatcaTgTGCcAACgTTtggaccCAGtgggTTcGagtAtTGgattcctcGTGtgTtAaga +AagAaTCAttGtTtTagGatCCgACCGtCCTaGACGtAgGaggattTgTAACCGTgaTCAcgCCattGtTTAcgTTggGt +gtACCcgattctcCTtGTGgGAAgGctagcatAgcCGatGaCGGCgGGGtcgcCccatcAtacCTcAgTagccgCGtTac +tCCtctTtcgAtGCTcGCcTGCaCTACAtGCCtgaTcCTaGaCCGaCACcgGTgatcTggCTACcggctAgCcAAGtaAt +CGTcGatcaActtCgCttTTCCccGttCCcgtaATTTCaTAGTAGcAAAgtatcgacCtaGGcCtagatCgGGgCGGCcG +tgCAtCaCTTcAttTttGAcagctTgtAaatCtaAcacCTcActTCcgttatAacGcGActaCgCagAACACGatcCtgg +ATgccTaGttGTTTgGcaGtCtaGtaTgTGCtCtatGTtcgTcATtTtcCCggTttgCTttGTcGACccAccTGTAGCCG +tCCcgCgtTaagTaccTGAcaGCCtcGTtattaTcaTGTCAaGTAtGTGctgaAGGttatgTTcGtgctTgtgccaCGCc +aGTCTGgActTcTTAaGgcaGcTGGATTAaCTCcggttGAaATATGGTaAGgTaCGgCTaaaTTCGaTCCACcCGatccC +ttAcGTgATctaGgaCtTtTgTTTCgTcctGCtgAAttCttatTtTcaCgTTtcGGCtGCCCttaAttTAcTGGgCgTaG +ccTtaaaggACatCGTATcGGCcccATtGggacTGAccattCTccgaCggTATAtGGGctGGGcaCtgactGgGcaCCgA +GTtAGagTcAcACAGTTCtACtTctaAccTgTaTtAaacgAttCaCCAcGGaGtagatCGCGCgaCAtaAgtCCcGgcCc +AGAcgCgTTGTaAGAgATTactagcgcGacGggGggGcGcTatgACAGCgTTcAtGcCaTagGATGgTGtGCaCTatctG +aTTTccCCAggtCCAgTAGAAGAgAcTTtCAgACggCTAtTCGGtcAAtGAaAtTtCCggtaGaTAtcAGCTGAgaAtcT +GaCgacaaAgCCCttaaAtaaaaTCgtccTacaCGaTAgGGtgGttgatcTtCaTTCttTGgATATGCaGccgAaacGaC +CCCctTgAGAGCttgCAcggcGTTtCggtgaGTtGacCcCGGgTCgtgcAaAGGgCCCgGAGCAaTgAgTtTATcctGgT +gTTCGgCgctcGTcCaTcgTcCAagCgtCTAaGATcGCatcaCCTAtTcAcGAATTcGatcgAtTGCgTTaTATgtAGTC +aGTttaCcGtcgtagAgaGAtCGTAacgtGGgtaATTCaGACtTtACTttTGttggTgagtGtccAcTtCaATaTTTTaa +ACaTGGATGgATcaTACGaGAaaCtcCtaaaAaatTcGtgAgGTgaTatcTGGTGCgGaCTcacTacGTTCGaAGTAAtA +aGGAGacAcCctATAtAaGTaCCagccctaGTtCtGctAgcaTcttATATAcaaCACTATtaaaTCAgACttatgcAaGc +CGGtTaATGACGAgtaGTCccacaggGCAGaccaTtCTAggCtAaAgttATgtCactTtGTaGcAtAacgCCtCACtAGC +TattACaGtcaATCCTGTgAGttttaaCcGggAtgTTCgacgaCGcGGcTTcatAtACAttCcgAtGACAtcgATTaAAg +TGGgCGcTCTctattGtgAAtcAaCCATaCGgGcaCTATaCGcgCCggAATcctCcgGggACctGTTtGccGTcGTgtTG +CAgTcCtgtgcaTTCAGAAtTttTTTTAtctCcTCACCgTagTTTcGCAagGtAtACgCTtGCcaaGtgACggacTaCac +gTccAAGgGGAatacAgtaggTAgAAgACGCGTaCccactAGTgaaTTgTtgaGctcTcCcccTttgggaAGtggCGgac +gctcCCttGACGatTTCgCTGCGAtCggCGGTttgatGgAgtcTaCTGtcacCCacgctTCgAAcaGaAtCgCcgTTtaC +gATcACTTCAtttaacaccgcATAcAAActGTCgagaAaGtGAgTAgTCaaTAAtccgagAGgaTCATcTtaGcacATCa +CaGGGAtAAAtTAAttGaaaCgCctgATcgcTCGgGtttTgaCGGctCCcCgataAGgCAgtaTaCaggtGAaCGaaGaG +TaCCTgGgcGgtCcCgtGTcccCTCTtcagcggctTCtacTcgcGagAAcACGGaCAGtgcGGaTGCTAAaGatCAaccT +aGaaAttgCGgtATGCcGGCAtcTcacctaCgACGgaTgtgTCCTaAATTAAaaTtaGgtcgCcAaGGtGCgtTGattCc +ccGCGaTttaGAcGagGGTgCGTGcTTCCAAccAagcgCaGTaatTggTaATCCagTcTGaGtGcaTgcggCAaaAcGcT +CCAactTCCatTcAgCtAaacTtGctgTcctTcaAGtctGTGTCgCcTaGGgTGctacgGGAGcgCCatGcTaGtAGCTC +atTACGGGaaACaCCtcTttCGccgAGAcCaTTgACCgagtAAgtgttTcCAATCtCgAggGTaCCcCggCcTaaTGccc +AtcTCttGaAGGgTCAcggtTgttAaaaAGCaATAGcaCCaggAAtATcCCgcAtACggaAgcCATtAaGGatGcAtCag +CcGAtTGcTCATaggAtcAaGgCtCctgagcTGcCGTCtaAcgagTcGGtcctcAcTcTGgtcAgcAggtctcttCcCct +aACaTAatGCggtTCgACCggtTatttAaCGaGGgtAtTtGCttCCTgacAcAcTCtGCaATGGAcCCagCgGtCTTggG +cgCACtGtaCCTcAcACCatGgTaAGGcCcCTcCCcTGaGgGTAcTCgcggggAgctGtgcgaTGTTTTacTgctGTagc +ggAAgagTGGcgGCCgcGGaaggtggtATTGCTTTtgTTtCaGtggGtcCcgaTCAgGaacACctcgGGCTAattCTGcC +gaacGccctctcgGAGAGATtGGGcGtatgACCaGCcactgaAcgGTaTAGcTTtCAgACcATGTTCcCtGtccATgacA +CTtCAcAGaAAAgCgCcGtatTCAcTatCGtggGaTAtATgactGGtgcgcTCaaCCAggTCgAatAgtGTtggAtCccg +cCcCGagtTCTtTtttgCttttAtGGtgAGGAggaaCTatCTCcAGGagCGtGTGtgtTCaTGCgccACtATATaaaATT +gAaaGtgAGtagccaaAaCGctTAtAaATtAgAaTACACtaAacGagTAGAtAtCTcAcgATCTgcGatTTgACaTtaCG 
+AGgTaCaGctcTatAcGtccTTaTagcAtTaATCAcaAgcTCCgtAccACTtgtggTGcCACAggAacTGgtaGcaTgGC +CGtctGtGAgCTGTCAaCacaAtgCTGTcACCAaCcggCagtcGcCcAcaTGcACcTCTcgCTGCcaTGcGTgaGcgACG +ttgtTCTaAattaaaCAGAtAACTGcCaAcGCgtatTtcgcgcAGAaGATCTtaCAGgcctCAcTATGtgCgccGtCCCC +GTACGTgtcAtGTTaATgtgtGgGTTtTCTCGaCAcGAcCatTgggGAcaGgcaGcaGTATGcatCAccCGtAggGCtcG +AaGTgttaTtCgagAaAgACAcGatgatCgAattCGGtAttaGGgagcTtgAgcGAAcCATctAgCAttCCTaTGtAaGt +TctgCcagGAgcCTAcgtAcCaGgCCCCaGCTcTaTGacGcaagGgggTctcTGtcgAAACgTctAGCcAGgagggGGTg +cGtcgAgtACTTCCAAACTTCcAAGcatTcaTaCgctCTcccGGaTtAGCTTCcTGCgACcaGtaCaCAaCGGGCCCcAa +TgaaGTagTTGgCTTcAcCaACaaCAAtAttGAatccgAgTggtCactTtgAacaCtACaggAaACGtAaTTTtTGTAca +AACcCtgcacGTaaaaaTtcttTctTGTCcAaGtCcAcgCaGGcgagtTAGGtcAaCcGgGGAGcaAaTTaTAAcgAACG +CGAActcggcCgGCaAtaagcGgAgACgagttAAGcTtttccgCgCgcTGCAActgGcGggAAcTtgcACTTacCTcCCA +gGcAgGCACcAGaAaccGaACctTtgaaTGGAAtATACGCgaCtgCtGTCAtacgcCTTtCCGCataTAAtGGCCGaGTC +AgTgAAtgtTaaTTCTagATAccGTaTgagGCaaTTaacCAataCatAgCGaaGGTaTCCatAGCCAcggTTcAgttTCa +GacTGaacCcAcCGACTAGGAtAtAcAaGcCCAgCGTcACcTcGCctAcTcgCaTGtAtgcTTTATCgcACAaCTTTgTT +actCtTGcTGtGcgCgtAACtctTTcCTtgTacggCGAGTAaaggGaaCcGtCgcTaAGACCcAaacGaGCCgccAgagg +TTtgATCAcTtCTTtaAaaTtCGAAaGCACTCcAcAtAgGGGCcaatgCtGcCtctgcgctGCAgGcgGcaGGtgCTCCA +cgtGaagcGctgGcaCTAtcAACgAgActtTtctTGcaCagATaCgcGcGcctTAcctaaAGccGaTGTTGgCctGttgA +actActtcAaatGcaAcTGCATTCGtGAtTtctacccCcATGaGTACCcaGTTcCctggCTCaatgttcagtCAggGCat +ctgACtTTtGtagTTGGCagctGgCtgTAAtCTTGgcGacCcGCACgCGtGTACGTTAtacggACATcCGAgaCGcTgtG +ccGGCcCGGaTATTGgCCaACaTGAtCgacaGagcggGGtCgGGAAGtaGttGAcTAAcAtGaGcgTCCAGcacCCgtag +CcgCtGcTGGctcCcCcGATtTccCGTcaAtttTtTcCCTGaaCtgAGacTCACTTaccTGgtCGcTGcaaaatAgTCcC +aaTttATGCtaAagctGCtgAgCaTTcgAgacgatcaggaTtAcACcGGCgCTttCtCCggAAgggacTaGttatGgAcC +AcatTTCtGtGctcTacATtaAtCTTcaccacAgCAtGaTGTTtcgaActCtttGTgAAAAAtAgCTcc +>2 +CtcAGcATgTaaGacgtcgtgagCgACCcaTAAggaaTAtTcAgcATaAgGCtgccGAagAgCAtCTATttTTaGTACtt +TatAggACCggCAGGtAcccTTgccaCctGGaCtTTAGACgaGagcCgttGTatTcActcTccaGAaAcTcttACtGtAA +CTCcgtTAAAaagCACccgCaGCaacagCtCTgCAaGccCtgCTGGacgtaaAAacccGAATTACTcctgaCaggagcAc +CTGTGtTTaTCCGtaGgTCttActgCaaAcgATCtAgcCagctcTGTCAcTAAgAacgCGCCacacGTgAagAGATgGcc +ATCtTggtCCCcCctaCttGtgGGAACTACagtATTctCTAcCtcTATaagGaAaAGgcACaatGggACTatCcTGGagt +AaTgcAaAGCaGTCaTtCCaACgTACggAgatAGctagcgCGGGGCgCcCAaGaGGGTTgAttATagcAtGGTCGgtAAt +gtGCAcGgATtgaggGTgacGtGGACtCCAAGTtCgGCCtTcTcaACTGatcTTGACccGATTTGGTgGcgTgGTTaACC +GgaTGGAGgCAaCGgaTccgGGaTaacacCaaaACGggCAAAGagTTgtattaAttCGggtGTtTcGcTACtTcACCGcg +TcTgctAgtccTaaGgtAAgtCTTtATttgaGACCcCAgCcGgGgTTTcTAGgGCCCATTCgCTTgctgTCGGgCTgCGT +aaCgCtAAagGagtgtCaCgAtatGTCATgtataaAGcGCaCGGGgcTCTaTTaTAATcgGggcACTaAgGTAaTCcaag +AGcGCcgtgGcGtTCGTtaagACACtAAGAGTcCagtCaAGtCGaCTaatACATtaAAgGaACccCtgAtctCAGAagGa +GactaGgAtgccGcCcaGcGagagcCGtTTCcCcatgATCCCcAggcTACGTgATaGGTGagtCgtACcggcGaGAAAGC +gtggCTcGgCgttTggcggCtATTTaTtcttCtCgttgggCTTAGGACtaagAGgtCACtTGaGggCtTTtTACcttgAt +tCgcAgAtATAGgacAATggcgACATtTtGGTtTaTGTGTAcCtAcTaatACcaCTtcgggtTaATgcTAcAgaacaGtg +GTAcTAtAGTcCaAcaTGgGactGaCCCaTActGGcCagTGtttgATCtctTGacTttacaCtCaCAaACGtGTtgACTt +AtTatCtGAGTCCatTgtCAGTaaaGTTgCcctACaCCCtgCtaaTtcggAaaGtccCgtcAccAcTTgTTcTGTtaTaC +tATaTCacATAacATaGccaagTgtCTcTcTGCtTAGcGTgaTATTcGGgCcGAtgTgcccGTgGAAaGCtGAGCttaCT +GtcACcGTAcAtTGCCGAgTTTTtGtCcGGcctAaataaccGaTtcAcctAaaAtcagTggaTCccGgaTggCAgCCCga +ggGAcAACaTGgcgCcCtatCcGgcCCgGcgtCcaaAtCttCcTgtCGtgcaAggTATgGatcTAgcTcAtagTaaCCct +taCGTgCCaGgCtgAGacGgGGcaaaAAtTggTcCaGTGccatgagCttgTtAaCTgACtcccCTcCCgAaGccGGGatt +tgaAgaGacGgATcCTtgcTaAATTATtGTgGTGtTACgctgCGTGCCTGTcCTCGCGTtTAACTGcTAGcGTAttcCcG 
+CtaAAgATGaGCgggcgCAAccCtCAcCATtaGCaCCTTgcGCtACTtcaCcaTAtGtaCtGcTgCTtcgacacTgAGAc +ggCgtTagtgCaccggGaAgAcagggGcAgGGGCCggGCTGaTGgCtgGTGgtAtGTGgAGCaCgGGGtgAcGagAacGa +gTcaaGgGtatCgAGaCtGGtagcctcAccgGGggGCAtcCtgtgATGcAtCatGAgAgaAGCTgcATtaAAAcGTcctg +cGtCcaTaATAcgAAAgtTggAtTatGCaTcCttAgaGGAggATcAtgttcaGttAGatAAcTTgtAacGAcatTgtCgt +gAattcgCgtgataCGtcggTctCCtAactgTaccGcgAcTCCTgaAccACcaaGcGCTGgatTTTAGGACTctGcaCCa +AAaTTtTAgCgCatgaAgccACCgTTtCACtTGTCAccaagtTaGAtCaCTgttTaTccaaATGGcccCCAcCCgctggt +tTGTtTaGaATtTctcaAcTaTGagACcCgaacacgctCaTCGaCgTATgcCGGtCctcGGggtGAGTggtAcAGTTagT +caTtcTgAcaaCTcgcACtCTTTgaCACCTaGTgATCtaAaCtTCgcGAgAttgaCAagAacTaGtcTatATGaAacCCC +TtTGGTgCTAggCAaaTCCcTgAgAgaTAgTCCataGaTTCGTATActtGtCaTatCGCaAgaaTAtTgAgcttaatGTc +gaTGTgCCgGtgAtAtccgTCcTtcGaGCgcTaTtGTaGtagtcTGcTCgTCTACGtCTaaTTtGGGGCgtCttcTcaAA +TgCaGaAggtGttTGacAGTtGTCttTAAGatGggaAaggCAcgtTAtcGCACaAtcGACcaAcaaAGCGTgCAccgTAC +gaGgCgcAGcaGGGtATTtcCaAAcTAatACaAAGATGTtGtGCatGgGGTgTcAAgGgTCGcTGgtgGgGcgGtATCgt +CcgcgcCcAGaCaAgtGaagttaagCgTTccTcCActgcTgGAGAAggtgccaTCTtGgtaAtAataTggaGcCTttATg +aAActCTgGTACCgTtCCTTgtTttCGGattTCtaATGtTcCacCAactgGGTAaGcTGcGACGTcTtCaaGTACgcAGa +GcGTGATcGGCaCtgtTACtTTcCgAGCcCCcAGtagAaACagcttgGatcgTAtCACctAACaAtCgGGGGTCGgcacc +ActCTGTAGGaGgGAcACCGaAtaTgCaTtgCatcGTCTaagCCGaCatGcaaaAatTGtAacCATtcGAaGcAActcCG +aAtAgCGaTGttaaGGCGaggagatCcaGtgTACaatgtAGtggtatggatACCttgtGatagatTAggtCtaGACcTgt +CtttgGAaGCgtgcaTGTTaACTgATacTAaTGagAAggAgcAgCGGCAgCATCtTGTagCCaTcTcaAtcTTgctgggG +CtTCAgCaACtGcctgTTTAacGGGcAAcaATCtcAAGtggGAaTGGgAtccCaAAgCAAgTTtcacCgcGaacaTGTAc +TTgCCtgcGgcTaTtCCatCaAcgaaTGtTAACtgTtAGtgtCTgAaTcGCAcCAtaCaAcAgaTAtctcCAAGacGaaA +ttTctaaCgCaCATaAcccGgCCTCtgGCgaGaGACcTcggCTCaCAACAtTtTgtaaTGCCcgagcTtaTActCccAAt +CcAGCtACgGtCGAgTGaAaTTaagAcAaATagCaTatgTTGCCActaGcCtGTGatGGtgacAggAtAcatGTtTcAGA +aTGCATtAtaCgGCTCCtaagCTTtGattAtcAtCgGATTCCcCattgtccgTcCCcGtgcgtGaGTTacaTtacAaTTa +agTcaGCATTGacGatCaaAcGgAaTGAttgacgGCCCAGAcTTGCCCttGactCAACtcCCatgCatGCTcAGtGTagG +ccCcCtatgGCtCgGaGAcatGggggTtATTtgCCtCTTGGGACgGaggCGACCtGaGAGCTCctccGtcCgaGTgtGAt +TacCgccctaAgTctTtCgTgTCcaatAACgTcaGggtTcGatCTtCACctAtTggaataCgCtTaaGaAaccCgCaGaa +GGTaGaGggcgttacaaTTAGaGgctACtGcTccCGcTtcgGggcatCtGctgcCAACAAGtcTgAcaacgtAtcACGAa +GaTgATaaTtTTAGCGCcgAacaTGGGctGAaaccTGCgtcAgCtgGTagGaTtTgtGATAgcCcttccgctcAaagatT +aGtaAacctatCCTGTagGCtgGCgGGcgaaATAaCcTaaaGaTttagattcaAGGgCcAtcGTgaTaatTGTGtgCctt +cACcACACTtAAgGTcGgaaGcAatcgActCtCGgTTAGacGGatgcgcTagAAGagcCCCaTCGTaCGCTTaCaaGagT +GAtTAGatGCCcGaTTAaAagggACCcAGTccattaCtATgTTaATTAGcgAtaGGAtCaAACagGtgAAGgcCcACtca +agGTagagGatgAacCaTCTAAggAgCagAtGGCTtgGCttGTtGccAtgAAatGtaAcCAGTtgGCAacACGAcATTAA +GgtCCaAgtacCAGCAGgcaTtgaGCCGggaCCaAGtcttacTTcttCtcgCCatCtcGACaAAACaCggcAcgacACTC +AttcaTGAggttAaagCtaaagaCAGAAGaaCTAAgAGTGtAATTtTGgtAcGTttTAGTaCTgaCaTCCCAGGcCCAGa +aatActAagTtatgATGtCCccGgCCTtatcAgGGCAtgGgCGACtAATgCTTAAGGTTCatCGtcaacATcCaactGgt +TtATcgCgTGAcGAGCATGtAAaCAccTGCGccGtTaAaCcgCCttGAgaGTgcAgaGctgGGgGcCtAgtATCcTGcgA +cgGgAaTaCgTAcctagAgTcGCGGgttcAgggcCgAgaTAgGgCgAtcAAgTCcAcGATCaCCgGCgatgTActtttAC +CCCcCGGAAGCCAaCAcatgCtGGaCttCcagttAtCACaTtacggTGCGcttgaGTaACtCctgGtTTGaCaAcTTTaA +gACTCACGtTTTtGagGgatTcCtAaGgCctCTGTactCTcaGGgcAGCAcCAaTcGGgTctAgcactTaggGaAtcaTa +GttAtGCccAtagCTgagActggccGAAAcTggGtGATAgtaTGacTCAaCCCgAATCCccgaGagcgTGaatGAGcTaA +AGgTaTatcAAgtGGGTaGGActGTaaTgtCTCtgAaCGagGcGgTGCAgCgttAcaGAagTtTGcGtCTCggAAactTa +AAccaGTatGCATACGtgagaTgcGcCTctCCTCCgacAagTGcgaGAGGatattGCATAgTaGgaCgtgcCAAgTCagT +TgTGgtcatcTCaATgCacaTGtTttAaCAacctctAagacgAagATtCgTaCGtCGttcTGTAagATtaatCCTCttTG 
+cAtTACaccacGtcGgAAgCCGaGGaTCaTtTtgCCACcGgcACAcTCaaCCGAATGCTATataTTCgTtggatGGtTGg +gcGgatAAGaaCAgggAactAtGAGaTTggActAGGCtTctTcCTgaAcgtCcAaTcGtCttCggatcCTtaCGACctCG +AtcacAAGTAGCGTttccTgTgtTtGgtGtgTTAcaTGtaaCgCctCgtAcCgCaactTACcAAtCtTAtaggGTGtAcG +AgatgCCGcACaTTtGtTGaAGAtGTgatTAccTTAgAtATACccgCtaGaGCcgCTAccCCgCGTATacctaTTcgcAC +GtTgGCagccACTttGgAGttgCCGTTcatTTgAGCCAtttTTgttCAcATCgctCTaaacAaCGcAtCagAtCaGGGAA +TacgAgtGGTgggGccAcgTcCAtGCcgTCTcCGgcaAacAcaGtTCtcgcAaATacGcCaATTCGACAcgcaaaTgatC +GtccgttcGTAgACccgtCtaCTAgtcCgGGCTaGcGCccaATCCAACGCgaAGtCtacTgCtCgttgGACgGccaGgTT +GgCTTcggGGGAGggaAGgGCGtGGcgtgGACAttTAgaAcGaTatcCgtTAtAgAactCCGcctaGAcTgAacGgttcg +acAGggCaaCtCaacGTCTagAGAgtAACGgACATgcTGCTgaaCTGactgtAtAataGacCAgGaActtGTGGagCATt +ACcAccaTGctATCtCTacTgTCttCTGTcAtgatcGGtttAaAtcTGGccAcCtgTccGccAATAaACCGGGtgCCTGt +ctgAAGTttACCGtaAACaGaCTtAccctGCttaaaTacCagctagtTcagccataacgtGgATACtGAcTaCaAAaTgg +TGaCACtgCTaCGGtaGGgAgaACcACCTgttGgGaGcCCACAaAagtAcgtcTCCGcggTTctaCGACGATTcctcGGa +gaACcTaTtGacTGTgccgAcGGGAtaTTtTCCCtcAgCgTCgaTgaACTtcTtacttatgaCTcATcgGttCgTCGtga +tacAaGGtgaAGCggAgcGTCacaGATcgAtTTTcCcctAaactctTTCtcCGCaCaagaCgcTcgTcGcATTTCcTTca +aggaAgTtCcttCAaTGTCTATcCAcAGtGCtCCaAccgaGTccTTcAGcaCCgaatgTATcttTaCacccAaTaAcCgg +tccCTtTtTttagAggccagaCGtttTTTGCtaGaTGtTcTCttTACACtcTTtcCaaTAgGCGtCtgagcTCaCCCcga +aCAaGTtaagAcaCggTCtGgTataCtTgCTCactGTtGCcgtcGgGAaTAgAcAGcctCCTaAtTgaTtTTGGtccCCA +tcAAaTTGCAcggaAaTactaTCGTcgGTGAGCaTCgAAtcTCAatCaCgGGccTgcGCGataCgcATAATccTCCGaTa +AcGCAtaaACTGgCCgAaactCgtAACtCGAgTcacacCctCGAaGaTgAacATaGaTcACGTgAgGTgAcGTgaCcgCG +GtCAAACGgGaTcgCaCCGAgCtcTTgaCCatcAgctgaGTtgAAgaagCaCtcTGgtCcTgCTagTTTaTccaGCtggg +CttGAGCcGtaTCGAgCGaTTATcgCgctcgGGAGgCCAgCATCTgCGctaggagAcaATtgGgcGGgtTtAtgCgTCTC +TCTACatTCacaaCagATTATcGCggACgCGttTcGGCtcacctAaTTtaTGcCaGAGTgagCcCaCCtAAtAgAcacaG +GGGcGagaTGCgaatcagactTGCAGtctTTCgatcTatGTTcgtTtgAttgACcCaTCAGgcAcGagCaTCccgaaCCC +TaTTGggAcacCTaTCgGaaTtgCGaCTTCtCGgGaaAtAccggCTAaTggCaATATGAtTAaaCCgcCtACaCaCtttG +gaAgctTGaGtagCgtGCGattatcTCtgttgTgAgaCTctCCcgGAGGcgCtAATCgcactcTtCaAAtTCTggtAGTT +gaaacGgaaGGaCctaCtcgAGATaAaacTaagcgtTCtTggCcgcTttAaGcatcCcgaAcGggcTcCgTGTagaTgAA +ttGTAAtgAcccGGGcCTgaaaCCcCCTcaAGTaACttaTTTAcaAGatcaCATagcGGCGacCgctctcCcAtGATGgc +gaGcGtggtTaTCCTgttgTaaATgcCgacATGaGCcAaCAgtcTAccgAacGgGcGGGgGaTggTtctgtCgtgGataC +aggTTcAcgctAAAaGAaacaaGgTcACGCagGagaTCtcgATcCAggTGTgCaAaacAGCCcctaGccTaAgtcAaAgG +AcAcCttCTtcttCCaGGATAGtAccAGATtCTAtatcATaAAaTgTCctgtcgcgcTAtAcAGcAcTtcgAgagGcGgT +ataTGCtGTGttCggtcGcATAcATtacGtaGaCagTcATTGAAggcgCcGCcCtACtCgTgGgaGCcATAActggaGGT +tcCAtTttTTtatTgGaAAAATAgGaCAGgtgGGcgaGataggGCtCcaTTGGaTcCgaAACAagAgATGgggcAGATCA +CTTcaCaTGCaCgtcgAatcTaaAtTgtTTTtGAcAtACacTgCGTctctGttActCTCaTtcgtTaCACcGtAgtAttA +ACAcATCAaCtCGGtggTAtaggaCCgcCtgAtcCcAtaAcGtTAGcagACaaAGggACTcTagacAgaCAgGatatTAT +GttaAAccaTGgcCcaAtccATAcAtgctTgagagCTgtAtATGAgTTatGCAcCCtTaACCggATggtCAGAttcGtat +CttcTgCAccggCCGCGaAgttCTGcataacCgAtCGG +>3 +agCgctggTtTaGTCAATaGGcCccATcaCGcGgtatccgaAcaCcTTctACtTATTGcGACcCGtATgcccaGacgtGT +TAgTATcgCgtTtGGTccAGGGACtCAgaAGtagCtTCCgTGGCtaATcTCgCGTTTtgcAGAaTAtGGGGTaTagaGAA +cagccAcAATTCTGGATTtTtcTAaCtcgacctaAcacGgtaagggACagGacGcGccCTtTtTTCacgAcaGGCgcgAT +gCTgCgcCgCAgCctGACAtCGGAATTTAAttAGAaGCtcCGtTgACgaAATTcTGGGCCagAcCgAcaaGcTcCGTgCA +tGAgtCccCgACgAtctgttgcggagCTTTTaTaaggCcTGatTAAAtcGAttGtttTTggtAcTaGtCcaaaccAgGAt +cGgtGGCgtTGAtTTCTaAGgAGAagCGtaAcgcCcGTggcAatcatCtcAtcCcCCGAAAccTtccAGAAGCaAcGatg +TAaATTACtCTcgAgaagATCAGgcAagGGGGatTtgatCGgCGgGcCTGtccTTAaTActCGTatgTGgACCTTtcCAT 
+gAgTTTgCcCgTccatcGgACCaGtAggGactgaTattgcaGAattTcAcTCtctgtgGTTCTAccgATTaaAcCgTaGt +tcGTtaGttgTTggtTggTcGGAAAgtcTgCcTCCtAgTgcTcCCtcAgcTaTaaatGCgTGTcTtCTtCAATcAAaCCg +aTgaCAGCGcgTGGATTTGTaGGgAggGGGTtgGTcGgaTtaACtcGAcGACCCAagActtTcCtCCTatAGttAcGaAa +cTtGCGTgTacGtaccGCCAgaaGcacgTcCgTTAtgACGTCTGCgACAaTGAGtttatgATtAtAgcCcCtACaGCcgt +AAtAGCagAGTcaaagGCATcAaaAaaACGcgtCATtAcgtcaCttATTCggaGcttGCTtattaCttCtTTgcGTgccA +TTTCgTtttCgTaaGtccggcagAggaCTGGCCaaCTCtcACctTGGTtCtccCGcGtGCctaTGcGcAtGgaGgtCCcG +tGtCGAcaggTCCgactaggTGGTaaAAtCAcCatggtAgcCGaaTtAgatttcCGACcTgTTtGaAgTCCTTgGGATGA +agtcGatCtCCcTGGattGgtcAAcGACAaaGTaAtCgGtgacGgGgTgcCtcacGaCggCACAAgggGtCgactACCCC +gTTCtCgCTgAGcAgaTTAgTCTCtAcgcaGgaaCAccccTCAGTtCcGtATcggcGCCgtATgTggtagGtaAGGCaCt +aCGCTActgGCcGggTCGAtagacTctggcGAtaagGGggaccGCcCaTgGATTAGTaTAtgTCcTtTAgAGTcttCgaG +cGcGAAACGggaAATCtTCAcTggAGaCaTttAtCGGccGagCtCGACCGGCCgcttAtCGCcTgCCgtCTTAgaGgAaa +AGaGCtCGAgtgTtggcaAcTccAtCtggCATtaTTgTCgTaTTTCcGGctagTCCGTgtCgACCAgAcGaaatCccAgc +GtgcTGGTaAGggCGtGAgtaTCGgtAACAaTGATATCAtggtcaaAtaAccggCtTTgCCTgaaggaaTgcGTtGTTtA +tgCTTtcTGtTGcAttaGgAGaGGactATtcCGttGgcAcTGaACtGGgAacaGCTtctgcaccgaTACgTATtgCaGGA +tgcgccTcggataTagGcAgCACgaGGACgCcCagACGAgAtgACTcgctgGTCcCTCCCcCAgctaACGaTTACcggCc +gcCtGcGGCactaAacAgTCtGtACctccgGAgGcattataCtAGTaaggatTCcCATtAaAcCcCtTAaCTTCccCaGa +CTCtggctAaTactctgCGAcatTTaGaaGcTttccaGGCTtcgCgCgcacgtcCTTAtCCgAacATTggttATTGtcaT +gtgagACagtgGGaGTaaAGtGccatcgCtGaTActcATcagcGCgTAGGtaacccGTtaGtaataaaAGgGGAcGcCAT +AcTtCTTGTTCCTcaGGTgCAgcCCgAACCGgctTcGtCAGGtTAGcCTaGgGcTGGggAtacTtGCGCaAgtcccGgat +AtgCcAtTgGaGtCAcccAaCtTcGTATCtaCgCCCTCccCgGcACAcAAatCaGTgAAGgGgcgTcCcAACcaGcatGC +ctCtTaTGTgATGacctGCggAAtGTGGACtTaGAtCgGgtCCggtCAgGtGtTCGCGaTAacTTAAaCcCgCCcttTcT +acgGAaCAAGgTaagaGctTAAaTGtcTGgAtGAaAGgacCGCGcgAcGAAgcTgCAcCgCgCAATACtCCTTCtaGgCc +AaGcGCGGGgcCtGTgtGcaTgACcAAaATgtGaGGTTGggcTcCTCgttTcaTccTACTagctctATGCcGcgATaGgc +GCaagtcGgTTtTgtgTCTtaTTGAGccCaaTGgAAtCTTGtCAtCtgtTAtcGcgGggactAGGTtCgCaggtTtaAaC +GCctccgcGAtgagCgtcCGTTCcATGAtaaGAgGCtAcTagaTaCGggTaGTCtCAaCGtACCaGttAgGcCTGtGGGC +gGACgtcttAtTCgTtTGTCGCtGaGaGTtcTGaGGGAgaGGgCtGcatatTAtGcGgcTgcgcaCaGGaTtTgaACGCt +tcGAtgCgTGTcGAgggccAcGTCTgCgtAaTaTGggTTgCATCtAAcGcAaCCaACtAgAgCCgcGtaGCtccatgAgc +tGTGataTtccacAACaCGAttCTGCAtTCGGAGgcgAAacACTgttTCTGcTtGTCcacGaATgGcTaGGcAtTaTTAg +aAGaCaTtatAGgACCcaCgcTcCagAcTtCctgaaTtTgcCtcCCcAcCAtcaaCaTAGgtAtCCCGtctTATggacTa +tgGcGTGgGatGTcTgAtaGGcaaTcgtGcCAGAtCCAtTtTCcgcCcGtaCcgCCAcAGgGCAgTTATCcctgctaTgT +ATtctgaGAcgaagCCCTGAtttgTTTCCacGAgGagAAACAtgCAggTGTcTaTcctCTCAATcaTCatattttCggCG +aGttgACTaGcTTTTTTTGAGgGGAcTATgCaaAaaggGcCcAAATTAGacAcgAtgGCtAatTatCCcTaGagTaGTac +GagAaGTtTGCgTactagaCTaCCGgTCcGACCCgAaTcGGctagAcGcGTtTTCtCcTaaGTgtGccgCcCTCtaaccg +CcGcGtgtgGCTggTgGTatAggCgaAtAgCTgTAGtCgcgggcaGCcCCAACtGtTTCCAaTCGaGagtaaCTGcaGAG +TGgTgAtTggtaCACgAttaCtgAtgGaTGCAgCAGATTCGaGTcTGGTctCgTgGcTgtacAGActCaGgCGACCatcg +aaaGgTAaAgTtCCCctGCAGgtCtgGGtTcCTgtaCtTGgcGGGcaTcttGCGCgGcgtTAGcCgCCCttGATCAGtcg +CtAtGtTaGGtGCgAggaGcacAagAagAgGTAATGtTTtcTGTtGCTCTAgaCCgAtGtcGgCtTCaCGaatTgCGgGt +gtCCtAacGTTCaGgaaaGgaGgagacacGCTTgctTTatGaATcGcCAgGtaTTCCgCgaATgCttAtGTttgatcCgT +GCCaccCACGcTaCGgcattTacgGGcGCcaCtTAgAaattgGcTCAcGTtcaaAcTACTTGCATtaCGATTTcGcAgaT +TAtAACtTTaagTtagtgAGaTTATATCAttGCTgTgACAatcgAtCttcGgAACgaacTAAcTtaGttCatTgAttggt +caTaaAGCAAcTGGcATGtAtcgACAAatCTcgTGAagcGcgAaGaAgAGgCaTcaaCTtAGatATCaCcCgaaGatGAt +caAagAtgTCccCTaTaAtGTAGCtAGCCtgagtGgtGtGATGgTttgcggTtcTGcAtgATCGTTCgTTaGTTtaAGCA +gACAGTTctAGtaaaCaatacTAATtccttagAtAtTCTGgCtagGgtctatgTagatgTagattGCCaattGAtTatga 
+AccCgTaaTCaCaCgAGgTtTCGTTaCgtgcAttgtcAcctCTaTttgctTaGttGgGtaAAgagGGGgtcGaatcTATg +TacatctaAcCGATTGgTgAGccTAgaTGgTAccAaaCCcCcAtAGTGGCtCTgcagACCgTtAGtTgTcTAttTattgg +GgCCtaTgtcATaAcAtAtacataGtGaTtGgttgCgtcAgCACgAcgATgCAtACtaGACAGtggAAcTacTGtaCaTA +TtCtaGTcgCGAgaacCCAGgaAgGAcaCTcTtTCaACGCgAgttCTaGgGTATTaTTccacAAtcAagtATAtAAaatt +acAgCAgCGAATaGtaAGCAAGcAtaTaTtTtTGGTGGtggCatTAgTAatcaGgcTGTGTAGgtatTTCgAgGCAggaa +GGGcCgctAgtCgAGTTActGTTcAAATCgAACTCttTCGGcGGCgcAtcTggtAaaaTgTacaAAcATggCaGGaCTcc +CTAGaTTtAGagaAcaaGGAGgAgggCtAaacgGcGgGttTctgACccTaACaCGTtCacgaTCCttaTgGCaTgCcCAc +atTACGtCGaCttGTCtacCTgAgATccaTtagCAATGGAAGCgtggagTATctaaGtgcCttatGtcttagTacGGGGC +GAACctaaCtcacgacTcgaaGCAAtaAAtCCcTAGGAtatCGTgaAacATactTgcCgggGtaTaGCAGggAaGAtaac +GGGGccGGaaAaggCTCggGTCGTCtGcGcCTgcTgtTaTAgTcCGaTgCTttGagggatGcCtCTCGagtAAtCgAacG +gTtTAGtA +>4 +GaAaTagATgGAATAAAgCTCtaCcctAGtatCgtCGAccAGgGtGAcTcCGTCATcTatGAGgATAgtTtGaTTcGGcG +gggAGGgtgcaGGTCTCCcaaTCaTaTCCgtgGCatTtcTTGCTTtaAAaTAgCcTACctGCgAaatCCgcGCAGCgctT +CcgGtCCaTtatAGGGttAGcCtAcAtgtCTGCGCtTccatAtGTCctgttCCttGgcggATcgcTAAACGCAGaAgACa +aGaTCtcCgActgaGTagccggTACtCtGgttgTcccTtcATcgcAGcgAcCacAaatAGGttcAacAGacCaTCccGaC +GCAcAGCtaCtaCCTgTACATAttgtTaATttCTcaaCtCCCGACGTtCcgaGCgtaCAggcctcgTACCaGgTCgCcCC +CAggCAgTCCtCTtcgGATaTccCggTgggAcTcAgGACTCTtgctcaaCActaAtcaaATTTAgctgGAgGCgTaTCcc +ataGaGTATtTATAcCacggAcgcGTTgtCcAcGtagacAatAcaATTcGAAaACATCCcTGCaAgGgattAtgcAGcCt +TgGtcgtaGTgTgCaGCcaAtacgCgttGacatgcTagTTTaGCaAAtGcgGccGggCtATAgAGaaCTGtagCGATtTc +gcTtTaGTgtcgaCgacACTCaCtcaTTaTcGCCGGGTTCgcagcCAcTtcAGtAgGTaCtATCCcaAAGACagGccccT +gGCaCgTcAccCGatCGTCtTTCTTCGGgCGgGCAtGcGGActCGaTCAtCcCctaaCATAAGTtgcCTTGggccGtTag +acAtGAcgCggGaGGATATgcggtCaAgaTggCtTGgCGaTGGgAagtcGATgctgGcACagGAcCTCcaTCAttCtgTT +CgaTAtATTCgagAAAAcagACTtGtAAtcGCTtccgTtcGGGCtgCcaagctgcTAgCaaCcCGcCaTaaCggGaAAaG +GTGgaggtTGtccaCaAacGAcaTtGaCCATCacTcGgGTttgagctcgGCGtCGcataTAccCgGACTCgTCACTTGGT +acCtTCGGcCGgcTggCAaGTattACGGaGCacggcATATATtGtACcggAcccCtAtGatatACaActtcgGTataGAg +GCtggTccGCTCAcTATcGagtagcGAgcCAAAgTcgCaGAgCcAGTctccCTTtTaAttTcACGtCggGcAaaAGTacC +cGgcTGGGcGTgCTgAACAtAAttGtcAgCgcctGTACgaCaTtGATTAAggGCgtTATTtatTaCTGggcTATtATGtA +CtAAtAAAggAgaaCagAcgTgCcCaAcgGTgttcGGgCaGgAAttActatAGaCaAgaAaCCcTCtAaAACGtgAgcGa +AaTccgtAGcaTACATaTAtTaCaAGGGctGGaaGCAaaccGcGgATTcaTGtGCacAtCCGtcGaCgGcGAGcatAtcG +ctctCCAAgATcgACacacCttggACgAgtaataATgTcAcGAAggGCGtgcTAaCTATGCAgTtgCgGTTaGgctATCT +gCAaTTTtGCcaacCCtgagGTaCAGgCcGaaCGgcCaAgGCCTtTcAGCGCTgGttCaAcGcaCTccGTCGCcaAACac +AGAAccTgATTggAtgaagGcGCtTgccAcCCCaaTtTtcgCaTttaGctacTtcCcGcaGTaTaCAaGTTggGtCCGgc +TgCGcgtcgtcTaaaGCatAaCTGaaaAcCAgtCAaacGaTtcCTGTtatacTGacAcgaaATTGctGActaAGCaaATc +cgaGCcagaAgGCtcagaCCCCgGaAacAacgTATTATTaAaGaAgAcGtTTcCAgCcTtGTttacCtTtTtATCcttGt +aCAcgGTgTAGgaCgTCTCaAACgTgTttCCgTCATtAcatCCcCcGCcTtggCCaCttCATtGatAtGgAcTAAAAcCC +AAagcaAAtgTGCGttGttCggggtTagtACgAccCTtGTccggGGcgTtagCCCcGGAgAaatgACCagAgGaAtActA +CCgCCGcGatTacAAtGGgTctctTTcctGctcGaacCCCTtAgAcCgagtcaGtgTcccTCACtgCcgGaTTGcGtgGT +TgtGcaATcaCtcatAAgTCAcCcGaAggcAcAtTGTatccgCAGAgTTGAacGaTAAtaCcggTggcAAcaCTAaTctC +GctTAATGCatcgACcAgcCtagtTattgGAgCTGcgATATActAgCCtATagAGcCAGaCcccAgCAtACGGCGtgtac +CGGGGGgGcgatGaGtcaCGaaAGaGcTagTctctTGGcCTgTcaggttgtggTTctcCttAgTTCGAGAGtactaaCga +AAGaagtCCCccaccgGtgttccCTACcGcGTcTCttAGTGTTtAagactgCccACcgTTgagGGGCAAAttcTtgCagt +ATTAtgGtAgaGTtAtataaGaATTcttgCCtgTACcatccTtgacTatTagttGgTaTggtcGcTCCCCTaCAtaactT +tctATAcCgCgAAACTATTtACTCTATTcgCAGgTaaTGtcattAaAAaTAGCCgtacggGAgctccAGAGcTGcCGTat +TCatttgtcGTaaTTGTtaTAaAatcgGaaaCtctcacCgtaAgacGcTGcccGACTTtTCTgACAacaaAtAAaGgGCT 
+gcAGcGACAtcaCcCaGTCTCCCtTTaGtCcGTGGCGgtaAcGGCGAtCAAaCCAATttTGTgTgtTaCcATGATgaTTt +CTatGaaTgCCtgtGagcgccTAcaTAggGtGATTTgaAAaCGACcTcgtctGGAtGtCcGcTtCcGaCtGaAcGtttCA +ATGTgcTGgttttaAAGctaAcCAgtTtcCaaCgTttaGAAgCGTCccagTgtgaggTGCctCGcTgaaaACtTcAGCTA +TGtcgCtTTatCcCAAaGcaccaGtacAtTAaAaatgGCGATGggcCGtGCCTgCGTcTTTAGagtCaCaGcgaAgaACg +AaGAttaacgtagccgGgTtGTgCTcAtGggtGTgaCtCGcttcCGcActGcggCAgcCcAATctaAtCtCggtgCccga +ACcGCaAAGcTtCAacGTCcAAgaacgTgGtTTttaCCAcTCCCGCgaCcgGTggaTGcACAgTGGtacTcCCCTgaCGc +GgatcCtgtgaCgTataGagcGAACTgATGCTcATTaTctGtTTgTggCAaGGtCgAgtgATGGCGtGCCaTGAGCGGga +TcaaacGaTgGcTatcTATTTCaaAttGaTaTactACcCACcGagagcACgCAAGcTgcgctcgTGaaACtaaTTtgAtT +TcTGTgTtAGaaAAGgGCGtAACcAACGCCgaGCCTCcAcAtGaGtCaTtaGgGTaAAcTtaCAGGGaGtggActatCAT +CcaaatcAAgGAacgATTgcattATtgAcCGaggAtTtgtgGAGaACCTgttCtgACCgtgCcCCgGAGTtcTgAaaTaT +cGTgAcaGGttACaAGgcaAaCgaTaGctTTCtcttgCCCcaatAAcgAgcATttatCgctcActtGCGtcACAGTtagA +CCtAcTGaggGGAGgccGAgtacCgCACCcAacGTAGCtTaCcCtgtTTCGTcCgtaGggTCgaCtCCaTGgCCtCccGt +CtGtTaCgcGTCCGAGaTCcATAGGTAGgGccgAAgTGgcCGTtgTGgGAtgCgAgGCGgAGTTCcAGAggGCtAActca +GgTATaaCaGgCCgcAGgctTtCcaGgAacCGcAcGACCcGgaGcGCgCAGTaCGgaaGgtaTGaCTtagGActCGaTGT +TgTCAtaAGatGtCgggAATtcatgtGttccGcGCtCTTTaGtGGACCaATGAgCACTCtTTTAcCATTgGCTtgTtgCc +ctTttcGCccCctatCcggAcATaGGcaACcggcataagCctGccCtcgACGgTTCgaTGCTTAtGAcACacgTgCAtAa +gcTaTTAcccaggAtGTCGgaatGatTCAgAGACaTCAAgCATgCTGGCaGaCtGgGtcCagtAgcTtgtTGgcgtcgcA +agacaCgCgctGTTTttCCTAgctttGgtgCAtGggtaacGtCgCCgCAGtCgtctcGaTaaggctaTtTccGgAaagAC +CtAGGAGCaGCgagAcaTTAtctAgCCTcGTacGGTCtcCTATcGctGcCgCcCAtGtTGgaCgCTtAAacTCGAcCCCa +TAaAAGaagggagaTCaaTCgATTcgTgaTTcCTGCccAcTCggacaGataGCcGTtTTgAcaTGTTCacaGCgAGAgtA +CtCtgtATCtTTCAGCTGCtaaaAgTAcGcgGctgacccgTgcGAACcGcACAgtgAaCTgGGCCtaCctGcAAAATAGC +CgAATCCTAggtCcAtGCtaaagGcgCcTTtatTTcActcCCGTTgacAgcgTccTCGccGCgTGgacCgactCacaacC +GgAgAaAgGggAtGtAaCGgtCgcTcACcGccGgaccCCgCGgTgCTaaCGGTtGtTgatCgCAcTAGccTgGATtAaCT +GgTGAcGaaaTCacatgGAtGGACgCAaGgtaCtaaCaGCGcTGcGataGgaaacGCGGCgggCtGgAcTACccGgCaCg +gtAaGtCTgGcgCcCtgGCATACcGtActCGcaatcAgctGTTTGCgTcGCACGAGCgGTAcCgCaGAGTCTTtAgCTAg +tagAcGAaGcCTACGtCcaTgAcgGgcAgGgagAGAGTCATGAtaCcgcGcagtTCatcTaccAcgcCCgCacGgCgcaG +AAtAAagTtcGTGtGttCtGcGgcTgcAgTgggtaGgaGcaATGtatcgcaCcGcgCcgGAGCcCcTGGTACCTGtGAgT +GACgttgCCgcCcAgGaGgcACTcAGTATCgcAGaattAgtcAAtcagGCgGcCtACTccaTccGcCtGgTATacATAgg +catactCTACAGcggaAGGAgcctTGCGaGaGtAtcGTAaActTAtACagAAGTtcttTAGcCcgcCCTGacTgACtGcG +ATattGtAGagTaAATgCaAGgGTAgtaaGGAgacGgactTcgCcCtgggTAcaAgccATCgccGtctGCaTGaCCtCGg +CatTGttcActcCctaTGAaCaTtCaGacctGTCgaATtgcgTAgCcAGAaGatTCgTcCGttaacCAGAtcTAgActcg +CgtCCTcgAgGatccgttaACTCTcCATgagGCAcAcGcGgCGGcAGTcCgTccgtaTTTGtgAttgCCCCACcgtGGcC +GgaaTtcGatGttAccAcgccgcCATaTtGGCaTaCtAACGccccTGATCtAgtTAc +>5 +gatATGGGTgGgAGatCgGttaAATgtCaTTccctCgccaGagTAaGGtCtTcAcggAAggGTTtTGTCGttGTtAgAtg +tatGcAAaCcccgGAAgcacTCccGgTccTAaGcATcAAcgcgttgGatTcAttCaAtCtAtGTActTAttTACATtcAA +AgTGtatCgaACCagaagATttCGTATCCAatCccATcGaacAGAtctCcgaGgGtcacATgGaCACCTTcTaCTGATca +CaagtttAcTcCCAATcttgAGtcAtaAttCTaTcATACgcAaCAgGtcTCCgagTGacaACACggCatTgTAtACtaGg +TgCagccGcgTGGGCaTtAtCaaACgAacCtgCTaGCGgcGaGgCcCagttaACgTTaaTATggtAAcGCgcacAGCaGG +AcGtTttttgggggcTCtaTCGccGgTgTTGTCGgAAcAGaggcGcgTaTtctCAatAaaATaTTgcactatTaTgCaCC +CtACaAATCTtCagAATtcaggTtAGaGgAttcgatatCtaaatTTCTgccaTgAGaggAAcgaCgCcGcAaGATgCGct +TcATaGTggcCCcgtccGTaagcTATTTtGtAgCtTCgTCACCGGtAgcaTtgTaAGcCGaGtgTCATatGAcaaccCAC +TGcgATgGgCTaggtaGcCgAcTGagGcGCacAgAGAGTcaAatcGGGaTTcCGcCCAccgCcAGTTAcgaaatATtCCA +cttGGGCCAggCATCaaAgtGaCgGGccGGGCgAgtgAATGAtcagaAGcGacTCtCTccgcCtGgggAagTTgagcGaA 
+cgcAGGgATataaTcgAAatctatgaCAtGcaAGggtgtcGGaTcaTTAtcCAATgtCGtcTAACtGGatggCCCtAGaC +atCaAccCCgTCgtacgtgTgacGaGcattGaGCtTCTCgAtTgcGgcCGCTCgaTgCgCaacCGccGAggTcCtTcTga +gcTCGTTAtaTttGctgTAcCTaTcAaCggCTggaTGGggGAcggAGtTCAcaCTaCGCTTcccGtTggAaTttTcGAcG +gTtGaTgCtATGTttCGgGAaTcgggATtCGtCCAGagTAAcaAGCgACAGgTaACGaCgcAATtACagAccgCgaGTct +tcgGCATaaaacAcgAGcttCGgGcTaaaTtcagGCACAAGcGActACGaAaTAcTaTAgAAaAAgGTcgGcTtcgcagC +acTtcgGcTCCgAAAAAatTGacgGGgTaCtTAaGGACctcagtaccaTAgCTGTtACgAcCtcCCgCgCTATgCacGaT +ActACaTgTAgGCcCgGacCGcTgGGtTACcagtttgcATtTGGatGGgcATGacgGCgcGgAtcAcAtggaTtTtTcTT +aGGCcGACGcGtTaggaTGaGacCGTcGGAagTaCAAAtttccaTACctCgAtCGcGCCtTtATagCAAcaaaAAcTCgA +CacTTtGGCtGcCgccCctcCTgCctCtttGgAggGATACtTcCAaTgtaattgCagtGGGTaCtaTTGaaGaTTgTGgc +AtACCtCCcaCtgaTcAgaaGacctaGttttAaGAaTCGAtgacgaTGCTGtCGcgtcgAGTccACGcccAggtcGAgTC +tcttATaaAagTagtTCTcgGGaCACgCcaAccCgActTATgGaTGtTATTTcgCAgGattCTCCtAcaGaaGAAccGgc +AcgATagtCGTaTaaaCtaATtaaACaggATTaCtACccTccAaAACCACTctTGTTTCTcGGGActaagGcaGCgGTga +aCtATtTCTTtCAGTaCGggCCgcaAAtGATCCgTgAAAaCccAgtTcCagTTAAcgAGACCcggCtCgCCTaGtTTtAG +CGAcGGGTaAGtaAgATcGCgttATCtAacCtaAAaCgGTAAAGggttAATccgCcCtacagGTAaaCAACCTgcTgGTC +cGaACgtgcTcCCcAaTAaCCTcCacTgCtcTAcCCTcTtCTGcGtCCacaCtTGgaACGtTtGGaGatgTAGCAgaaCG +aAActGAcccttTcactggTgttgtAAGcCtGtTccCaTGaCgAAtCagctGgtTACCGgcAcGtaCGctCCTGaTAAgt +cTcTATatGACTaggaACagAggAtGaGACgCtgTCaATCAGATtacGaCcTCCgtaTGCgtTATaaTcCtACgaCcTaG +CgGtTTaaGctAtgtggaggtcAtcttGCaGCaACGATcGttTcTtCTtTctgaAGaccaAacGGaaACcCggcgaTAgT +AtCtATatgtaCaGgTttcgGACTCaAAAgGaGAgTttTTTAcgCCCgATgCcagaGTCTAAaaGtCTGCaCCcacGaCT +CCCTACttcAttgGAGAgCCTtAtgGAacgaGcATCCaTcGTgtTCtcCtCTTctcCcAAACCgCCagCGGttCcagtac +tcTagCTAcgTgtTgCAgCAAttGTaggAacGaCAAACtTgCagCacAcaATcTGTTccgGgtGcaaACTTGTGTgACCG +CGGCcAAtatTTCGgtGTaTaGGtcCCTgTGaaCaaGtAttgCGaTaaCaccGgttGATAaggggtGTCattagtTgTtG +aaAAGAcAgGGCGgTGGTtGcCATAgGgCcgTcttgtCTggaCCttGTAAtAAGGctaagGaagtTgGtcgAcAtctCCA +CAaGGcttTGaCatcaaAGGTtGcgaAgAcAcacCcTtcaCgcAtCgGgcTtGGgCcAcgCATtcCtGgTcAtaGtgGTG +GaGaCAGcATagcAttaACaACAACGcgggACTgCtaAAAAgaggAcCcActTCATagaAAgGCAcGGcaaAAtatCgaa +AtCCGTGtgCgcGCACgGAGATTGaAAcGCtTAgaGTTTACCtgaatTTAaaGGggaaTGcgGTtttcaCACGCggCtTA +gccCtTTcAAAacTTcggtgTAaGGatctAgTTcTaATCAagCTtGggCAtcgTcCCCAagGgAggcCTaCaCACCctaa +tTGCcTtaatTaGctACcTggAcaAtTGcTtagcgGcGGgctTcGcCGgttAAAcTTAAgGccCgCGaCtaTtcGAgttA +GCTcaAgacAaGaGCTGtgtaTCCggaccGaCAtCGcggcccACtAtcGCaTcCATtTGGaAttCaCcAccAcCCgatAT +TGcaGtgCAGaTaCGAGATattGgcGGTaGCcTTAcgCaCAaGGccgcCaCTcCGccaTgtGCCTCtgtcggCGtAgTAa +TAGAagGtCGcGACAcTgAccACcTGCctGatTaTGtaCGgACAGGGTgaCccCGGtCGaGtgCAccCGtTtCGCTtaAg +tCgGGttCTCcgGAtgcAttCTGagTaaAtgGgatCAgtccGgcTGtTCAgAaCATGttagtccTAACaccCGgAAactg +ATcTttCCtcAaggGacCgGcTaGtCGcGgGatAcCtaaTcgAACCccctgaACccAtgGGaACACCgtcATatgAccCG +cCtaTGcTcCagctcaCGAACgccTTcCAgctTtgCgTAAGTcttTaCtGtTtTgCGTGGAccgGAttTtGGcaTagCTa +tgGgcgTcGgAtAGgCCCaGAgcggCgggCggCgaCGCGAcGCAtaTGAaTtaGacGtGtACtccGaGaTGcTGtGcCGa +CtTACgCAcccgcTCCttaCTCtCAacacAgcCACctCTtatgTtaacgcTTGGtgtCccgtactCctgtTgtagAgCga +ggACccGCatgctGctgCaAtATTAgGgGTTaGgggctgaTtTTGcaaACGGGagtGTGCCTAtGgTatGgCgcTGtGTg +tgtagGGGtCGCGACTaaCgCCcGaCAaTAcTTtGccCgCctGtagGGCtggAaAgcGTCAtgagGaaGGTatTGCCAgA +AGCCTgcGcTggtgcGTTttGATCGgaAgGgAccctgAaTgtTTGttCGttacTgctaCCTTtACGCTcatTtttTTcCg +caTaCtTcGaAaacgatCtAAggtAGtaAgGcGTtAaTgtTTAGGCatGacgCgTCactCttgaGAacaaTCtTctAggC +gCAgTGggCCcTTgGTAcCGGgAgAATTaggtggAtaTgCgAGcgAatGaAtttcAGcAtCgTAcgGaCcccAcGtCGCa +CgTTtcctGGctgCtcCTTCAattaGAcTGGtcAcCgAaAgGACAaCacTgccTCTtttGgTcACaTcTgGcAAAgGttC +cTtAGCcacCtAATcgaAAtctCGTtcCACaAAGTTGatCTAgtaGGgtaacAcaCtTaTgAggAAGaagtAGCCacgTG 
+TgACTtggTatCCAgtgGCGaTgcGGggcCGGGtgCTGGCtTcgagtgtCaGaAgagaCCacGCtACGtCAcgAcGtcAa +AtTCGtTCgcCActtaggGAACTgGaCgTAAacGtCTcGgggTATtTCGGAAAATcgccaGaAtcCAacAaTATAgGaTA +TTCcGagCTTaTCCacTgtgGAgAgtTgGgAgGCCcTCacAcgTccATaATgATGACccGTCCGtGGgCCaCCtgtcgac +gctgGGAaCtAtatgGAAcGGGGaTTGatActtCgGCgctTAcGAGacAaATTtTTtGttaTtcCggGTTcGgTTtcTtg +gGtcATcgaGcTGctAgGatGCGtaTaCaTat diff --git a/t/data/test12.fa.fai b/t/data/test12.fa.fai new file mode 100644 index 0000000..93a5ab9 --- /dev/null +++ b/t/data/test12.fa.fai @@ -0,0 +1,5 @@ +1 8789 3 80 81 +2 7958 8905 80 81 +3 4808 16966 80 81 +4 5257 21838 80 81 +5 4592 27164 80 81 diff --git a/t/data/test1_1.fastq b/t/data/test1_1.fastq new file mode 100644 index 0000000..d8e3b08 --- /dev/null +++ b/t/data/test1_1.fastq @@ -0,0 +1,400 @@ +@HS3_9090:7:1008:11310:354/1 +TGGCGGAGGCTTTTTTTTTTTTTTTACATACAACTGTCGTCATAATATGCGCGGCCTTGTCATCGCGGTGCCCCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1438:2003:100013/1 +CAGGGCGCNNTTTTTTTTTTTTTTAGTCGCAACCCGGTGTGCGTGCGACTAGTTGTGCCACGTATGGGTCCCAGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1271:1905:86956/1 +GAGACATTGGAAAAAAAAAAAAAACTGGAGAGACACATCCCGAGCGGGCTAATCCCCGATACTGCCGCGGTTAGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1305:791:83162/1 +AGATCGAGGCTTTTTTTTTTTTTTGGTGCCCGTCGTGCCAAACCGCGTCATTGAAAGTAAGTGGATATCGAGTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2746:14451:329/1 +TAGGTAGAAGTTTTTTTTTTTTTTCGAGAGAGTCATACCCGTGTGAAACCGATAGACTAATGAATTGATAAAATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2708:18759:118692/1 +GTGGGAGAAGTTTTTTTTTTTTTTGTCTGGCACTACACGAAAGTGAAAGCAACTTCTTCGAGAAGATCATTATAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1588:3468:141624/1 +GCATCAGAAGTTTTTTTTTTTTTTAGTGATATCGACCCTTACGATCCCCAAGATCATGATCCCGCTTTGCAATGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2921:15856:57939/1 +TCCTGAGAAGTTTTTTTTTTTTTTCCGCGTTAATGCCCAACTACATGCTACAAGGCTACATCTAAAATATGCAAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1417:15923:95590/1 +GCATGAGATGTTTTTTTTTTTTTTTGACCTTTTGAATTAGAGGAATATATGCTCGCGACGTCCAGTGACGCACGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2102:10514:9268/1 +GCAACAGTAGTTTTTTTTTTTTTTGGGTAAGCTGCATGGCAACGGTCCGTTCTGGAACCACCTCGTTGGGTTACG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:54:3501:193579/1 +GACCAGCCTCAAAAAAAAAAAAAACGGTGCGCTGTGTCTGTTGCTGAGGTTACGCTGTACGCGGCAGTCCTCGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2888:15166:71020/1 +TTCTCTGAAGTTTTTTTTTTTTTTGTTAAATGGGGACACCCGGAAGTCCCTCACCATTCTTGGACATATAGTCGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:782:4801:172498/1 +TCAGCGAGGCTTTTTTTTTTTTTTAATTGCGGTGAGGGCTCCAGATTTTGTATTCGGGACAAGAAACCATGTGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1840:12105:187126/1 +GTTGTCGTNNTTTTTTTTTTTTTTCAATGTGAAATATAGATTTGCTAACAAACTCTTCTGTTAACGTTTGGGTGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2153:1390:90531/1 +GCCGGTTGNNTTTTTTTTTTTTTTGAAGTGTCCGCTCCGCCTAAGCGGTTACTGGTACCTCTAGGCCGTCCCGAC ++ 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2362:6530:45744/1 +AGGTCAGAAGTTTTTTTTTTTTTTCAGTAACCAAGCCTCGAGGAGTCGAACTAGATCGGCTAGGGCATTCGTCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:992:19260:109704/1 +AGCTGAGAAGTTTTTTTTTTTTTTTCGGCAATAAATCGGAGAGACTCATTTCTGAATTATTTTCGGTTTTAGTCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:70:16099:51055/1 +CTAGACTGNNTTTTTTTTTTTTTTTGAGCAAAGCGTATACAGTTAACAGTCAGCATAGCATAGAGGATCTTTCAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1603:19029:119019/1 +ATGAAATTGAAAAAAAAAAAAAAAGCTCCGAAGAGTTTTTCATGCGCAGAGGGCTATCTGCTGAGTATGTCCTGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1872:3885:17311/1 +GATGACACNNTTTTTTTTTTTTTTTCCGGTCTTCGGGAGGTACAAAGCAGACTCTATATAACCTTGGCAGTCGGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1547:15205:108796/1 +AGCTCACAAGTTTTTTTTTTTTTTCATGTGATTTTCTCTGACGGCCAGTGACACGGACTGAGGGGGCCACGTACC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1307:9506:116191/1 +ATGAACAGNNTTTTTTTTTTTTTTTAGGGCCATCGGCGAGACCACGAAGGGTTCCTCACACCCTTGGAGCTATTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1293:13399:104992/1 +TGCTCGAGGCTTTTTTTTTTTTTTAGCCTATCCGTTAGAGTACACGATGACTTGTGCTTTGAGAGCAGCGTTCAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2245:4790:19502/1 +TGAATCTANNTTTTTTTTTTTTTTCGACACCCCGAGGCGTTAGAGCTTTTGTTAGCAACCCGCCGTCGATCGGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1630:16736:9274/1 +AGGCGAGAAGTTTTTTTTTTTTTTCACGGTTTGAAGAGTGCGCAATCGCGGTATTGTTCAGTAAGAGAGTCGCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1548:4346:62294/1 +CCAACCAGGCTTTTTTTTTTTTTTGGTCACTCTTGTTTAATGACCCGCACCCCAATACAGTTCATGCCGACATAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:32:14349:156816/1 +CTGAGGAGGGTTTTTTTTTTTTTTGACGAAAATCCGCTCCAGTCAAGTCGCCCCCATTCCGTCATCTCACTGTGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1322:16932:51209/1 +ATCCTAGAACTTTTTTTTTTTTTTTATTTAATTTCCGATAGGCCAAGATAGAACCGATTCGATCAAGGCCCGCTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2657:15332:141945/1 +CCTCCGAGGGTTTTTTTTTTTTTTTAATGAAGATCTAGTTGGGCCGCAAGTAGGTATAACCTCCTTGCGGATCAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:681:17025:20507/1 +GTCCGCTGTTAAAAAAAAAAAAAAGTCCCTCTTGTTACGTAATGACAATCTGGGAATGAGCGATGAGGGCGTGAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1904:5984:48273/1 +GAGTGAGAAGAAAAAAAAAAAAAATGCCATAACCCGGAGTCGGGCACTGGCGCTCACGGCTTCAACGCTCAGAAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2002:6401:148580/1 +CACTTAGANNTTTTTTTTTTTTTTCACAAAATTCGTCGCTCGACTGCCTTATCTAGATAGGATCCCAAGGAGCTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2051:6388:64396/1 +AGTATGTGGCTTTTTTTTTTTTTTGCTTAAAGCTTTGCCAGAAGACTGTGTTTAGCGTTAAGCGACTGCGTAAAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:333:16095:70445/1 
+TGTTCAGANNTTTTTTTTTTTTTTAATTACTGCCGGCTAAACCCTATTGAGCGTGATACTCGGGTTATAACGCAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:235:4347:163096/1 +CGTAGCTCNNTTTTTTTTTTTTTTCTCTAATTGTCTGAGAGCATTAAAGCCGAGCTGTTACTTGGTCGCTAGCCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2459:19347:74593/1 +ACCCGGCCNNTTTTTTTTTTTTTTATATGTAGCACTACCCTGCCTGGAGAGCTACAAAAGATGGTGTTCGGAGAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:810:16180:96914/1 +ACCAGGTATAAAAAAAAAAAAAAATTCAATAGGAGGTACACACCAGCGTCACTAATGTATCAATTTGGGGCGCCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1634:5631:85585/1 +CATGTAGAAGTTTTTTTTTTTTTTGTCCCCAATCCTGACAGAGCCGTATGCTTCGTGATCCGGCTTTGGGAGAGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2962:11381:21217/1 +ATGTTGAGCCTTTTTTTTTTTTTTGATCTCCAGGGCCAAACACTGTTACCCAAACGTCACAGAGTCTCTTGAAAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2633:13732:192447/1 +TCGGTGCTNNTTTTTTTTTTTTTTCAATTTACATATAACTGTCGATTGAGCCTCTAAGAAACTCTAATGGAGTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2087:17402:125788/1 +AGTCGGAGGCTTTTTTTTTTTTTTCGGTCGCCAGCGCTGCTGGTGTTATCCGAGGGTCTGCAGTTACCTTTCTGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:781:9732:169661/1 +TACTCAGAAGTTTTTTTTTTTTTTTACGTCATCACCCGTAGGCGTATACTGACAATGGCCACAAGCAACCCGGGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1662:16108:137898/1 +GAGCTGAGGGTTTTTTTTTTTTTTCCTATACTTCGGACTATGTTTACACCTCATTATCAGGGACACCGTTCGTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:919:19941:114306/1 +CACAAAATNNTTTTTTTTTTTTTTGGAGCAAAAGCAACGGGTCGTGAACTGTACGATTCTTTTTTATGTGATGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2754:8750:58584/1 +TCAAGTACNNTTTTTTTTTTTTTTACGTATCCGCAAGTCTTCTCTATGGATACGTGCCGGGCCTGATTACCTCGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:336:1521:165232/1 +CTTTGGAGGCTTTTTTTTTTTTTTCGAAACGTGCCTGGCTCGTTCATCCCATACAGGAACACACGTCTACTATCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1355:15756:164948/1 +TATTCTAANNTTTTTTTTTTTTTTCACGCTCACGTCTTAGTTATTTGTTCGCGCCGCGTCTATAACCAACGGATG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2231:4273:60186/1 +GTCCTAGAACTTTTTTTTTTTTTTCGCAATTACCATTCCCATACATGACGAGCATAGCAAACGCCTCAGATTGCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2316:6407:47755/1 +CAAGTGTGGCAAAAAAAAAAAAAAAACGGGGGGGGGGGCTGACTTCTTTAAACTTACCGGGGTACACCAGCCCGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1180:9151:105392/1 +ACTTGGAGGCTTTTTTTTTTTTTTTTGACTATGCCCTGCTCCTTACCTTGCCATTTTCTCGTACTCGCGATCATT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:316:19348:39980/1 +ATGGATAGNNTTTTTTTTTTTTTTACCGACCACATTTAGGACCTCGCTATGTCCCTAGCTTGCGATATTGCCTAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2293:5852:41801/1 +CTCCGGTGNNTTTTTTTTTTTTTTTGCGCATTGCCAAGGGAGCCATGCAAATGGGCCAGGAAAGACGCTGGTACC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS3_9090:7:194:2629:116686/1 +GACGCCGTNNTTTTTTTTTTTTTTTATATTATCCGCCAAAACGAGGCATCCACGAAGAACGTCGCATGTATGCAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:504:19694:38065/1 +CGATGGAGGCAAAAAAAAAAAAAATTCTACGGGGGACTATCGGTCAGTATATATACATGGGTCGATCCTGGGCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:40:16690:103457/1 +ACGTCAGTAGTTTTTTTTTTTTTTACTCGCGCCGTAGAATATACGCTTAAACGTCCGATTGCAATTAATGTACAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2665:14543:75970/1 +TTACAAGAAGTTTTTTTTTTTTTTACGACGAACTATGATTAGAATCTGGCTAGGGAGATGATATTCATCCGGCCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:238:8472:192625/1 +AATACAGAAGTTTTTTTTTTTTTTTGGCCTGAATCACAGCGTTGCGTAGTGTTGCCTAACCTCTTATGACAGGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:457:4583:118080/1 +TATCTCGANNTTTTTTTTTTTTTTCTATATCAATAGCCAATTGCTCTGTCGAGGCCTTCCGTTAGCATCAGTGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2124:7309:107717/1 +TTGGTTGTNNTTTTTTTTTTTTTTCTTCATATAACTTAAGCCTATCGTGCCCGATATCGGTATGATAGCTCCCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2793:15293:1228/1 +ACTCCGAGGGTTTTTTTTTTTTTTGCTGCTTTTCGGTAACGATCAACCTGCACGTGAAAATATATCGGTTGGAAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1021:19152:59391/1 +CTCGTAGATGTTTTTTTTTTTTTTACAGTGTTATAGACTGTAGTTTATCTTGTACCCGGATAGCCTTATGGGCGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1834:14761:60846/1 +ACCACGAGGCTTTTTTTTTTTTTTGAAGACCCCAGCACGGGTCCGTAGGGCTTGTAGCAACAGTCGCAGTGGGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1085:18337:179806/1 +GGGAGGAGGGTTTTTTTTTTTTTTCAGCTAGTAGGCGGCGGACACTCCCGTTTTGAACCTTTTTGAACTACTGTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:224:19111:133561/1 +GACCGTCGNNTTTTTTTTTTTTTTAGATTGAGCCCACTAACCCTAGACTCGCACTATTAACTCCGGGGCATATGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2983:826:9193/1 +CTTTCAGAAGAAAAAAAAAAAAAATTACGAAAGGACAGGCAGTAATCATGGTCCTGAGTGTCCCTTTGTATACAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:338:13920:17267/1 +ATGGAAGAAGTTTTTTTTTTTTTTCTGTACCTCATCTCACTTGACTGGAAACGCACCATGGCTTAAGTCGCTGAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1364:13563:167748/1 +CAGGGAGAAGTTTTTTTTTTTTTTAATTGATGCGTAGGGCTCGAAGTCTGCACGTAGAGGAGGTGCATTTTCAAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:607:13103:139333/1 +TAGCTCCTNNTTTTTTTTTTTTTTCCCAAGATCATTGGATCCTTGTGTCCTGCGGTCAAGTACCGGGATCAGAAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1747:1408:125904/1 +CTTGTGAGGCTTTTTTTTTTTTTTACGCTAACACATCAAATAGCGACCGGTCTAGAGGGAGACCGCACTATAGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1148:16465:13804/1 +GCCGAAAANNTTTTTTTTTTTTTTATACGAATCGCGTTCGGGATAACTATGTTCGTCCTCCCAGCTAAGGTGCCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1176:3250:145877/1 +ATATGAGCNNTTTTTTTTTTTTTTTATTCTGGCGCTATATGCGATTTAGAGCACAGGTACGAAGGCAATCTAACC ++ 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2792:6902:44593/1 +ATCCGGAGGGTTTTTTTTTTTTTTCACTGAGTCGTTAACGGGTTACTAATCTTCCTGTCTTGCTCAACTAAGGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2857:19869:180991/1 +CCCTCGAGGGTTTTTTTTTTTTTTTTCCATAGCGGGGGAAGCCATCCGAACGCAACTGGACATTACTATACCGTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:755:19931:178280/1 +TGGCCGATNNTTTTTTTTTTTTTTCAACATGTGGGGTCCAACATACACACCGAGCAAGATTAGTTCCAACTGCTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2480:155:63389/1 +TAAACGCGNNTTTTTTTTTTTTTTGTGATCCCTGTTCGGCGTGTCGCTCGCCTCACTACTAGCCAGGTGTGACCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2843:10575:3131/1 +ACCCTGAGGCTTTTTTTTTTTTTTGTACTCGTGTTGACGGATTTCAGTCGAGGCAAGCACAACGGATTGATATTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1770:4081:197491/1 +GTTACAATNNTTTTTTTTTTTTTTACGTAACAGATACCATCTTCTTGCACTCGCTGGATCATGAATCTGCGCGGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2062:3642:13147/1 +TAATTTCCGTAAAAAAAAAAAAAATAACGGGTCAGGAAGCTGAGTCGTCAAGGGTTCACCTCTAGAGCCCACAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:193:6561:99678/1 +CGATTGAGGCTTTTTTTTTTTTTTTACACCATGTCGTCAACCCAAGCTAGACCTTCCTACAGCAGGTCCGGACGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2562:3408:19709/1 +TATGTCAGGCAAAAAAAAAAAAAAAGTATGGGGAGCGGCGCGGTTTGCGCCGTATCGGTTCATTTGCCGATTTGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:97:6403:33976/1 +GAAAGGAGGCTTTTTTTTTTTTTTTCGCCTTTTTCGTCTTGTTGCCCGAGGTTAGATATGTTAACAGGTCTATGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1693:9857:189526/1 +ATTGGAGAAGTTTTTTTTTTTTTTGACCAGTGACACGTACCAGCTGTTGCCTGGTATGGGTCCCGAAGTTGCGGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:723:3632:121816/1 +CGCACATCNNTTTTTTTTTTTTTTCTATAAAGCCCTAAGATCAAGGGTTATATGACGATGCCGACAAGCTCATAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:590:19844:171664/1 +CGGATTCTNNTTTTTTTTTTTTTTCCTACCTTACAGTCGGCCCAAGGACAATCGATTCTGATGCAAGTCCATGGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1274:4201:171834/1 +AGTGCTTCNNTTTTTTTTTTTTTTGGCCTATAATATATCTGTGGGATGCGCTAATATCCCTAACGCGGGGTTTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2299:4436:81404/1 +CTGAACGTNNTTTTTTTTTTTTTTCGTCAATCCAACCATGTCGAAGGGTTCTGCTACACTCAACTATGCAGGCTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2962:13933:163719/1 +CGGACTGTNNTTTTTTTTTTTTTTGTTGATGCGTCGGCTCGGACCCTCCACGCGCCCTGGATCACCACAGGACAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:79:10717:191559/1 +AATGCACTNNTTTTTTTTTTTTTTACCTTCAATGCGGTAAGCCCTCCGGCAGATTGCAAATCTGTTAGTCTAGTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:728:6848:44913/1 +AGCCTGAGGCAAAAAAAAAAAAAACTGTATACACTCCACAGAACCCTCCACGAAGTTTCAGATTTAACGAGGAGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1805:19028:29375/1 
+TGCATAGATGTTTTTTTTTTTTTTAAACCTCCGTTCGCGGAGTATCAGATGATGGTAAAACCGGTCAAAGTAGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2525:11691:147816/1 +CTTCGGAGGCAAAAAAAAAAAAAAGAGTTTCGAGTATGCTGATAGCATCCCCGCAGCACGGTCAGCTAATGTTTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1456:3792:55151/1 +CTGTTACCNNTTTTTTTTTTTTTTATAACAGCCAAGTAGTGCATTAAGAGGATAGGCCAAACCAGCCACATACCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1759:15822:177463/1 +AAGATGAGCCTTTTTTTTTTTTTTACGCTGAACAGACTAGAATAGGGCCAAATCTGGCGAACTTGCTATGAATCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1434:12716:156687/1 +AAGCGAGANNTTTTTTTTTTTTTTCGACTACAAACCCCCATACGAATAGATTGATATGGATCCACCGGGGACTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:79:14794:163993/1 +TATTAAGAAGTTTTTTTTTTTTTTGCATTTGCCAATTGACCACGATCGCGCACGCGTGGCTTTCGAGATGGCTCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:313:3610:41049/1 +GTTTAATCNNTTTTTTTTTTTTTTAACGGCCCTAGCCTCGAATTGCCCGGACCGAAAAACCGTCACTCAAATCCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:766:16681:39314/1 +TCTCCGAGGCTTTTTTTTTTTTTTTCGCACACAGACTTTTAAAAAGGCCATCCGAATCCCGATAGTTAGCGTAAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:494:9805:67437/1 +TCTTTGATNNTTTTTTTTTTTTTTAATGTGCCGGTTCCATCCGGGCTACGGTCTTTCAAGCTTGGACACCGTATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:638:6498:188736/1 +TGGACGAGGGTTTTTTTTTTTTTTGTTCTCAAGACTTGGTTAGCGTTAACTTTCTGTAAAGGGGTTGACGCAAGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:350:3180:176176/1 +GGCTAGCANNTTTTTTTTTTTTTTACTCGCTCTATCGCCCATTTTGTCCCGTCCACTTACCGGGCTCGTTCGTTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/t/data/test1_2.fastq b/t/data/test1_2.fastq new file mode 100644 index 0000000..afa3c2d --- /dev/null +++ b/t/data/test1_2.fastq @@ -0,0 +1,400 @@ +@HS3_9090:7:1008:11310:354/2 +AGCTCCGCCCTACACGTATGCTCTGAGTGTGTCTGACTCCTGTCTCAAAATTCATGCGTAGTCTGGGCCTCTAAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1438:2003:100013/2 +GACTCTGTCCTGTCGTTGCACACATATGGCACCGGGATATATGGGGCCATTGCTTTTCATCCTGGCATAACCGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1271:1905:86956/2 +TAGATGCATCGTCTGGTTCTCAAAGCACAAGACATGTAGAAGATAATCGGCTTCCCTGCTACAAAGACACTTGCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1305:791:83162/2 +CTCAGTTCACTTTTGCAGAATCCCGGTGTATTGTACGCGACAGTAAGGGACACGTCGACGCTCAAGGCTCTATAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2746:14451:329/2 +ACACCCATCTACAGCCTCCGAGACTCTCCCGAGTAACAACAATGTCCATTTCATGCAACCCGACTGTAGCGGAGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2708:18759:118692/2 +GAGAGACTGTTGCAGATTATGCTGTCGGGTGTGAGGGTAAACCTTGCTCTCTTTCTTAGGCGTTCACCCCATGGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1588:3468:141624/2 +TTCTAGCAGTGTACTAATGCGAGTGAGCTAAAACGCAGCATCGTTGAGCGGGTCATTTATTACACTATATCCGTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2921:15856:57939/2 
+ATTCTCACCGTGGCACGGATCCGAAGCTGCAAGCAGTTAACACATGATATCCGCACGTAGAAAACGCGTCCGATT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1417:15923:95590/2 +GAAGGGCCAGTCCAACGTCGCTAACGAGATGAACTTAATGCCGTGCGAGATTCTGCGGGGGCATCAGGGGTGCGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2102:10514:9268/2 +TACTCAATCCAAATATTTCAATCCCCTCTGTTCAGACGAGATTAAGCCAGTACACATTACTCCAGACCCGGTCAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:54:3501:193579/2 +TCAGGCTGTGCATTATTGCGTTGCGAAAAGACCATGATTGAAGAGCTCGTTTCAGCGTCCGGTGCTAACCGCATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2888:15166:71020/2 +GAAAAAATATGTCTCAGTTTATCTTCCGTACTCAGTCCCTTGTAGATGATCGCATCTCGTCGCACCTGGGGTGGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:782:4801:172498/2 +CATTATCCTGGGTGTAAGCGGCCCCGCGGCAAGTTGCTTGGAGTCAGTGGAGAGCATCCAGTAGTAGGTAACGAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1840:12105:187126/2 +ACGCAAGCCTCACCTATTACCCTGCGGATCTCGCGATCACTATGCTGCGGCTACTCAAGTCTGGGCACAAGACTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2153:1390:90531/2 +AAATTCAGCTTAGGACAACTTATGCGCCGGTTTCCTCACCGGGGTTTATCATGCGGCTGCGCGTACCGGCCTAAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2362:6530:45744/2 +CTGCACGTGTTGAAAGGTAGCGGTGGACCGCATGACCAATGGCTATGCTAACAATCGTTAAAAGTCGCAACACAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:992:19260:109704/2 +GCGCATCAATAACCCTCTATAGAGTTTGAAGTCTGCGAGGGTGCCAGGGGCTTCAGCTCGACGAAAAGGGAAGAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:70:16099:51055/2 +TGTGCAGTAAGCGACGTAAGCAGCTCGAGTCTCCCGGATTCAAGCGTACGGACTTGACTAGATGCGTACGCATGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1603:19029:119019/2 +AGGTCGTAGAGTTTCCTTAGCTTGTAAGGTAGTACATAACCAGCTGGTGCGTGCAGTATGATGCTATACCCTTAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1872:3885:17311/2 +TGCGCATACCTGGCACAGCCGACTTAGTTACTATACGTCCGTAGTGCCCTATGTCATGCTCACGACAAAGTTGCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1547:15205:108796/2 +AAGGTAAAGATTGTATTTACCACCGGCGCATGCAATTCTAGAAGCACTGACACGAACGCTCCTCAGCTGACGGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1307:9506:116191/2 +CATTTTATTGATCTAAAGGCCATCTGCACCACACAAACGAGGTATTCCGCTCGAACTGGCCCCCTGAATAGCGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1293:13399:104992/2 +CGTAGCCCAACCTTGGCTTGCTAAAAGCTACTTGGTGGACCAGTCTTGGACGTGTACACTCGTGATACCAATCCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2245:4790:19502/2 +CTTTAGGAAACGTTAATAAGACGGCGCATCGAGATGTACACCACCCCTGCTGTTCGTACTCACTAGCGACAAGGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1630:16736:9274/2 +CGCATCGACGAACCAACGTCCGCTGTTATGAATCATTATCTTCTAGATAACGAGCCTTAACTCAGAGGTAATAGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1548:4346:62294/2 +GCATATATTTGCCTACCGAGTCTGTAAAAATTGTCCGTGATGTAACCGACACCTTCATCCCAGTTCCGGATAGCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS3_9090:7:32:14349:156816/2 +AAGGGCCTAAGGAACTTCATCAGGTACGGTGCTACGGTCACAACGTTTTGTTATGTGACCCTTATTGGGATGGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1322:16932:51209/2 +GAGCCTGGATTGACCAGCCACTATACAAATATAGGAAAGTTAGAGATGGTAATCGCCGAAACATGAACCGCCCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2657:15332:141945/2 +GCTAGCATGAACCACTGGTGGCTGTTAGAGCTTTCGCAGTTTGGGGAGCCCTGACTGGGTGGGTCTAGAGCTTAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:681:17025:20507/2 +AGCAATAGCAAGATAACTGATTACCCAGCGCGCCATTGCGGGCGAAGTGCTTAAGTTCGGCAAATACCGAATACG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1904:5984:48273/2 +CAAGTGTTTATGGGCATCACTTCGACGACGCTTTAAGGTAGAATGTATTTAGCACATAATACACCTGCTTTGGCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2002:6401:148580/2 +ACACCGACGAAAGTCGGCAATGTCTGCATACTCCTGTTGTTCGGAGCTTGTAAGAGCGTTGCATTCTCCCTGCAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2051:6388:64396/2 +CCACTTAGCAAAAGCTGTGCAAATGCATCTACTCTTTAAGTACAATGAGCGTATGTTGGTAATAACCGCGGCCGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:333:16095:70445/2 +ACTTTGACTATCACTAAAGTTGGCGCTATCTGATAGTCCATAATAACCGTGGTTCTGAAAGGGACAATATGGCAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:235:4347:163096/2 +CGGGCCTAGGTCGGGAGCCACCTGCATGGGTCTATCCTAATCTCCAACTCAGCCGCGTGTTCACTCAGCGTATGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2459:19347:74593/2 +TCCTCGTCCGGCTGGCGGCTTCGTTCCTCTACCGCTGATGCAACCAGTGTGGGCCTCGTGACGGACTAAGTAATA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:810:16180:96914/2 +TAGGGACCCCACGAATGCTAGAAGGTCGAGGTAGTTAGTGATCTTTACTCGATATCTCTACCGGTCACCGTAAAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1634:5631:85585/2 +CCGCCTATTCGTTGCCCGGATCGTTAAAAAGACCGCTGAGCTGGCACGAATATTGGCAATTGACGTACCCGCGTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2962:11381:21217/2 +AACTAAACTCCCCTCGCGCGACAGCTAGACTTGAAGGCCTATGCTCATCTTTTAAACGAGAGCAGCCGGGTAACA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2633:13732:192447/2 +AACTGAGGGTCTTATCCAATAAGCTATTAAGGCTACACACCTGTTTTCTCTTTAGCAATTGGACTACGCTGAGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2087:17402:125788/2 +GAAGTCCATCGAAGATACCACAGCATCCCCGCAGTACAACCTTTCTCGCGGACAGGGCCTTACAATACCGTTCCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:781:9732:169661/2 +TAAAAGTGTAAGCGCACCTTTCGCGAGCGGGACCTTAGATTTAAGTTCGGTAATTGAGTCGACGCCACGAGGGAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1662:16108:137898/2 +GAGAAAGGGAAATTTGTGACTATTTATGATGATCCCCTGTCACAAATTCTAAGATGATGTGCTAGCCCCCTCTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:919:19941:114306/2 +CGACTTGCTAGGGTCCATGGGTGTTCGTTATTGGTATTCTGAATTAGTCCCCTCTTTTTTAACATCCGAGTCCTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2754:8750:58584/2 +TCCTGGTAGCGACGGTTACACACGCAAAGCCTCACGGGTACTGTAATATTCTATTTGCTCCTTCTTACTGGAGAT ++ 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:336:1521:165232/2 +GGGAGTATACGTAGTGCCGACAAGTGATCCGGATGGCTAAGATCCCCGACCCGTTTTGGTAGGTTCCGGGAAGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1355:15756:164948/2 +CGTGCGGCCTTATTGGGACTCGACGCCGTCACAGGATATATCGCGCTGAGCGTTTCATATATTCATCGGCTGTTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2231:4273:60186/2 +AGGGTCATGACTACCCGCACTGATATCAGTATGTATGGTATGCTCGCGGGCATCGGGGAGCCAGTTAAACACTGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2316:6407:47755/2 +CGTGTAGGCCCGACTCCATGTTTTGCATTGCTATTTCCCACGCCGTCCCACGTCTCTGCATAACTGGGAGGTATA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1180:9151:105392/2 +AAGTTGGAATGAACCTCGAGTTCAAGGAGTATCCCGCAGACTTTAGCCGTAAGGCAGACAGCGCAAACTAATCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:316:19348:39980/2 +TTGTGCAGTAGCGGATGTATTGTAAGCTGGATGCGGCCGACGTCGTGACCCTCTTATTGAGCAGCTCCCACACGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2293:5852:41801/2 +CATGTTTAGCAAGACTTTCTCTCAGGGTGGAAGAACGGCGCCGATATATCAAACACAAATGCAAAGTGAAATAAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:194:2629:116686/2 +AGGGCGTACAACAGATGGTAACCACGGGGTTGACATAGACCCTGCACCTATGGATTATTCAGGGGATACGCTCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:504:19694:38065/2 +GATGGGTTGTGCGGCATGGTATCGTGGACTAGTTGGACCAGATGTAAGCGTGTATCGCGACTGTAACCACTGTTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:40:16690:103457/2 +TCGATCACAGTGCATGCGTTCTATTCCTATGAACGAAAGCTGGACAAGAAACGCTATTCTTACAATTAATATCGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2665:14543:75970/2 +CAGACTGAAAGACACCCCGCTAACCCCCTGTGACGATTACGCTGCAGGTGTTGGATGGCTGTCACTCGCACCAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:238:8472:192625/2 +TCTTTAGAGATCCCTGATTTTAAACGATACCCTGAGTACCGTAGTGAGCAGAGTATGTCAAGTCCGAGCCTCGTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:457:4583:118080/2 +GGGATCGCTCGCGCCGTTAAGGGGTATATAGGCCTCCGTGACTTGTACCGGGCTCGACTTGGCGCCTAAGTAGTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2124:7309:107717/2 +GACAGGGGTGACCTATTATCGCACGCAACTCTGGCAAGATAGCTATGTAACCCAATTCAGCGGCGATAGATCGGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2793:15293:1228/2 +CGTTTCCCATGCTCCCTTAAACGCCTCATCATGCTCGGCCATTTTTCCCGGGAGCTTCTTGGGTAGGTTCGATTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1021:19152:59391/2 +CCTGGGCCTAATCTATCGTAAGACCCGCGGTTCGCTCCACCGTATAACAGTCAGCAGGTCTAGTAGTAGCGATTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1834:14761:60846/2 +CATCAGCGGGATGTCTAACAAGCATCATATTCGTCATCTAGACTTATCATCGTACCGGCGATTGTATAGACCGTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1085:18337:179806/2 +ATCTAACTCTGATTATCACATATATTGGTGGCACGCAGAATTCGATGTCAGCGAGGCTAACACTCGGCAGCCAGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:224:19111:133561/2 
+AAGGCCATGTATCCAACTCAATCGGCCTGCCATAGCAACTCTGTGTACAATGAAGAACCACCTCCATCGTGCCTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2983:826:9193/2 +CATACTGCCATGGACCCTAGAAGAACCAGGAGTGGGAGAAAGAGGTATCGGATTCCTGGGGGTAGTATCTATCAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:338:13920:17267/2 +TCGATGGGTGTTTTGGCTTCATAGATTATATTGGCGCCCTCAGAAATTATTGCAACGTCCGCGCCTGACGAGCTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1364:13563:167748/2 +TGGTGCTATGACGCGTTCACAAATGAAGCTCTAAGAGAACAGCAACACCACCCTGAATACGTGTACTCTGCCATA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:607:13103:139333/2 +AGCTTCGAACACGTACACGGTAAGAGTTGGGGTAGCGCGTCCACTGCGAACTGCCGGGTTAATCAGAGTGTGCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1747:1408:125904/2 +AGAATAGCGCAGAGGGAGCGATCGTCGCGTGACGGGCTTAAACTTTAGGTTTGACTTTTTGTTCTCAGTCCAGCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1148:16465:13804/2 +GGATATACCCTGGTCGCTACGGTTGGTCCTGTACTACGACCACGACGGAGAGGTCGGTTGGCTTTCCCGACTGGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1176:3250:145877/2 +CTTTGTCTGTAATAGTACCAAGTTGAAAGCTGGGTACGTGGGCTGGGGCGCCGACACTCCCCTGACCATATTGGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2792:6902:44593/2 +ACTTGGTAATGCAACACTTGGGTGACCAACCCATCACCTCGATCAACTGCATGGGCTTGCAGACATGAGTCTGGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2857:19869:180991/2 +TCTGCTATCTTCCCCAGTAACACATCTTGAAGTATTCTGACGCGCGGAATGTGGCTGAAGGTTCCACCAAACTGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:755:19931:178280/2 +AAACTATAAATATCAAGCACCCTGGATATCAACGTATTCCGAGTGGCCCGAGCGCATTCCGTGCCTACCCGGGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2480:155:63389/2 +ATGGATGTCTACGAAAGAGCCTAGTAAAGTACTCGCCCGACAGGAAGCTTCACTTTTGTTAGTGGCATAGTGTCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2843:10575:3131/2 +TGACTAGCTTAGAGAAACCAGTGTAGCGGTACACTTCTGTCAGGGAGTTCAAGCCGGAATTATATTAAAAAGGTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1770:4081:197491/2 +CTCTTCGGAAAATTATAAATTTGCGAGCATATGTTTTGGCGCGTGTCTCCCATTACCATCGATAGGGAGTATGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2062:3642:13147/2 +TAAGCCGTGATGTCTCACTTAATTGGCTCAGCTGGCCCCACAAGTAAAGGCCTGGAAGTGTCATACACGAAACTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:193:6561:99678/2 +GTTAAGGAACTTAGGTGAGTATCATTCTTCCACTAGGGCAACAATTTACCATCCGCCCAAAACTCTAGGGTCGTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2562:3408:19709/2 +AAAGACGTCAGACTCTTCTCCCATTGCCGGACTCTCAATCCTCGACAATAACATACGAATCCCACGTATCACCAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:97:6403:33976/2 +TCAACCTAGAGCCTCGCACATTTTGTAGAATACTGAAGGGTTCGATCCGATGGCTGTCCTGATGAACGCTTATCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1693:9857:189526/2 +CACTCAGAACGTCTTTCCAGATAGTACAATGCGAAGGCCATTACCGTGGGGATTCGCAGGAGTTGAGAAAACCTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS3_9090:7:723:3632:121816/2 +CCAGCATGCTCCTACCTCCAAAAGCCCTTTGTCTAGATCTACGAGTAGCGCGTTGAAGACGTTAATGCCCACAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:590:19844:171664/2 +AGTTGGTACTATTGCGGATGAGGCCAGTCAATGGACATGTGTATGATCACACCACCGGATCAACCCGTACTTTCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1274:4201:171834/2 +CAAGCGCAACTCAATACTTTGCTAGGACTCTCTTGAGCTGAATGCGGGCTGTAAGTTGGTGAATAAGGCGCCGGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2299:4436:81404/2 +AAGAGGGATGTGAAACTTAGGACATGGGCATAGGATGCCAACTGTGGATGGGTTGTCATATGCGTAAAACCAACG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2962:13933:163719/2 +CGATCAACAAGTACAGGATAACTCGCTGATATCTTTAGCTCCGAAGCTAAGCAATGAAGTACTCACATTACTCTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:79:10717:191559/2 +CGGATCTCGTACATGTGTCGACCCATGAACTATCTGCCTGTAGTCCATTGCACTGAATATGCTATCTGGTAAGTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:728:6848:44913/2 +TCTGCTTGATTTCGCGTATGTTTCACGTAATCAGCTGAAAAGTATACGGGGCGAACTATTAGCTCCCCATCCGGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1805:19028:29375/2 +CGGCCGGCATCCGGAATTTGCTCGTAAATTTAAAAAAGATCGTTTTGTCGTATCATCATTCCTTAGGTGCCGTCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:2525:11691:147816/2 +AAAAATGTACACCCATTGACTGGTAGTGACGAGGTCAACGCACCATTATGCTATCTGCCCAGAGTCTTCTAGTTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1456:3792:55151/2 +TTCACCGGTACTGTTGTGCAACCGAATGGAGGACGGTCTTTTGGTCCGAAAGAAGAATATGTATACAAACACCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1759:15822:177463/2 +TCAGCTGAAAAGAAACTAGTTAGGGGAGAGCGGCGTCGGCTCGGTGAGCACCCACAGCCGTAGTTTTACCAGTTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:1434:12716:156687/2 +GCCCCCTACCAGGTGGAGCAAAGCGCAGTTCCGGTTGTGATATACAGCGCCTTGGCTTAGCATTGACCGCAATTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:79:14794:163993/2 +TAGTACGATCCCGTACAATAGTTGGTGATGACTTACGATCATTAATAGTCCCGCGTGGGGGGTGATTGCTGAAAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:313:3610:41049/2 +CGGCAAGAATCAGTTTTTTTGTTTCTTGCACAAACTGCTCTGTTGGATCCTGTGGCCGGACGAACTTGTGTTTAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:766:16681:39314/2 +ATTACACCGTGTAAGTAGAGACTGGAGTCAAAGGTTGCGCGCGAGTCCACAGAACTTACTAAACCCTCACGTTAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:494:9805:67437/2 +CGGGTCGCCAAACAAGCGGGTATTCCGTCGCACTCGCCGTGGACTGCGCAATTTGGAAAATGGGGACGGCGACAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:638:6498:188736/2 +GAATTACCGTAAGTCGAGTACTGAGGAAGACAGCCGAGTGTCAGCCAGTGAACGCCGACTTGACTGTACAGCATA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS3_9090:7:350:3180:176176/2 +CACAGAAGCGTTTACACTTACTAGATTCACTACCGCGGAATCGTCCAATCTACCAGGAATCTATTTCACCGCAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/t/data/test2.bam b/t/data/test2.bam new file mode 100644 index 0000000..18aaa1b Binary files /dev/null and b/t/data/test2.bam differ diff --git 
a/t/data/test2.bam.bai b/t/data/test2.bam.bai new file mode 100644 index 0000000..e357bb6 Binary files /dev/null and b/t/data/test2.bam.bai differ diff --git a/t/data/test2_1.fastq b/t/data/test2_1.fastq new file mode 100644 index 0000000..7c9e9d3 --- /dev/null +++ b/t/data/test2_1.fastq @@ -0,0 +1,400 @@ +@HS46_3182:5:2405:11074:43547/1 +CAGTCTCCNNTTTTTTTTTTTTTTTGGTGCTACCGAATATAGAAGAACACGTTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1341:11922:194601/1 +GGCATTATGCAAAAAAAAAAAAAACCTTCTGTATCGGATCATCCCAACGGAACT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:272:17688:181326/1 +GTCAGGCGNNTTTTTTTTTTTTTTGTACCTCCCTGGGGCCCATGTTCATCCGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1564:15853:100266/1 +TCCTCGAANNTTTTTTTTTTTTTTCCATAGCCTCGTATTACTACGTTCGGTCTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1752:18919:39066/1 +GTATTGTCAAAAAAAAAAAAAAAACTTCAGGATAGCAACCAGGCATGGACGCGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1408:2410:48242/1 +GGTACGAGGCTTTTTTTTTTTTTTACTAGCTTTACAGCTGAAGGGCATCGGCCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:381:4299:36056/1 +CGTAAGAGGCTTTTTTTTTTTTTTTTTCTCGAATATACTGTGCCCGCGTTTCCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1073:8428:128989/1 +CATTCGAGGCTTTTTTTTTTTTTTAGGTGGGGCTACACCTCAAAGTATAACGCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:771:4279:15727/1 +GGCTTAGATGTTTTTTTTTTTTTTAATACGTTGATCTTTCGCTACTAAGTTGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:584:1207:121853/1 +GTTCCGAGGCTTTTTTTTTTTTTTATACAAGACATAGGGTCATATTGTTGTCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2354:16276:7208/1 +TGGCGAGAAGAAAAAAAAAAAAAACATACTAACCGGATGATGGATTCTGTAGTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:80:5305:54651/1 +GGGCCAGAAGTTTTTTTTTTTTTTTAAAGACCCGAGAGGGCCCTAGGAGATTCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:106:2551:71099/1 +TGGTGCGGNNTTTTTTTTTTTTTTGTGTTCCAGTACCGCTAGTTGTAGAGTGGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1817:7790:154523/1 +CAATGAGAAGTTTTTTTTTTTTTTTTGATTGACAATCTCAGAGACCCAAGCTCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:178:16976:72000/1 +AGGAGAGTAGTTTTTTTTTTTTTTGCTGACCATCAATGGGTACGACTGCCAGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2547:4151:12793/1 +AAACTAGAAGTTTTTTTTTTTTTTTTTCCCCGAAGACTCACGAAAATATCCCAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2291:14901:34113/1 +GCTAGAGANNTTTTTTTTTTTTTTTAGGAAGCCTGAACAAAGCTACGTTTGGTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2934:2829:31483/1 +AGTCTTGAAGTTTTTTTTTTTTTTTTGGTGTGCTTAGCGGCCAAGACTCGGAAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1249:16218:147727/1 +CTTGTAGAAGTTTTTTTTTTTTTTTGCATGTAGCTTACGTTGCTCCGCAACGAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:553:771:39903/1 +TCTTCTGAAGAAAAAAAAAAAAAAAACGGGGTATCAGTCTGAAGCACTTCAGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1223:9525:65794/1 +GGCCTAGAAGTTTTTTTTTTTTTTTCTTCATTATGATGCTTAGTGAAAATTAGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1191:17978:187481/1 +TCTAGGAGCCTTTTTTTTTTTTTTAGGAAGTAGTAGATATTCCCTTTCAAGGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS46_3182:5:3:10917:33452/1 +TTTCTAGAAGAAAAAAAAAAAAAATCGATAACGCTGTCAACCGCGACCCCGCAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:177:10045:179524/1 +TTTTCACAAGTTTTTTTTTTTTTTGGTCGTTTGGATGCATTTACCGTGGCCACA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:946:13566:137436/1 +GCTCGGAGGCAAAAAAAAAAAAAAAAGACTAGTGGTTTACCCGGAGGTCCCCGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1148:656:9363/1 +CAGGAGAGGCTTTTTTTTTTTTTTCATCATAACCTCTCTTATAGGTGGTGTCTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1498:3307:70020/1 +CTGATGAGNNTTTTTTTTTTTTTTATCCGTCCGTAGGAAGTCGTAAATGTCGGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2248:4900:51665/1 +CAATGGGCTTAAAAAAAAAAAAAAACGAGGAGTGTGGGTCATGCGGTGCCGTAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:807:7391:123290/1 +GGTCGAGTNNTTTTTTTTTTTTTTAGCTAATTGAGCAGAGTTGGCAAACTTCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2455:17864:5895/1 +TGCTGAGAACTTTTTTTTTTTTTTCACCGCGTGGTCTGTCCGTGGGGTGCCCGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1257:14923:50658/1 +GGCCGAGTNNTTTTTTTTTTTTTTCGGGTCTCAACCTGGAAATTTGAGCGGAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:492:12915:12514/1 +GACATCAGGCTTTTTTTTTTTTTTAGTGCTCCTTGTGTGCGCGCTTCTCCATGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:235:7270:100350/1 +TCACCAGTAGTTTTTTTTTTTTTTATGGTACAGCAATAGGTGAGACCAGCGTCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2260:14085:3683/1 +ATAGTAAANNTTTTTTTTTTTTTTCACTACATCGACCTGTCTACTTTTGCACCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2169:9651:142664/1 +ATGGCAGTAGTTTTTTTTTTTTTTTTACATCGCGTGTAGATATATTTGTCGTTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:540:14663:48756/1 +GCTGCGAGGCTTTTTTTTTTTTTTTTTACTTATCAGTCTAAAAAGCGAACAGGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1091:11404:166341/1 +AAAGGCCTNNTTTTTTTTTTTTTTAAACCTAGGGAGCGCCGCTAGGTGCAGCTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:608:7328:131607/1 +GAGTGAGAAGAAAAAAAAAAAAAAAGTTGAATGCATTCATTCTTTTCTAGTCGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:522:3049:47753/1 +CATAGAGAAGAAAAAAAAAAAAAACCTGTTGGTCCCCGAATAGTTTGCGGGGAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1283:16887:169154/1 +GTCATGAGCCTTTTTTTTTTTTTTAGTTTTTTTGACGTACGGCGTGTGGCCCTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1095:4319:4606/1 +ACTTATGCNNTTTTTTTTTTTTTTTAAAAGGAATGCACGTATCTAATCGCGTCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:752:19585:108953/1 +TCCGGCAANNTTTTTTTTTTTTTTTATTTGGTGAAGATCTTTTCCAATCCGTGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:752:3015:99805/1 +CACTTGAGGCTTTTTTTTTTTTTTCGGTGGTACGTAGTTCAGGCTAATAATAGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:332:4846:77772/1 +TACCTCAGGCTTTTTTTTTTTTTTGCTTTTAAGTGCCACGGGGGAGGACTGGAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1140:784:66805/1 +AACGTCGGNNTTTTTTTTTTTTTTCCTACTCTTACGCCACTGTCGGGAGCCGAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:473:18338:33594/1 +TTAATAGAAGTTTTTTTTTTTTTTCTAACTCGTTTCGCCCCGGGGATCGAACGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS46_3182:5:2702:3903:96584/1 +CGAGTGAGGCTTTTTTTTTTTTTTGAAATTGCTCACTTACGGACCTTCACCACC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1797:11052:97278/1 +TCGTTGAGTAAAAAAAAAAAAAAATACGCTCATGGTCATTACATGCGCTTTAAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:820:13922:80241/1 +AAAGCGAGGGAAAAAAAAAAAAAACGCCCCAGTCAATATTTTTACTAATTGGTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:555:4732:87380/1 +CAGGTAGAAGTTTTTTTTTTTTTTCCTGACGTACTACTTGAGTCGGCGGCAGTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:659:12505:103681/1 +GCGTTGTGGCTTTTTTTTTTTTTTGCGCCTGTAGTTCAGCTCTCTAGCTGACAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:59:2864:12297/1 +TCGCTCGANNTTTTTTTTTTTTTTGTTACGAATTCAGAGGTACGCACCAGCCGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1383:4754:130496/1 +ATTCTGACGCTTTTTTTTTTTTTTCCTCTGTCCAATTGAACAGTCTGGTCCGGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2240:16766:66728/1 +CATGGAGAAGAAAAAAAAAAAAAATCGTTCTTGATGACAGTAGTAATATATTAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2100:7817:14667/1 +ACAGTAGAACTTTTTTTTTTTTTTGAATCCAAACTCTGCGCCATAAAATCATAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:895:13916:72786/1 +AGCGCAGATGTTTTTTTTTTTTTTGGAAATGGATGTAGGATTTGCCAAGAGTGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:418:13660:161873/1 +AAGAGAGAAGTTTTTTTTTTTTTTAATGATGGTCAGTCAGCAATCACCACCCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1283:12361:37079/1 +ATTGGGAGCCTTTTTTTTTTTTTTGCCGCAGCATAGGGTTCTCAATTGCTGAAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:347:5935:115159/1 +TTAACCGCNNTTTTTTTTTTTTTTAGGAGACACGAGGATAATCGTGAGTACGAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1983:1677:46718/1 +TTGGTGAGGCTTTTTTTTTTTTTTAAGATGCCTTGCGTGGATGCGAGCCCTGTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1380:1444:154221/1 +GAAACAGCNNTTTTTTTTTTTTTTCATCAGTATCCCGCGGTCGAGAGTCAAAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2040:5556:131112/1 +CACTATCGNNTTTTTTTTTTTTTTGATGCCATTCATCGGCATGGGTGCGTTGTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2789:7394:44852/1 +GACACAGAAGTTTTTTTTTTTTTTGATCTGCAGCTTTATGCCGTAATCGAGACT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1215:14424:11602/1 +GTTGTGAGGCTTTTTTTTTTTTTTTAAAGTGTTCCCACCCACCAGATAAGTGCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2475:14123:112282/1 +TCCATAGAAGTTTTTTTTTTTTTTTGGAGGCGATCCTATGACGAAGAGATAATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2546:19759:182333/1 +TTTAGAGAAGTTTTTTTTTTTTTTCTGCGGCGATCGCCGGAAGGCGTTGCATAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1849:14231:162499/1 +TTAAGACAAGTTTTTTTTTTTTTTTTGTCCAAAGATAATTCTCTGAGAACAGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:662:17029:25695/1 +TCCCTGACGCTTTTTTTTTTTTTTTTGCGAGATTGGACGCGCCGTAGAAAGTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:566:4782:154015/1 +CGACCAGAAGTTTTTTTTTTTTTTTTCCCGCCCTCGGCGAGATGAAGGCTCCAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2085:5947:93787/1 +CACCGTAGNNTTTTTTTTTTTTTTCTACCTCTTGGACCTAAACCAGTACCGATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS46_3182:5:1281:315:57513/1 +CCAACAGAAGTTTTTTTTTTTTTTGAAACGTATTACTTCTAACGTTTGGAGGGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2225:9077:172869/1 +GCCACGAGGCTTTTTTTTTTTTTTCGTTCTAAAGTTCTCTCCCGTACTTGAGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:373:3420:105949/1 +TTAATAGAAGTTTTTTTTTTTTTTCATTTATGATCAGGCGGGATGTAACATATG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1805:9722:68857/1 +CACTGTAGNNTTTTTTTTTTTTTTTGCATCGATCACTATTCCTTGACAATATTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1350:15373:181220/1 +GTTTAGAANNTTTTTTTTTTTTTTCCTCTATTCATTGGCGTGCGGTGCAGTGCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:44:17607:19283/1 +TCCTCGAGGCTTTTTTTTTTTTTTACTCCGTCGTAACGATTCGCCGCCCGCCGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1732:2108:100386/1 +GTTCAAGTNNTTTTTTTTTTTTTTGGTATGGGCCCTCAACTTATTGCTCATTGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:250:15883:128840/1 +ACCTCGAGGCTTTTTTTTTTTTTTTCCTGAAAGTCGACGACTCTCACGAAATGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2113:7932:11007/1 +GGCAGAGAAGTTTTTTTTTTTTTTAATCAGCTGTAGTACTGACCGACCGAAAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1891:12437:47748/1 +TTCCGAGAAGTTTTTTTTTTTTTTTACTTTCGCTGGAAACGGCCCTCGTCCATT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2369:5530:14886/1 +TGAATAGAAGTTTTTTTTTTTTTTATGCCATGTAGGACCGCCCCCAGGTGTCAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1742:17056:119120/1 +CTCGGCAGGCAAAAAAAAAAAAAATTTCCATTCCAAAGGACTCTTCCCGAACAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2687:831:113506/1 +AACCTTCCNNTTTTTTTTTTTTTTTCATTTATTTGTAGGCAACAGAATGCGCGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:451:18172:127749/1 +AAAGTGAGGCTTTTTTTTTTTTTTGTTTTATCCAGAGCTGTTAGGGTTCTTTCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1906:6140:154562/1 +ACCCGAGAAGTTTTTTTTTTTTTTTAGAATCACGATGTCAGGACGCAATCCGTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:745:10175:155937/1 +TGGCTTGAAGTTTTTTTTTTTTTTCCAACAGCCAGCCCGGCACACGAGGTACAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2679:13605:42940/1 +CACACAGAAGTTTTTTTTTTTTTTGAGTATGTTTACGATACGTGTGCTAGAGTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:525:19584:47076/1 +CGGACAGATGTTTTTTTTTTTTTTATCGATTGACTCCTGATGGGTGATTCCGTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:541:14390:18530/1 +GCAGGCGANNTTTTTTTTTTTTTTTTTCTCCCCGCGGCACAGTTTGATTCGTCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2371:19010:102788/1 +AAGACTATNNTTTTTTTTTTTTTTAAAGGATATAGAAATTATTCTGGGGCCGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1477:14980:45119/1 +CTCGGAGAAGTTTTTTTTTTTTTTGGTCGATCGCCTTCGCTCCTGGTGTACCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2719:11588:38543/1 +AGTTGAGGNNTTTTTTTTTTTTTTGTCGTTAGTGAGGTGGCCAAGTTTAGCCGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1734:8972:136811/1 +GTACTACAAGAAAAAAAAAAAAAAGGAATGACTCCGTGAACAACGCTTCACGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2208:6046:184805/1 +GTACCGTGGCAAAAAAAAAAAAAATACCGATCGCATCGGCCGCCTCAACGATCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS46_3182:5:173:5726:155286/1 +CGGGCAGAAGTTTTTTTTTTTTTTCGCTTCGTGGGAGGGTCCGGCTGACTGATG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1308:11903:50795/1 +AGCCGGAGGCTTTTTTTTTTTTTTGGAGTCTCATACGAGCACCACCCTTTTGGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1898:10887:153698/1 +TCATCCTTNNTTTTTTTTTTTTTTACGGGTCCTGTCGCTGCCGTAGACATGGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:772:13347:20065/1 +AATTTAGAAGTTTTTTTTTTTTTTCCATATATATACCCACGCATTGGGAGGGCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2480:4129:148337/1 +CATATGTGGCTTTTTTTTTTTTTTTCAAACAGACTATGTCATGAAAGCACCCAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:256:18917:52058/1 +GTGCGAGAAGTTTTTTTTTTTTTTGTGGCTTCTTATTTTGTGACTTTCCAGATG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/t/data/test2_2.fastq b/t/data/test2_2.fastq new file mode 100644 index 0000000..77f752e --- /dev/null +++ b/t/data/test2_2.fastq @@ -0,0 +1,400 @@ +@HS46_3182:5:2405:11074:43547/2 +CTAGGGCTGTCCAAGCAGATATACGCGCAACACAGAGTGTGAAAATAATACTCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1341:11922:194601/2 +TCCCTCCAGAGGTTAGTCATAGTCAGTCCCCGCACGACGTGGGTTAGGGGTTGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:272:17688:181326/2 +GTGGGGATGTATTGACTTCGCGGTTCTGATCCCACTGGGTAACAACCGATGGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1564:15853:100266/2 +GTGTATCCAACGATCGCTTGATTGGTGCTGTTGCGGGGGAGTAAACATGGGCAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1752:18919:39066/2 +GCGGACCATTCAGGCAGATTGACCTTGCCTGCTTCTACTGCATGCACATATTTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1408:2410:48242/2 +ATTATATGGATGCGTCATTTGTTCTACGCACCGCCTGTCCGACCAGTAACCACT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:381:4299:36056/2 +GGGGGTAGCAGCTATATATCCCCACGGGCTAAGAGCTTAGACCCAAGAGGCGGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1073:8428:128989/2 +TTATATTCTGGAAATTGTTAAGCGTCATAGGCCGCAAAACCCGATTATCCATGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:771:4279:15727/2 +GCTTTCGATTTACAGCGTGTGATGAGTACACTCTCGCAACCAAGGTCCAGTGTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:584:1207:121853/2 +GTAAGAAGTTCTACTAAGGCAACCGGCTTAGGCTGATCAAGAATGTCGGGCGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2354:16276:7208/2 +TAGCAGGCTGTAACCCGTGAACTAACTCGGAGTTGTTTGATGGCCGACGACACT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:80:5305:54651/2 +CTATCCACTCGTAGTCTAATAACGCGAGTAGCTCAAGGCAAAGTGTTTGCTCAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:106:2551:71099/2 +TAGTGACGCTTTGAAAGGTCTTGACTTTAGTAAGCCATTTTTTGAAGCCGGCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1817:7790:154523/2 +GGATTGTCTTTTTCAGGCGTTTGTAACCTGTGCTCTAGACTTTATTGATCGTTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:178:16976:72000/2 +GTTGGACGTGTTTTTGGGTTCGGGAGGATTCCCGAGCCACTCACAGGATTTATG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2547:4151:12793/2 +CAGGCCTCTCTACTCGCGATATTGTTAACTTGTGGGCTGTTCCATGCGGCAACC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2291:14901:34113/2 +ACGGCGGAATACGCTCAGACCGAACGTCTTAGACCTTACAAGTGCTCAGTATAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS46_3182:5:2934:2829:31483/2 +CTCTGTGACCTCGACTATTTATACATCCGCGCTGGGCTTTCTGCGCGGTACATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1249:16218:147727/2 +AGGCGTGGGAATTTCCATGTCCTCGCCTTCGAAGGGCTAATGAGAGAGTTATAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:553:771:39903/2 +AGCAAAATCCGTCGAGGCACCAAAGTTGGTCTTTGCCCTCGCGACAAGTGGCGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1223:9525:65794/2 +CTAGAGATACACCATTGTTACCCGCGCGATAAATACGCATGGGGTGCCGCACTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1191:17978:187481/2 +ACATCGGGTTACGATCCAAAAGTTCGGAGTAAATAGACAGATACTCAACTGTCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:3:10917:33452/2 +CAATGTGAGGTACTGATCTTCTCGGATGTGGGTTGCCCGATCAGAATTCATTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:177:10045:179524/2 +CTCCATCAAGCGATCTGGATCAGCATCACTAGCTCGCGTCGCTCCGCTCGCTAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:946:13566:137436/2 +CTTCCGTATAATCGCTTCAGCTTTGCTACAATTTCCCTCGGTTTGAACAGTCAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1148:656:9363/2 +GCTCTGCAGTACGAACAACTCCTAAATTGGCAACGCAGTGGGACCAGAGACAAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1498:3307:70020/2 +GGACATGCGAGAAATAATTAAATAGGGGAAGACGTAGGTTCCACTGCTCACAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2248:4900:51665/2 +CGACCGTACGCGCGATTAGTGACGGACGGTTTACAGAAATAACGTAACGGAGGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:807:7391:123290/2 +GACGAGAGTGGCTAATACAAAGTCTCTCGGGTCGGCGTATGCCTAAACTTTATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2455:17864:5895/2 +ACGACGTGTTCTCGTTACAAACGCACCTTGCCCTAGCTACGGCAGCTTGTGAGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1257:14923:50658/2 +ATGGTCCGGTTTAGGTGAGCACATCGTCACCCTCCTAAAGCGTCATAATGAACG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:492:12915:12514/2 +AACTAGTGCTTCGCGATCCCGAACTTTTCTATGATATTAGGTTTACTCTCAGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:235:7270:100350/2 +AGGTGTAATGTCACAAAGTCATGCTTCTCCACTATCGCCTTGAACTAATCGCTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2260:14085:3683/2 +AGCATTACATATGGTCACTCATGCAGGCCCGACCCAACGTTGCTCATCTGAGCC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2169:9651:142664/2 +GAGGTGTTCAAAGACTGGTTAAGGTCACTGCTGGGAGGACATCGCATCTCTATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:540:14663:48756/2 +GAATTGTACGGGATTTACCCGAGGCATCAGCATTACATCTCATAAGCCACGGTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1091:11404:166341/2 +CCCCGGTGATTTCGTGATGATGCCTTTATAGTGCTTTCTAGCCCACGCATACCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:608:7328:131607/2 +AGTCTGATTTGCTCGCTTTGTCACGAGAGGGGTATCACTAGATACATGGGCATG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:522:3049:47753/2 +CGCAAATCTGGCGAGTTCGCATCGGTTATCATTACCCCGCTCAGGCCACTTCTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1283:16887:169154/2 +TGCTAGCGTTTGGTCTCGAGTGGGCGCAAGCGCACTTGCAAAGATACCCAGCGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1095:4319:4606/2 +TGTTCTGGTCCGATCTCCGTCTATCTTAATGATACGAGACGATTCATCCAAGTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS46_3182:5:752:19585:108953/2 +AGTTCGCTGGTTGACCTAATTGTAAGCCAGTTGCGGGCCGTAAATGCGAGGATG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:752:3015:99805/2 +GTCAAACGGGGTTGGCAACCTCGCTCTAGCGTAATGTCTCCACCTCGGAACCTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:332:4846:77772/2 +GACAATAACACAAGCCCCGGGAAAATTTATCGTCTAGTAGATACCTTGCCATCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1140:784:66805/2 +TGGAGAGTAGTCATCGCACTATACTCACACCTGGAGAGTTTGCGGTTCCCCTAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:473:18338:33594/2 +AGACTCCGCGGACCGACCCATCGTTCTCGCACTTATGCGGATAGGTACCGATTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2702:3903:96584/2 +TGATTGGGGATTTACGGCACGATCCACAATCGGCTTTATTGCGAGTTCAATGTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1797:11052:97278/2 +TCACTCAGCAGTGCGTTCCAACTTCTCCTCGTCAGCGAACACAGCATACTGTGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:820:13922:80241/2 +TTTATAGGATGCGAGAGCGTGAAGCACGAAAATCTCGTTTTACTCCCTAAAAAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:555:4732:87380/2 +GTCCCTGGAACAGTAGCGTGAGCAGATCCAAGGCATCTGGGATGTTAGGTTTGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:659:12505:103681/2 +GTGAGGCCGTGCAATCGCTGCACGCGAGCAACATAGAAAAGGCGATTCTGCCCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:59:2864:12297/2 +CAATAATTACGCCCTTCTCGAATAGTCCCCGGTGTCTTCATTACTTTCCAAATT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1383:4754:130496/2 +GGATTCATCATGCTAGTACAGCGAGTCTCTACGGTAGGAAGTTAGATTAGCGCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2240:16766:66728/2 +CGTGAAATACCGTGCATGGAACTAAGTGCCGCGGTTGCCTCTACATATCTCATT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2100:7817:14667/2 +GTGAAATCGAGCAGTGAGTTTGCCAGCATACGAGATCACCGACTTCATGCCCAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:895:13916:72786/2 +GGCCAGACAATGGCAATTGCACGCCCCGGTTGCCATTACGTGGTTTCAATATCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:418:13660:161873/2 +TAACTCCCTTGAACGCGTGCCACGGGCGATCGAGATGCGAAAAACACGCGCAGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1283:12361:37079/2 +TAGCTCCGGGTGCGTAAAGCCATAGACTCACGACATATATGGTTTGTCTTGTTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:347:5935:115159/2 +AGCAGACTTGCTAAACCTGCTATAGCGTGGGCACCTGGATCACAGAAGAGTTTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1983:1677:46718/2 +AGCTCTTTCAAGTACTTATTGAGTCGACATAGAGATCCGCAGACTCATCCTACA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1380:1444:154221/2 +TTATAACCTGCTATCGTGATGGAGTTAACCCTTTGGTGCTAATGCATGCTTAAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2040:5556:131112/2 +GATAATACCGACGGTCAACAGCGAAGTCGTTGGCTATGGACTCGTCTGTAACTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2789:7394:44852/2 +TGTATAATCCTATGTCCTACCACCCTGGCGGAATAGATTTGCAATTACAGACAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1215:14424:11602/2 +ATAAAAGGGAAGCCATACGTGCGAGAGCACACACTAGGTGACTAGTACTTCAAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2475:14123:112282/2 +CTCTGTCAATGCCAAATAGACGCGCATTTATAAAATAATAAAGTGCACCATGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS46_3182:5:2546:19759:182333/2 +GATATCTCGAATAACGGGCGTGTTTTCCTCTCATTGAATTAACGGCGAAGCTTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1849:14231:162499/2 +TCATTCGTTAAATCCAAGGTGCTCGCCATCGGGGAATATACTTACCGATGAGAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:662:17029:25695/2 +TACCTGATGTAGTCCCTGCTAGAAAGTGAGCAAGTAAGTTTGGAGACGGAAATC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:566:4782:154015/2 +CGTTAGTCCCCCCGTGTAGATATCAATTTTACCCCGGGAGTCTAGAGGCGCCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2085:5947:93787/2 +ATTTTAGCCAATTTATAGAGCAATTTATCACGCTGAGCTGGGATGACCAAGACG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1281:315:57513/2 +AGGGAACAAAGACTGTGTCGAGGCACCCTCCCACTCGCCATTTTGGTAGCGGTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2225:9077:172869/2 +TTGGACGTTTTAGGAGGGACACTGCTTCGACTCGTACATGTCCGACCTACCTTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:373:3420:105949/2 +CGACATGAAGTTTCCTACATCTGACTACACGGGCCGGAGGTGATTGGTCCATAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1805:9722:68857/2 +GGAGTTTAAGCAAGTTATGACCAACCCGCAAGTAGCAAACCAACCCCTGATAAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1350:15373:181220/2 +CATTGAGCATCTTCCTGACAACAGCCTATGTTTGTTTTCCGCTTACGAAGCAGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:44:17607:19283/2 +TACAACGATCGCTTTGGGCCTTATTTCATTCAAGCAAGCTTCCGGGCGCAAAGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1732:2108:100386/2 +CCAATTGTTACCTATACTGAGAGAGCTCCAATCTTGACTCCAAAAACTTGGGGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:250:15883:128840/2 +ACCACAAATTGTGACTGTATTAAATAGAAGGTGTTTTCAGAATAGCGCTCGGAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2113:7932:11007/2 +GCAGAACTGCGGAACGGTGCTAAGGAAAGGTAGATACCTCTCGTTCGGTGTCTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1891:12437:47748/2 +TCGCGGCCGCACGAATCCATTGGATGGTCATAACTGAAGCGTTGGCTCAAGGGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2369:5530:14886/2 +GTTCACGTATACTTGTAGTCACCACGCGCCGGCAACCCTTTTCTATAAATACTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1742:17056:119120/2 +TTCGTATGGCTTTACATAACCCTACCTAGACCTTCAGGCGCCACAGTCCTCAAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2687:831:113506/2 +CACAATGAACCAACCGATGAAATCCCCTCGTTAGGATTCAATTGTCCGACCATA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:451:18172:127749/2 +GCGAGGAAACTCCGAAATTGGAAGACGCCCATACTGCAAACCCATATCGCTCCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1906:6140:154562/2 +GTCGGCATCGCTCGTGACGCAAACGAGTCTTTTCTTAACACTTACAGGAGAGCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:745:10175:155937/2 +ACCCAGTTAGCAGCGGACAAGTACCAGCCTGTACATATAACTCAAAAGACTGAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2679:13605:42940/2 +GTCTAATGGTGGGTTGTCTAGGCACCATAACGACCCGTAAGCGTCATCCCGTTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:525:19584:47076/2 +TATTAGCAGTGCGAGTGGAACGGGCGTAAGCTATTGGCGATTTTCTTTGTCCTG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:541:14390:18530/2 +CTTTACTACCATATATTTAGAAAAAACTACAGAAGCAATCCCTGGAGTACGCGG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+@HS46_3182:5:2371:19010:102788/2 +TTAAGCCGCGTGGCAAGTTATGGTTAGTTCCTGATGCCTAGAATTTAAATCCGT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1477:14980:45119/2 +TGGCCTGTTTGGGGCACTAGGCGCTGAGTTTGCAAAACTAGAGACCACATGCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2719:11588:38543/2 +ACCCGTAAGTTGCAACCTGGCTGCGCAGCAGGTGTAGTCGTGGACCCGGATCAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1734:8972:136811/2 +CGCCAAACGAAATGGTACACACTGCCGGTGTGAACGTGGAGGGGTATCTATGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2208:6046:184805/2 +CTGCACCTGTAACCGATGAATTGCACCCAGTTTTCGCGCGCACATTAATTGCTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:173:5726:155286/2 +TGATGGGGAGGCCAGGCACACACCTCCTCGATTAGGGGGTTTCAGTAATGGTCT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1308:11903:50795/2 +TCGATGAGGCGACTAGACGGAGTCCCGGTAGCAACACGGAGCCAAACGATAGTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:1898:10887:153698/2 +CTTAACGGTTCCGCTCCTTAATTGATGGGTGGTAAGTACCTTTGTTCTCTCTGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:772:13347:20065/2 +GGATACCCAATGCCCTCGATCATAGTGGGACATTATAGGCAACCGATCGTTGAT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:2480:4129:148337/2 +AAGCGAGACAAGCACATGGTTGCCAGAGCGATGTCCGAGGTGTATACTGCGCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@HS46_3182:5:256:18917:52058/2 +ACGCGTAAATGGAGGCAAACATTTGGGCCGTTTAGTCACTGCGAGTGGCTTTAA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/t/data/test3.bam b/t/data/test3.bam new file mode 100644 index 0000000..1b34227 Binary files /dev/null and b/t/data/test3.bam differ diff --git a/t/data/test3.bam.bai b/t/data/test3.bam.bai new file mode 100644 index 0000000..d98d42b Binary files /dev/null and b/t/data/test3.bam.bai differ diff --git a/t/data/test3.fa b/t/data/test3.fa new file mode 100644 index 0000000..954edce --- /dev/null +++ b/t/data/test3.fa @@ -0,0 +1,360 @@ +>1 +cCaCcGAtaaGtaTgttaaccCgAgTACATTaCcGGGCGaatcaTccgAAGaAtTCTTgaAtaagCCcCggcgaCtctTG +GataggatCTtgctcgtCcaTatGCtaagCAGcAtgGcGTgGaATAgTTcGgTCAtTtTTTGccTCGgacCAGtTgctga +ACCaCAttgctgcTaGcgagaaCggCaGCtGAGTgCAAAGtaTTaaGgTCGTgCaGgtTTTactaaCACGCACctcCtCg +CGAGtgATCGAGCtcggcgCgAtcGtcaGcgctaAGgCtAGAAGggAaaGCacCAcTAaaaGAAgGCACacgCtCtcAtt +gCTacTGgcTtcGACtGGGgcAtAGCaAaTggTTtgggAgCGaTaGACGcccaAgCTTGgtGggATgGatCaCaTGgCTT +GAcaCtAacaCTcTTgAtAGAACggtggttGAgGtATcCaGGTCgGTtGATtCGctATgTGGaAtTggtgaTAGAtcaGg +gCAtggCcAATTctTAGCTTcgCCggcAACcTCAgcgCGaCACCAtATGtggTGatTCTCCGGCtAaTaAAAcCTaTGtT +CgAtACCGCtaGccgCCCacaTaActTaAcAaACcgCgACTTgACCCtaAAgGgtCtCGgTacAatAGggcAaTGGGaCA +gAcGGcGgGaCcAAaGTGtaTCCaTCAcgGgCATTCaCggGCgGatcTCCgTAGCCGcaaCAtTaTtTGgAGcGgGCCAC +ACaCgACctAgGaAACcAagCAcGtgtacAaTTATTGTAtAGTcctgAtAAaACgcCTgGAATcCtTtCCGTCaGTcTag +ATAcgcGaCAaCgTActgCAagaCTACtTAtTctaACGtTcTgACATatACaTcTTgCacatAGttAaATGGcgatatcG +tGAacacgGTtTtTTtcGccgAAaGACtccCGGGctCCCGgAatTACAGaaATgACCTgTGtcTAgCCaaCctgtCTTAc +gaAtTAGGctCacGGtCGTcaCGcGGtggGACaggTAgtCGcAcCtCTTcGgAAcGgActCcATGGcTgatcacAcgcaa +gtgTGGaggtGaTCGGtCtaatTCgtGgGattCgcCAcattaggttTatcTCtaGCcgCctgTaTttatcttCtgcCCGg +GccCccAaggTctCCgAtcccgTTTcTTACGgatCTACcGAattGttTtcAacGGaaactCtaCGgCTTTgcGCaGacgC +gAtctGgAAagctcCCtACcGCtTtaGgaaAAtGAGCAgAAgAtttgCTTGCGgcGGgcCggTGAGAgaGCCGtGaGCTC +CTTCAAtTGtAgTcaaTtgCGcActAGAcctGGaCagtAGacCcgcACTAtcgAtCTtaggtcGGcCgaAGttCgCaTgg 
+tAGGGATtgTCtGGTCcAataTtacatgGataCCgcGTaacgGCaCCtTCGTtgtGgaagCAAgCTCGCcgCGgCCtTCA +CgggcCtCctGGaTCcCCaACTTtgaTAAcTACGgcACCAgActgaTATcGCtaaCCctTCaGTTTTAAggGaTctTAca +gATtAgTTTGtCggTccCTTctCaTtACAtaTTTCCAgAaAggtattctAAccCCatggcTATCAGAAgggAGGGtTatT +ccTtgcGtggAaatcaGCAaTaGcAtATtAATGTtAgaAgTcTCCTAGaGgCTagGTccgtGgaTtgAGCGAagacgtTT +GgCaCTGGtctTGTATaagTgTagTgcTAGcgaAtTcTtGctCtaaACggtGgGaCgtCTTCCGCTGtGTCAAgAactga +cCAttTgTCTgaaTaacggaGtTcAagGcGCCtaCactGAGccCAtTggtCaaagTAcAtGTGGTgaCtGcAGccCCGCC +cCAtCGCCGACTaAcgAcAgAggGTcCAtagTTGAtGcCatCTtaaCAGtagCAaCTTGcTcaTcTTAggtGtCGAAGtC +GgatgtggTaTGATtgAaTgccGTCGAGAtaATGGACgGgGGgAAaGTaGttaTGGCGtATCTATtTACtCCgcccacGg +ttgAagCAtTTTCtAAggaTCAcCTgctaTgaCaGCgActACTGATTtCTGttcTaCacaAATaTggGAgAgTGaagacg +aGgaGtacTACAggTTTGGGctACCTATagccaccGtctCcATTgctttctGtACcCTTacTcaCcTTCcaAgtGtCaAG +GCAcgTCtgtCAAAtaTctCaaAtgGggtataCgacTcaAGTtAtaTaGgGgaGAcCGCccAtgcTgGgcaatTGaCaaA +GAtCAtgACAtAaGCtAtGcAACcGtcCcTcagTAgtTtAaCctcCTcgTgCtACaCaAAtcATtaCatATGgggcCCcg +GAgaCcTaCCGTAGtCtCgGccTGCtCcacccatcACTTcaaAagAAtttTccGtGgGggGcCGctACaCaaTCaaacaA +tCcttATGTTCctTgatACTGtaTTGgAGCGGAcGAcTTCgTgaGCgTAaatgAgcCATCgCctCaaGAaTTcaCtTAcT +AgaaGtCaGtgccTcCTCAcATtggtTggtGAAatcCcgGGAGATCacaccGCCGTTacGagCgCAaGGgtGcatttcTT +agTttGttATAGaAgAAAccatTagatgACcttcTcTGgacagtgTgtTGgtGATGGgagGCCtCCTaCGTCtCTCggCg +GtAtctcTaTcAtCgtCGGaACcaACgGTTGCgcCggCCCggatatAAagAaaTccgTtaAAGtGgaatTCCGAAgCCAa +atTgccggAtCcAaCAcgaTCGaaTtTTTGaaTAAcAAaGtaCAGAcGccTCaAggAgagCCaTtcGcGATttTATAcTg +GtcGGTAcaaTgtGTtCcgAcCaTcCgTGaaaCATttTAaCTaCAtatGCCCgGgGgCggGaAcgCAgaatctAAtCaAc +cCgATCGTcgtcGTgTgcGaaCcGcaCAcAgTgAGAgttcaaccgCctGGtCGGcATAtAACGTcTggtCaCcGCGTAAt +TagGaGTttTtttAaCgGagAgCgGTTATATagTgCGcgacacGtGcTTgaAgtgaTAtTTcaattcacCctGgCAaAcG +CTActgTtTGcTGaaatTctgcACtcTCATCtaggacaCtTtGtagtgGgTAgGccTcaTTTTAGCttaAGCcacatTtg +aAtttGcggCatATaAAGCGtggcCGtCTAAGTTtgGGTgGCtTGaaAcAAgTtacgggTaAcaAcaAaTATCtTTAggC +TgctTggaGCATgcaCTGcGgtagGTCAGccgTTAgAGTGaGaCtTgacgTTcacgTcggTaAagcGCTAaTTAttAtaG +CTGCTGgcaTCtGGGccAGtGGtCTgcgctCccCaTAgctGgcACaggactTAccaaATgAcctgttTgaCTGTCTCCtC +tTccAtcCAAaTaTgCCTaACctGcGaAGtCgaTCAGAGGtAtGcttgAtTcgGttgGAGGcCAcCatTCtCAgaAaCTg +TtCggAtGaGaGgacTCCGCAGgacctATcAaatGTTtgatTAtcgtGAtgaacgcTcAcAgtAATagTaCgCAacTCTC +GAccCtgcgTCTGCCTagcgcCGtacCcaCCAcgATAttcgAGcTtTCccgCaGatggacAGgTtgATctTtATcAGtGG +cCgTcGtACaCtaCCgCGcaTtgTTgTCcaGgGaaGAcTttGAatGAcgaACTAtcGgaAaAAacTGtTaCCTagatGCG +GCgtGTaCttCtCAAgagCtcacCcaTagGtGcTAaatgCGTgAAAAAccAgTaAttAAtGGtAGgatGacGGcCTaAtC +GTtaGaaTtGActGGCgcCAgaATggGacaaCgCcGggttAAaCccatATttGCtGTTgacacGcACccgCgctCagGct +TaTcTTcaTGaCGcgGGaTatGCgCTTagCtcCAtTCGAcctTCTaatGTTTCtcAGTaatctCGGgatCgGcttGCgtc +TTtAtAtAGgTgTaGacagtagaaaTtGtAaCcgtaGgCcgtgtaTtCcaCTAGTaagcCACggGaAttcGTgtGCacAT +acTGtctCtCgATAatccAgGtgactgTGgggTGgtCatAaGAGCCAcgCtcaGCgGtacTGGCcgaTaCaTAtAGAaCG +cgGaAtccATCgtatctTGCAgCTaAGATgaCCGCaTtCCaTctCTCaATCgCgGcGATcgATAataGgcTActtacgaT +ttcATgTCTAActGAcCCGCAATcAAactAGAcctAtcGaaTAAcCAtccaCgGCAcGGGGatcAggGCcGTAaAGcTcG +TCcACtCACATggTAAACTTtgcaGTttCCCAgCataCAaacAACGCctcAatCtGtCaaAgTtTaCgGGGgATcgTcCt +atATcaTatCtCgGATAaaTAtAcAGgAaCTAGGTCTCtTTTccGatACgCtATGCgCGCTctAcgAAGacgCgaaCAgA +GaCAtTtgTgaCGtagGatAcGctGcggatAaCcTTGAGatggCagTtcCCTGcgtgtgagTccGGgtAacGtGAtaact +tctaccgGaTCatgcAATCTGtcggTaTaACaTCCGaTGcGgGGTAacCtCaaTGtATaAtActcctGATcaTActCaGt +AAgCgAAcaATGTtTAAATtgTCTGcgCTCCTCcCCattaaCGtaTATtTAccAGttatGgcaAaGAcAAACTgAaCtCT +tAAcGgcTgcCCctTGAtTttAaCACCAcAtAGttCTgAgttCcCcCCcTGtTaGtCcTTgTaAaATAacCCCCAgaaCT +cAatgGTAAtGCtcTaCaacTcgCGgGGTGATacaGATCtaTagTctTtTGAGtGgTatggTcGCcgcCcGtgggaGGGt 
+tcTctTAgAgATgaCcaGccACCgTtgTTtaTggGGTGCCtTAaTAGCctcAGTttCtAatAATAtACAGGCggccGctG +gtctgTgTGgAAgaAgtAaCCCAggatACACGcatcTctatATAgcgtccTttTgCGcCgtATAcatacGGGCTTgTcCt +ggtcgGtccTATGTcAtGtCctTctaTcTCaTgggcaTcCatTAACaGtctcTTCGtCaGttGcacgtgattGGgtTtAA +ccCGcATcAgtTcTGtgCGGcTtGTcttcAtCAGaaTTaCtTcCTtcGcTAtGgGCaAGaCACagCtCcTgGGGtGATtC +GtAaaaaCaTGatGCgTcaTGgcatGTGACcgTgCAGAtGtCtCGGCTCGCCACTGtCgcgCCGtCgACaaTCAgtATgG +ttcaTtCgaTgcCAtaaatacacgcagtGcAgcTTaCcAgcGGTggttgCAATttTgttAcgGtCcccAgGgGcCCcCGa +tAAgAGAcATTGtTTtacAcTtTatATGTTGgGActcAaCcTCTgcAacgaGCTTGgAgtGATagGACtTCccgCatgAa +aAGCgcACATGcGcggAagTAaagcTATGaccCAAaTCGGCtCTcTGaAGcgTAgCAcCCtagGCCActagttACTatgA +gaTTcCcTgcTgggcCGACCTcacccctTTAGCgTGCgcTCggtgcaggGaatTACTcTtCGAAtGTTTatTcCaGCcGG +CAtcagATgaatCGagtCAgGttgTttCgcaTTAAgGGaAcTcCagTGGaCaTTtTCcGCTctatGCctTtAaTTtcCTa +TcCGTcctgGacCgAGACcTCAGctcttGTcGTtCcTAAaAGCGtcACaCggaCCtttcgAtttgcgCtGtCagcgGGCC +GcTCaCATGAAttgtGGaAtcGAtACATcTGAGtGcTGTAaCAACtGtCAAGagtggAAaGtCATTAcGGTcTaGTATTC +GcGatTcagAGaTatcTttaTtCcAgccgACaataGagcTgtgAtTCttatATaATGGGAcCaGatGtTacGaaGGcaCA +tgacGCttActGTcgCAtCTccGCTgAaG +>2 +GtAATGcCcgAAcGgacCgAtTaaGTtTgGATgacGaTAAAgtcGgacCcgCaGcTGgCAGaaTcttcGCgAgCgTAAcT +GTaaactaacGCaAgAGttGCGGcACgTaCcTtTTTAGacgAgGTtCgCtagCcGGGCGctAatgaCcTgagagTaGagc +aCcGGGActaaTGgCcaGgttagTaCACaTcTgCatGTaCcTGGTcCaaCGacgaCacatAgCtcgAttTtTTgTTatGt +TaAGagggAgGatcATAaACCgtgGAgagATTGGGaCAtGCaaTcTGcaTaTAATcCaGcCaGtAgttaCaatgGAagtc +ttaCAcacGgTgTtcTGGcGtCgtAatCTggACcCcagCCCgGGTcAAGCcacGaACATtGCCGccTaTaAAaAtttGaG +tccTaCccagatgtaTAAagATACtAggTtGgCTACgcAcCtAtgtatatacCgAtcGtctGgACTcCCccGaCcACtgG +gaaTGgatTCCcAGgTAgGacTgtGTCctaTGCtATcttgCCCatCacGGAcTCaTCtctaCgACggTccTGAGGgaAgg +GCAcgAcCCTaTTgGgcCCTCTGACAgCCctgGCcTAtCgTaGCactaAAtatCcgCCtCTcCtACgCCctaAgGaACGT +AaaATCTCtgCCAatgaCaaAGtTAtcgttcgATCcGTtaCGGGgAATTacGATtGAtGttaCGtTgAgGgTTaCttGtg +GAgTTAaCCcCgaGCCcTGcaTGCcTTGCgcGTtcgCgtGCaGCaAtGtAGTCCCgggAgcaGtTaaGgAtCCtaAGctC +TcgGgAtcAagacTTTTacAtTaaAGAaatgTatAGaggCaaTgcCCtGAgaCTcTCcTagCAcTAAGcTCtAGTGtgtC +tCGaGtACAcCcGTcGGtTaattacgAagACTCactAACtTgTCAggactCTTacAAGcTtgTcAAacCCgATGaATCCt +AtcAggaGaGTCCCGTgCttCaACcttATcAtGctgTAatCCgtTaACTTCAGcGGCGTCaAaGAATGAaAaGggAGctA +TcGcGCtttAtgcCTaGCTgTTgcCtaCgtcCtTAGCaGtcaATAACccCgcactgTCgGcTatTcTGGggggGaTtgAc +aACAgTTATAAGatatAgATacCGTtAtacgctcAagatAcggtAtAaATgAcGaGCTCCgCGCgtAGTgtCCAAGgcta +ggctCGaCaCCgGgtCTaaTGGTCGTtTATTTCaGgAtcttAtAtaaGttaaCgGcttccCggcctGtgggCtTAaAATc +TACAACcGtAaaCAGAtcgcGgATggAgtggcTGcGATtattGtTgCCAcCgaCtaCCcTAAtCGCtcGATgaCCccaaT +gGAAaagaaTGTAGaTCatACgTGgCCtTtCCAcgaggtCCtcAGcggAcGcgATcgtGacTcGgAcCTACACTGtCaTc +GTAGaatGAaAGtgCcgttAGAcaAaaTGtGTAATtaGAAAcggggaaATCGGgattaCAagGCtgcAcTaactaAGTgC +AtgCTAATCGatcaTCCatGTTatAggTGtCCgttGGgCGGaCgAGAATTtTgGTtATcgtTtcTtAGatcTCgAaGttt +ttGttATTCgcTGcCcctGCcGtActtGctGGTgacaGTgttagATaGcggctaGaaGGAtACtgCtGGccTTaCcCTGA +tCTCcaGatCTgacgatTTtCaCCAttcAGaGAatGgcAactAtgGaagGgCtGTaTcgGgCCctaccccCTACtCgaCc +GATgtTcTATcTaAAgccCCtGCgaTACTtcATAtaGTcAatggtcgtatcaAgGTgACGgATCTaTCCgAcaGTattTa +cgGTGGCGCgcTAaCGGcACtcAaccGtGTGgAtgGtggCgACcatGGTcGTaTgttgCcTGGGcGaaGACacTaaAtGc +aCTaTccTtAatCTTCAAaaTgcTggGTGATCgTGAAcgGGTCCGAcACAcAtCTtTGTcAcaggTgACTtAAcAATCct +ctTaatgagTcacgGCAgCCCcCTGCaAattgTaacGatcaGaTATGAAAcTGTTaGCAtcgttTGcTCGtTGCccTgaC +AtgAcGaCCatagctacGgCACcAGtgccTATTcatggAAGCTgcatCtCAGgCcgTAAGtgcaAGgCAGAgAcATgcCt +gtGaGaCCAtgGtAcATAACTTAGCGgTCcgAcCcgGTCtCGAaCTtCCctattAGAGgGctGgAaGtTTactatgCAGT +aCGcAAAcGTTAaaTgAaTGgGGCtCgGtAaATcAaGCgccATaaacGgCATCgAccGtcCGgCtAAGGGGCAgTtGTTT 
+GAATGGGtctttCtCGgTgCtGAtGtaAGTTccaCccAGAGcCTTcgTaGtcCAtaTcTCtTCATTAGAattCtCCcgtc +TGcggcTTGACAACcGATcaAGCCaTCcAGcaCtccGtGatGTctTAATTgtATGtTagcTtcCtCAAtCTaTGTCcCGG +GTagGAcCgCACttGCGGCGCaaAtGaACAtTCaaGATtggcTTCcTcTtCTccCtTgTGaTgAgaGcaatCCGCCATAa +GGcaCccgcAacGCctttGTtcGGCctcAaTgAcacCGaGactTtAaCtGTcAcAtGCgCaTTaaTtTCAtCAGtcAcgt +aTtAAtcGcTgagAgatcTTgtCGACTaGgacTcaGAtGCcGGgAAagACaaCggTtGtgtGgTACtcgCTtCTGGCATT +TATgGGaTaAaTcCTCatGtcGTAaGgTatAgTAcGCGTtGaATGcAAaTatCCGtCtgtcaTGcCtaTCTaGGCCaGGC +cAgTTCCCCAATAtCTgAaGcAtACgTTCcTcgtgaCtgtGttatTGgtcacTCTcTCgaCgaagGTcaGctTCAAcACc +aTggtgGcGgTTgACCtGGGgGacaCCTCtaagTTagcTgAaGCGAtgATAAgactTatTtGgtacTCGGTCAcCgACtC +ATCAaaAcAtTGTgtgCGGcaCcAtAgttCTcaaAgGaAatgtaCCATctGTGttcCGCgagTgAcGCAcaggGCGAtAg +CTTAaTAtAcGtacagTtCacCaGCTGTActaggaGaCaCTCCtcgGtaGgcCTGcGTgTcTTtTTCgACtccatACCGt +cgCccTTCgGCgAatgTcaCaaCAGTgAAtAtCCAACAaCAccTatGAaGCAGtACAccCcataaaATaGGaTgtTCAGg +aATAttgTgtcgGGgCaGActaACAttgcCaAcGgTgGTttaaACACcGgtcgACactGatcGCagtACCacTaacaaAc +aGAACgAGCTttctaCCCTgtcCGAggggcTTgTTCCTGcatCGcgAAcCgTTaACgaAaCgCAccAaatTTgCtGGTaC +TcaGATagaatCacCTtCtaGCacGGTgactgacCcaTAtCgcTtgctttAGtTgAAaACAgcCgCAGTTgAcGtgcCgG +tTTcaCttcGgCTAtAAttcTTCcTggCAgTcAACgGaaTcTtGCAgGTtGGCttGgACaTagcacgaCaAATtCATttA +gACCtCGGgGGATCGgcgtgtGtTaatGatGcaaaaGGtAcCcCCTATgCGcCCCaTCacAGACatGaTGTaagCaGAgA +ggtaTtAgatggGGcaCaCTAGACaACTTgcCCCGCCGagTGaaaGtTGctgaaCCtgCtCtAtGtctACaGTTatTatc +gtCAtcCCgTtacctcctActagGctagcCcAtCGtTTcCCGccgGCacCTGGaacCaTAtTtCtGgctTcCTAGAgGtT +tCacAAaCgGTAccgcGaaGCAcCTcGAgaTTAGtagaaCcAAtaaccAGacCAcgaGTCaTacaGtAACaCataTaGcG +TACgGGGTAACCAAtCaccACctTGCGcatTTAtgccAccCccGtCcAAAAtTgTTtgTacATGatATaGCATaGCgtGA +gcgAAAtCtTGCAaGTGAccAgAgTgggCagAcgcttgGcCCagaCCCttccTCcCCaTttAgTTGAatgGtATGatTTc +tAtgACaAAAtgtcCcaAtTACcgatcACTTaGagtcATTtATgCCTGgAgCAAAagggTGaTaCggaTcCGGGTTcACa +CccgAgcAaTcgtAtcgATtCGGcCCtGTGaGccAtCtcaCtgGgCAaaTTtggAAggTtgATCgAGAAAAattGGAtCC +CtCCTCcACacTTGGTggaTTgGTcTGaCacAgAAgAggacgCGtcCGaCcaAGcAgAtcCcacAGGtaAcCAActcTca +TctCgTcTaTCGTgGACGTctaGaaAtacTTACTaTGGTgACaGtAGaTaTGgtggTCATTAactgTttAGaATCATGgt +AAcccAGtCttAtTAAtCaCtGaaaAtacAATCCGgcCtacCggGGTtcgTTTCaAgCgCCgGcAgAgCGTcttcAAAGC +acCgCGaccActTCCAAtAgCgGGctaacctATaTCggCtccTtACCtgctCTTAtGCgctagAcAggtAGCGgcTAtAc +GGGTtTCGGTTtCAAATCccaAcgtttcATgTGtCTtGTCcGcAgaatgGCgCgtACACAAcAtaTgATtTCGCggagcC +aaCagTcgCcGGGcAcgcGtaACAAggcGAcGCgAacaTcgaatAgaGCgtCtCTGTGaagAGTGccCCGAtccGgGgCg +aTTTgaCTcGGCACgCccTggACTtaAGCGActTactAaTctcacTAGgccATgTTTGAAgGCaCataCAtaCTGatGtt +cAtgTGgaAttaCaatGGGAGGAgcgcCaGgCATtTCaGtaGtatAAAgCaAtcCTgTatacgGGctaaTGctTtaGgaC +gTcGGCCccctaGgCTCTTgCgTGgTcgCGtTTcTGCtCaaGaAGtTTGtCccAcTatTTCGGGagagCCtGATaCttaT +TtGtaCaaGAtaTaCcTtccGGTcCaaCCaTtAcACCacGTAtaGCTGGGtcagATCttCGAATTccaaCCgtccTaCGa +gCcCtCaCcGaGcgtgGACGtCaGTcGGGTgGtaatcCTgGggGcGCgCCGgTcgTGgTGtGgGATgaAgtAactAtCTt +aCaGTCGGGaaaGcacAATaCtcCgCCAgcaAcTAtggGtcTGATGTtaTttgCtcGtgCAtacCaGAGACTggaaATCc +CCgaaGaTAgggTCCcGTatcatcAgACcTttGCTacaaAtTATCTCtgCagCcgtcGaCtGgtTCCgttCTCtcGTAtg +ACgTcGaATacgctCgtAtcgCTaAaGaAtgaAtGGagGtCGTAACtgGAgaCcAGtCcGActCagTcgtaCgAtcgGtG +aaAGGgaACcaCTgCgTCaGtTACTGcAtTTCGCcaTcCAATCACgAaCCcGcCagAAcaAAaTCcCCatCtGCtcCCct +gGggTGGAAAgCtgTAGTCGcCgcAcacCcCtCcaaCTCGaTGtgCATCTGtaGcTagActaTtAtGGcgAgAGgGTgGg +CCCaGccAAgTgAacgGtGttCaAgaTccaaCttAgcTgTcGtGctTctATAcgcACtAaActaAACTaaaCgcCgggaT +ACgGATTTgttcATaAtGcGGgaCGCAAgaAAGcgcgCAcAtgttGgCtgctcACTTAATactgAacgacAACtaAaaCG +CAcAaAAaGcCGtAggaAtTaGgTgGgcggtGgATTCgActGGCCGTtaCTatTgcaAgaACgGagaCGAtCgaGtAGtt +aatGATagTGtgGTCGAtcGCGcTTtatGcGAacaTAaagGTttAtAaCtaTCaaTcGttcaAatgagatCTgcttgGCC 
+GTtACcCAAaAacGAgTtAGAtcccAGAcgAAgGcaacTTggGcaGCGgTtATAgTTtttaaTgGagAcaAaTgtTggCc +tTgaTGgTACgcGGCcgCtTAcAtGtcCCTggcAACGTAgtGcgtCGgTaAAgcCccGTCTTCCaCAGcAgGtTGtagcC +cacagCTTggGCtaAAAtCTcacaGagTTAtaggAaTgACcaCGcgtCgcTgGCatagaAgGGAttGcTCAtATCTCCtA +TtTGaAAtgcCAcGGgacCTTgccATgAtCCgttCGTgCTGGcgGCTTaaaAAAgAtgAACtGTtTaGgTcAgAaCgTGG +TGGgAGaAgAcaactCcgCcGAcATCCGtCGGtgAGGAgGctCtGcACtAGccGaTCCAaGTTTgtaCtgGtCactTttg +TaAGaaAtCgcATaggttAcCACcAcCGcaAACcGTTTtaTtatGGAgAtgCatCtGTcAgcAtAacGTgCagcCgAata +GtGGaGtaaaGCtGagCagtGgcacGTgctTGGaagACctgGATCcTGTtTTtCaaCAcTAtAGTaTCtGGgTGCcGgcA +ACtaCGGtGtAcAggcGgAggGGcTtgAAaCTGtAtaTAAAgCcAGAGcCagAgTcTGtgcctACgCgaAAGcTgatTGc +GcAGccAttCGTcGGctgagtAgCGagaTCTaAaACAgACtgATGGcggAgGagctACGcGgctggcCttAgAGtgCggG +aGGatATGGcaAAtCATtGgCgcCcagAgaAatCCtagaccAcatgGGcCTcCAgagCAAaATCCCaCCcTGccaTGGCT +TgGgcaggAgCAaTgGGaCcaGtGAtgaGTcgGGgtaAcTaACctagCgAgGTCCGGCgGATTAaGggTaTAAGaCgTaA +CatAGccCAccctAaCGTtaAatTcgacAcAgTCcCGTTAcCAAAtTTCtCgccGTactTaAtGCaGCGgtTTatgcggC +cGaGTATAGtcGGgaTTgCgAtTAGtttatActtatCCTcACAGGAcCcgACTtGgcgagCTAaCTGTGGcaaGGCtCat +CCCAtCacATatCgcAgagTgCAGcACCAcctGcaCGTGCactaGtctTCaAcgGGaAacGAGCCCatgtgGggCCgAAC +cCtccTtcGtCTAttCCaAaGgTAaAttagcAaataGgAcGGcgggCTgatCggGcTATGCtGcGCtATGCcGTcgtgCg +aCCAGtaaTcCTCCGCaAcTCACaCCACCgagATaTAcaagccACAgCtgccCGATGAACGtaATgtgCGTggtaCtTAg +gAgtCGtTccCgtaacGCAAGgtTCcccTGcaCGTgcGTTggAcTgGaTTCtTTaAaaacAatcGtcgGTTCGccgTTcA +CttgTacTACttaGGgTtCtttcTTtAaTTctgAAAtAGcTTCcTaGGTcTTACTAcgTTgtACtAaTTaACGGgAtTAA +taAcctGcTAAtGTACCGtaTcaCAgtCaACGgTTTggCTTaCAtggTTcgTTgcatCtCATgGCCGACtGATACgGAcT +GagtCcATGttCGCgGgcaGGCTaCtGcCCgAtaaaAcCcTCATgGTTtcatCTAGacCCgGGAACtCtTTCgTTTacgg +GTcaGatGGActCtAcTCGCgcCTTGTTAGGAccCggcGGCTtTaGGgaaTcGaaaaTTTtAgagACgtCCCCGaaaAGg +TtCgTgtCgcgGAtgTCCAGGActgctAggGgTTaAatCaTcGTTCtgcaAcgTcCGAGcTaCGaggtcCAAttCgAGTG +GTTcaatACcgcaAttCAaaacgagTgAAAAtCtAGcGgAAAtagcAaAtTcaCcAgGaGCgAttgtAcCGaGcatAcAG +cttgctcAaaTTtaAtATGAGAaAATgActCGCtacctaGCgTgTGcaaGGtGaagcTAGccaGGcCTgtaccGtCcCTA +TAGgGGggAtcaATgGaCAcCTGccaTgcgcCttgTCctACgCCTCGCctTcTTCtCcCAAaTGAGGgTcAagCAAgCCG +taAAaatgGcTagtcgGGTcACCtTcTAtgAtCcCtaAcaGgCGcCaCCAgcGTTtgTgCaGttcaGGacCAaaTCaGgt +AaGTTgGAtctaaTaTTCCcgcTgagAtTTgGcAcatctCctgtTctAagATtctgACcTccCCtcAggcgTATCgtCgA +aGGcgCaGGCCtCCgAatGcatcAGagAAtcaaCgtaGggGaCgCGAGcgcaCtATTgAtCTaagtaCaGTggcGTctGg +aCAaCgtgTgGctaTATgtaacctAGaAACtttAtaTGGCCcgaacattGGTaAAcaGggCtGtctATGgcaaTtGgGGG +tCacTgcActTtcaGTtCcGgtaaggTatCACttgATAaaTCCAtgGGtaaacaCCAGAtaTgACagtTGAcatttAgGC +CcGgCccCATcAGcAtgcaTCGTAtTCCAtCCTcCTAtTGgtAATaTcctTggCtctctAAaCGaCTAcAccCgCggaca +aaACcGGgTAAtCGtcCTATCGtGgTATGTtcttTcTtGtAGtcTgCTtCtatTagGgctgaCagaGCaTcgTATatGCt +TCcaGaaTtgcAtCTTcAGtcGgtagGGggcCatcTCGGCcgcacccaCgtTgCaTTggtAGaTtgaGagACgGggTCTg +taTcaAAtAtaAGCGAa +>3 +tgGcAgtGCgcATAgggctCtTCgCggCCCcatTGtTAGaAgCATgttGaAtccTgactTgagaTtggtACgTaAgTcAG +tGTTGAaaAGaTaTaaCAacAaTCTGagTagAtTCGatAcTaCCGttgCATagctgTaAataaTaTCaAaatccCTagTC +AgcTgAgtcCttgTtGaTccatCgactgCCTttAtaTgcTGaTcACaATacCTttTcAcAacATaacgaccaaaAATaTA +AGGACGactATcTTccTcAacAgATtGgTtcCggaGaGtAaGGgtgATaGGgtTCcTggttgAGtacCgAaGgAgtgtgt +atgGcATcTgGGCTcATcTGacagGCtGGAtgGCcttctTgGGtgggCtAtTGtAAatacatgaGGGgGgGCAgCGttTC +aGaacCgACTgGTgCGCtttAgACGacgCtGgaGgacgGgtGAcCAaGgAGAGtTGGccGtAaaGTAgGGCCAAgAATta +gaTtCCtgCCCtAagaCaCCGCtTcATTCaacggaCCTTGAagTGtCTGaTtAAaCgctgaCcgGTAActaGaGGttacT +gGcggcaTtgCTCGcATGCAcTgTCaGTgcCTGtTacAccaaAAgcCcCATttaGcACagTgttgtTTGtcaAaGTacgT +atGcTTCCtAcaGCcAaggGCaGgAgctGAaAAAAaatTatAAGggcGgAgAcaAGgcTgaCcgAAttaAGtGaAcCGTg +gtTgaTgAcTAtTctcgaAcggCcGcAAACcAGTaAtAttgcggTGtCGCaGcTaTccCGtGtAgAgacTTGGTgTgAAg 
+CATGgCTTTTACATaCaAtcgcaTCtCattcCtgCCTggTTTgcTGgCcgCtACtTtCaaCTTccCAGaTgcagTAcgtt +tATGTTgGGtaaaaTaGGaTaTaCTtgcgTgtCtCctCCtcaatgGgggaGctGAAtGcAGTTTCCGATaGTTACTATag +AAACaAAtggaCAGtAAgGgGaTaATcCCcTTtattgCAtATCTagtcCCtAAAgCCgtaagacCcaACACctcTGagaa +TACTTCgTTGgatcCAaGGCAtCcAtCtTaGTgTAGGAtaCCAGGAGTgggaTtcAActtgtGGTgCgGaCacCaCAaAA +cCgtgctGGgcCGCtCcttgggTAAggatGgggacCtgCgAacgAcTCcatCggGGAGcAGaCACTgACtAtAaTatGGC +tcGtGCTTAcgtgcAtAccTgCAGtatAGTgGGCaGtcCaggTCcCaagTcGacgGtcCaaTGAagGGtagACAATTTat +GTcAttaTTTACGCAtGttGgCaaAGaCgTCCccCAgTTgtAGGTATaAGcAcaATTataaCcttgcaCtATCatCGaCa +atgCGTAcCagacGCtcatCAaGaGAcTTaCtGCtTActGattcgTTgAcGaTGAaaCTAgcGCAcAgTcgGCtAaTTgG +aAATggCGTACggatacTtCgGtaTgggTGacTctATAtaCTcgaAAtTAGgaattccAtcaatAaaAcATTatatGggc +GtaATgTtGAAATCctTACgggTattTTAcaaggGTcACTCTAGtaCAcTCGATaacgGATGCgggCcgaTCTaGatctt +aGGgTcgaTTgAactGcTttcCtgCTtaTTtacAatTcTaAtGGcattcaCaagCgaCcGTAgCgaCgatCcgGTaTAaT +ttaGcatatGaAgTAggcGaGgaTCCAacgGaaAgCCggCgCcgCcGTtCAAacTcgaGGtaAtaCTgcaTTgAAGccGt +AggCTtcgcGAtGcCcaTaAATCgTtTaGTcAGgcGTaCacaaCTAaGatctttctTTCTGtcaatCtGCaaTgaAGCGg +CacTAGgCgTtaaCgGCTgTgagGGATGGTCATgTcGCTAagGcCgCgttGtAtacTgttAaAtcGCTCtTcacGgcCgt +TTgTcatgTgctcaTTtTtAtgcTaAgcGgTgcTgCATGGggACaAAGAtTAaCAgcTAaTcCTTtgCgGcgTaTaaCgt +AtGGgTtAGtgCttgCTtTggcGGcgCaaatctTcttTCagtGaTaGCcTTCtAgAtCatgTgAGaACgaCagCaaTCaa +GTAgagatGctAggCatCggGgGTACCcaTAcgCgtaccTTcatTTgGGCtCAAaatAacaTCcAACcTCgGtCTGgCTt +GTCagCtAtcacGcCtttcaTggGctCgggtgCcgAGGAAGTtTtggAGGctCTCgCcCAtACgTaTCGcAtttcgcatt +CgtGCAtcAtttGATgtgAAcGcAatGcATcgATtttTctCCTTGtGAAGGttTaGTtAgTGgtTGgcGActgGCaCaTg +aCACtCTgCTaaaCcatgAtTCGGTGtttTGGtcGCcAtgCcAcGgAAgTagagcTATcaTttcgTAaaATatGctcggC +tGTgCAcGcGAtGcAGTgAaGtGttatgaTAcccGtGgaGGGtTtCgtTactGcgAagACCCgaAAggCctAaaGAaCat +tAcATtTcCGctTTCAcgcCCtgccAGcagGAaAGAAtggCaACTgGGCTAcGCGGGtcgtAAtggtGCaaGtATGCgCC +gtCagtgtCgtGccCacCGtgaaGaaAAaaGAcAagTGcGtGGaTAatGtTccGTaTTCGaGgCTGAaTcTTTAcACGcg +TTgaCTCcgTAAcTaGAgtGcccAtcGGtTTAttCtTAccgAaTACGagactGAacgcCCTgcTtGAcaACtaTaAAGcC +GggTTGatgctaatcgCagaAGGGAaGgTtTGaGCGaGattAAaGAgaAaCgAAGAAAgcGatGggttTcgAgCCCaCcT +cATAGAGCGCCacaagGaaaGCtGagGttTAcCGGaTatcTtCGGaTaagTGcCTtgATcctCcGctatcggGCaaAatA +TTTaGgATGAtggccCGccaacacAtAcAgaTGCAaCGtAcGcgaaaaTTCatTTatTagGAcGAtATgtGTAATtaTCc +tGcgaggggCTcGCcAcGCcGttGcaGACgAtaaATATAacaTAGTggacAtCaCaCaaATCTTtAAAccTCgGCaGcgA +TtTCGtggTAAtctAaatTcAcaCCaTtAcaCcACACGaAATAaCgGgtGtaTTtttCgGGttgTACgCtTctGTggCCA +CtgggtTTtGTactccATATcCAGgtAcTATggTaTccGtctGccAGCGAAagTAaAAgtAtgggATAgATgTcCCCgag +gAttgTcTtAcCGaCGaCTCCGTatCgGCcgccTaCcGtATGAcCAaGTCCaggCTcAgtacgAcCCcaTGAGTGGattC +TaGTtGagGcTtttggacTgGCgTgaTCTTCcGTgACcAGcgGCtCTcGCAGatagcGCAtgTTgGAgcCcgGCcaAGGc +GTGGTaAaGTCGGgtAtaAAggaACggTTCACaGgGgTccGTACttggcaAGTTgcttGtAATTgCGAcgCAAtcCGgaA +ccCCgGCGgtCgcCttAcGtgCgGcTCtgTgATATtgtAgTtACCccTtgGCAgacCgaCCaacagGTcgttaAGaAgcA +ccTtGtcTGcAcgtAtGcTaGcctccTaGttCGgagactaCCATGaccCGGGcCgtTTTaGcgTAGgaTCCcTtgtaTaG +cGCaTCatGgcCTgAgaaATtctAGTcgaTCAaTGcAcGactTctccaaTGGGGCgtcCaaGCcGaatcgTgaATTAggg +CtaAAaTgtATccGaCcTgCgTgtGcGggTtTaTgGCaactCTgCtcGTTggATAtGTtcTTtgAtCaCTaCgcAtCgcT +gTATgCCCcTaggttCtTaGagAtActaaCATTCtcatcTGCcCtagAAtaTaCtCGaTtgaccCtatAtGgCgcaaGca +ATcgGCttTGACTgTcGTATaTaGgGggatAaGtagaACattCcGAAaACCCAcTtagAAtgcCgtACtCactTtcacGG +cCgCCcCGCTagCcAcGcAaCgtCgAGTgAtgCcttGccAtCtAgGcgAaGCtcCtGAcagGaCgcCTttAtCagcggGg +GtCtGATCacaCCGgTCgcgacCTGAAGgCAtAtTTtGGTTCcTGaCgtCctaaGAacAaTaTTcaAgAAGGaTTCagTt +CACgAcTTAGggAgtAatTTGaAGacATCTaaTAaTtaagatGAtCgCGTtaAcAatCcGgtgAcAaGgaTtAtcCaTtc +CTcatTgTAcctcGgcaccTaGGagaGCGAaatTgTACaAaAGtcAgaaTGaTGGgACCTTcgCgTtTGagcgtAgGtTt 
+gCtaGcGggaGttatCTgatggCCgcgAACTctCaTCAaCCAGtaAAcCgaatAActtgCggTCtcGGAAtcCTaCaGCa +AggGcctCTtTgTggCAGGggcgtTTgatatGgTGactACcCGAgGAGAAACGTtgcgAaaAcATtttGactaCagAgCA +gtGctTCGTattaGtCTCTgAcGCGcaaaCGGTCgcaaTAgTAAaaTCGgCtGTTgcAacTaTgTtGGgaacCTaTGgTa +GgcTGCtTtTgcaGCaCTgCgGAaCCCCcctTtGgcGtGcgCaagCcttATGTCGCAattaAGGccgACGgccAgttagg +CcTGtGGaAttTCCGGGAgtCtgCgAAGtTaTgcTCGcCGTAgTtGCagtGAGgtaGgCTgggaTcTTaAGgtaGTcgAc +CTTaAgGagaAcCcaGACGGaTTtacGTGagAgAAagccatgGcAtTaTgATAatGACaACGacTgccggaTCGACacct +gcTtgTcACatCAcgTATAgTCtCCgAgCCccCCgatGttcCgCGgatGaagcccatgtcTGAtatCACacGCCaCcacT +GCcTtCaATtaAtaAgacGtcgATCGaaggCgTaacGgtaaTtaaGtAgAGcgaacaggTcCTGCtTGTGCCgGtCtCCa +TTacAcaGcaAgaGcgccTtacCtTTacgtgACCACCCAcTGTGAGtagACaAAGaAgtgCaTtaactGTgAGgagTcaG +ttcCcgACgGgagaAcaaTTAAgggAgTtaaAaCaCcgGtTTcGTAtcCactCccTCaTAactgTAACCtTcTaGGAGaG +AGCAtCcAAagGAgCcAtGCcaaTcGttAcTtTTgGTCCATcGAtggctaggGCATtAcacGgTTCGtgGACaggctcTC +AGcGgaCAcGaaaaccACCTTggCAAGggACTCTGggACgGCaAcCAaatatgGggCCgtacTagaATcCgCtTTgCttc +tCctTtAATCgaAaaccCAGggtagcaaatcccCAtTgaGagTcaTCTGTgGtTtAAaAtggGGgTtccAAATacAaGGg +TAaTaTatGaGgAacAgAatcgACTTTGCAGtatataAtGGcagttatgctgaTTaAtGcaCAGgaCagAagAGcaAaTc +gaAAaAtaaaATTCtgTaaggGcTgcagTaTGtcCcTATACGCTACcaTTAaTCagTtgcTtGaCgGtGacaCAaaCtca +aCGCgCaCAttAaGAgaTTAGcGCaCgTacTAGAGttaTAccatTAAGCTGtACgGGttaTTTCTTagTTcCAcCCGttA +agaAGcgtcagTTCGtatGGggATgcaAgTCACtcgatgcTtTaCgaTgCTCGAGcTcctaCaaAcTGAACaAATccaTt +TcTTGccatTCccAcCAAtAtGTGtctcCCCaAagtgaCGTTgatgtGTCtAAaCAACcATGGggCTgCgtGGCgtTGTg +aggatttTgCatcacgtcTggGAAaTgcAAATcgatcCAACTGCTGCAtaTCAtCCAaaGCTGAAtTAgcCTTTtCgcaT +gGggcTaATCcttgTgtTaTAAtTGCgagTcAgTacgtaaCGGGcCGaAcCCCgtaGCatTatGtGacttTgTgcCCAtg +CaCTgtGATAGTgtgTcTCGTaGggAAGaGCaggtCaTGGaaAtTGtCTAatTtAGGCAgAtaCtGaCaCCTcgtcTcTA +gaCTGcCaaGCTtcttTAaAGtgttTGtcgGgCgCacgAtgGTCcAAtcGtAaGAacCATCCGagtTaaTtGAgcgCAGA +CActagGgGCTtTGgGCTAtaGtGCgtCATCAagtaaTatGAAaCcgCgtTGaggCCttAAAGacAcgTCGGGtAtAtTA +tcAGAGttcCGTAcTaaTCcAaGaatCAgatCaCcccTTcTCGcTGTCcCcACGGGCGttAtgctTGTgaagAaCgTCAG +CcCAaAgTAACggCcgCgCTTctcccCCaTCTctggttgTCgcgaGCccgtGaCcCCtatTCaCcCgcAGTcAGtCAGgG +ACagaCGCaGtTGcGATgaCtCGcAGagCTAAatCctAcAtGcacttaGgCGcaaTCgGgaaCTaaaAgtgaaGtTtTaG +CATTAGaCaagTcgCAaataGTTcCGaTgctaCctCtccGcGTgGTaGcCGTgtgTTCCAtCTAagGctCgggtgTcgGC +CaAaCTTAAaTgGgtAagTggtGgCgtcTaCaCCtgTTTgcCacgccAccGCGttTataaCCAGCcgCCGcGagaggaCc +GaTcTcGtaTccaAtCaCAtACTaCTATGGATtgTtCAccCCaCAttTtTcGaTCGtCgGgTttcGaTgCtgGgAGTtcC +GcGgCCTggaACTTaaCcGAAAcCctTGGCcaAaGaATGCGtATgGtGtAGAAcaGcCGcACgCAAtccgaCaAcaTCat +gcAccaCaTttaTCtCatCCcaTTCgtTGtAaGacCGGAccGACGgAccTACatagTGGcAggGtCAcgCtGtgtgGAtC +aCAtcctAGaGGcCaAGAACCgctGtGccCgcCAGTtCTaGCTCcggagtggGgTgTtcccGtCacGcCTgGgCaAtTgT +ttcgTATGtAGCcgatttCCTggAGTctCggGaattCaAaTCcacAGgaCTcCtaCAccgcCtGAtacGgagacCCcatt +GGGGCaaGGCGCtcGGaAgtACgGaCtCGCgAgAaAtaTatAAcaatcacGgGcAccTGGAaTTcgCcAcATggCtAaCc +CCggtGcaCGggCtTaaaGgtTgactATCaCcGAGcggcATaGTCTCccgATcagTgtaaGTgCGGcgCCGgCCATtCct +tCgggTGcttaaGGtaGagTaCcGTCGtTTaGtgTccgGtGgACGCaGatataCAtAagGGaTctaAATAagAaAcGgta +ACTcGggtcaTAgGaaTATgatGaTATacTATGAaatCacCAtgCGagaaGCATgCaGtGTCTGAACGAACcgAtTCGCT +aCGccGTgCgtAaaTGGagTgGTaCactAGAggTGCgGatGcagGAgccAgTCGAGaATTGTaacAgcACcgAtaTACcA +tcacGCGAcacAGTcGCGCtgACAaCtatcGgatCGAgcTgcCgCcaAgaAAaTtAgcTgGAAGcTATGtCgGCTgAGAg +gCCaCcTCTACGAGcAccagaaAtGcTAatTGCcaACGcCTgccCGTgaCCgtCttaCaaaaAAAtgTcGtCCGggCgGg +aAaActCTgtTAAagATGgCAcGcctTgGGaaaCaGGCcttCCCaCTCgcaGCTccaccaTAGCCcgTGGCCgtAAGCGg +GAGGtaCagCgGCAaAggcGcCAGtgtcCCActTCtTTataGTAGCAACcttaGAcCtaGgtTTaaGaGGACTGGATttC +gtTtATgGaGaAtggcGaTcgCccGcgtCCCaaaCaCAGtcAtAcaGcaacgTctTTctAaAACtCtgcaAAGttCcGCC 
+GgTTaggcTaaCCtGacTTGTTttccAGACCtggaaggtACGGtgTcaactGcGACccgCCcATAcCtCCcGaTTAtcCT +TTCAtCtCCCgaAGGtgACTactccCgagCaGCGtGTTgatAaAgtGatCgttActAaGtCtTttcGcaTATCCcAaggG +TCTTCGGAcatcCgaagaaCATgaAACGgagGCcAtAtcgCtcAACAACtttctccgGcTcGTctCAaacATacaCaccg +TAactaccgtAcCTTcgTCtacCtGaTtGTtAatGAAGcaCcAtCCAaAAGGGgcTtTTatAATtTGCaCTtAtcATGGg +tcTaGaaTGAtTGcCggTtagAtGtaTAaaTaAcAAgaCacgCGttAatgAAtcCgTCcagTctgttaAacGGTAtaGAT +aGCGaaaTAcaAcGaGaAGgacGTCTGGTgtAcGGggACcagtGtcAatgGATGaaTTATgAtaagtcATacCCgTGaTa +agacATcCGACGTtCccAgcaGttgtaAgtATTTcACATAAaagGTgcGTtAGCccGtataTtgAggcTaAgCAcgttcc +gtACaTtCggAaTTCcaTaatgACgTaCcTCgaCgtACCcTTAcgGAgaaAGtAATtatcaTTgaCGAaGTAaacaGatC +TGGagTTaTcttcATgAaaTCatgTaAaCAaaGgaCTAtcgtgTcTctaCCctACgtgTATAtAaAtGagGgaGccttTg +GaccgcagAtagagtGcacattTATaTCTcTtgGAcGaACAtgaaagaGGgtcATcacgACcAAgagAgGGaTCgTTCgg +CAgTGCCCACctAAGATccgagATcCtgTGtTTaGgGaAAGgGcATACcCAGAAGAAAcTgtGAaaGAtAgtcTCTTGcG +catgcCtgGgAgATaGGCtccctTCggCAAagAGgCggTattgTtTcGtTGacCcgggtGCaTTgGtCggttGAATtgGc +CtcCctgTtgcTG +>4 +CGgtATaTacgcACAAaAacGcAaAgTagAcTTTTggCgGtaaGaCCtaTgAGgcCtagTctttgGaGGCcTtgtAAaCa +gAGtcgaCgGgCTGccGcAtGGATCATTGggGCCCGGgCTcTTTtTAGcGaAataTgAcgaGtttCTGaAttGtcTACCA +GaGGCgTGacGTaGgtGCaGGTAtAatatAcacCTGgAgAcgCCCGtAgActaTATcCcGGGgCatcCcgaccctacTGG +TtGCgcGTggATGaAattagaGcACtTTTacCaGGTtACTaATcACACgACGatATTTGcaAGTaGtCcgGcGCgatGtC +AtATTCcccagACAaTGTaggcggactTCgtATaCgGCTcTCcAcgGtttgtgAtcgcTCcTgGaGaCagcAGacAcgCc +CatTtAGAcTCacGGGCCtTcgtCCcacctctTcacACccAaAtaccacTtaTActAtAttGtaACaCgTgtcGaACtTG +ccaTgatAaGCACaCGatgtctTCAtaAaTgGCcACACAGcCGCtTAcCTTaGaaCatAaatgGGTgATAgggtcgacGC +gaaCCTaTctctTCgtTaTttatGaTcAgtACaTggTcgTcaGcCGccaAgTtGtGTaTtAtGCttTaaGgAATTaACAc +gAtAGTAgTCgagtttcacGAcAAGcTCCccGGTCCtgctaaccTAcTCgGcgttAtaACcccCTtcTCgctACGCTAga +AAGTctCTCgAgtaAtCacttTTcCCAttcGaGCctGgcgAtATcCGtTTTcTagaGcTTAgacCCCccTAggcCtAgat +CtctCCTACgtTGgcctcTCGtgAcgAccAaAggTAtCACCTaACagaGtcAAGaCGtaCTTCAcatGtcatgAcTcCaT +agGcattgTACaccGcgcAGgcACtaTGaGCGtggGggAtAgaTgcgcCTGAaCttGAGtccaCAGAgGAACGAAcTaAC +ggAaatTGcATgCGAcCtcCAGgtgtCcActATATTatGgACCaaGgTccGGTagtgTTCTAcTATcATCacaCTgACcc +cGcgcgAgGCCTttATAgcAtcCCtaaCctgcAAaCgATgcctGTCGgTcatGtcCcAAggGCCcTgATAccgaGAgatT +tAtgcctGCgCACttCtTcAagCggaCAGCggtCcgTGaacGgtATCaccatGATctcatAcACaCtcCtAGtAcGcATG +TaAGCAgcgctgagtAGgCgGgGctTagCTGAaCGcggAGGaAtcGtAtAagAGgcCTcTccCagagtCATtCCaActAa +ccACCTggCAaaGGCCctCatCaCGtGGcGgTgcTTgCAcTcTaAcGAAGTtGcctcaccgagGAcTgAaCGGCGcagtg +cCgGcCActccCAgcCAGgAGagGGttTcatAaTCTttCGgTAGctttAAATGTaaccttActtTgCAtGTTTgAgaGtC +CaAAGcAtGAtacGGTCGggGcaCccAGGtTGTACGgcGCACgTcTttTCCAcggTtGgagaaACcACgccaCGTcGTTc +TtGCcAGACAGTcATTTATCaccttCACTCtCctTcgtAcaCcaAaGtgTTgCTAcAccGGATtGTAtcTccGcTttcTa +tAacCGagtgTtCtTgggacCGGcccgctcCgCGGaAcAtCcttttTgctcgATGTcTcCgatgcCGTcAgcacActGCg +CGCaAGagTGtgAACGtAaGaaACCAttaccTAtTATcTgtAaAagcTtgaatgaatctTtcTCTAGGGGTgtGgacagc +TCTcGaccTccTatgTTCttcTCcGTtCTTgatTATtTACCaCcggag +>5 +GGtTaacCtcatcAcaaTGcAaGgTaCAgtcgACatAtcgTACTgaaaCTTtCttCccGcgCaGttggTGCgAGaCTCcg +ccaAgTTGgtAgTTTCAGaTaTAccTcaggTtACGggTgCGGaACACCAGGtgGCatggcCggCGtgGGtGcagTtGTgC +TcaaCATcTgAgCaAgAccaTtaGAcGaGtTGGCgTccACggtAATtttGggGCtgAtCcgggGaGtcGAgtTtTGGGgg +TGcaaAcTGgCgTGaaATtCtcAgaGaaCaAGgTtCaatAaGattgAagaggtTCacTgACTCCTgTTGCAACCCtCCGA +cctGTcAGccGACTgTaAtttTcggCAAGgGAtAttCaCtCTAcCgcctctAatGggaATAGCGCcccCCttCGaCCgat +AtAcCACcaattCggaCtGtCGCCCtAtttAataGCTgctgTcGcgGCtAcGcccTGTTagcGagcGAttCgtAAgtaCG +CCacAGTAActATAaGGCGCcTCtcaaAGAGAGccTtCtATcAtagTTtTtCtgTaTGTAtcGGGcCacCgggTTcTATg +TTcTGAgCCgacCcAtgGTTAttTCgaTGgCTgcTcagAAAcTGaGgCGATTTCgcattGtCAcagttACcGTTAGTtTA 
+AGGTGTtCtAAcGACgtGTCTAtAGaGGCactGgaCaCctgGagCCgCttaATCtgTCATTtTgagCAaGGCaAcaAGCc +CtCaCAtcacCcatGTtTGCcCAagCAtCaTagGCTGCcaCGcCGGacATGCaaAggGGCCtGAGAtCCtCtgCaAAggG +agtgcacgcatgGtGtGatgTggTcCaACcGCAgGGgTAAcGCtgaTTTGCcAtctTactgtTcTttaCcCCCTACgTta +TGACaCAAtcGCtTttgccACagAACGggAtTCAAagcCAtAtATAcccgTcaAtGGACtTAcctCcgacAgTCcCTCGg +GgCCtgtTttTAgGGCgTTGcaCCTtAtTGgAtaactaaCCaAaggtaGgGgcaCtCtgcagGAgCcTcCaCaACGTgTa +GcgCAAcCtcGcTTCtTGGGAtTtTcaGCCAGaAtACaaAggGcCgGtAtCTATaTccAacaTCtatTACgTcgGCGTgG +gAgcTaggATTcaCgatggTTaaacTcTTAGaagAcaCcTCCtaATCcATTTacgcAcAGcTGTtccGTgTagAgAaTAT +AtgATAGaAAgggACCTTgAtaAGgtaAAgcCaTAAgACcCGGGcCaGgtCACaTAGTttATaTccCAcCGcCCtACgat +CccCGaccatgcaAAgAAcctcgCTtGaGaCaTACtcTgtAgtCGATATaAtGcCCttaaTAtttAcTCCACGaggaTAG +TTccgTtcgCtGcTGTcCATgGcGaCcTcGaAcCgTCCgGccaACgtTGaAgtcgCCAGcgAAcCcTGgCgGccaTttaT +AGGaatcacGgGgcGGctAaaccgaTCcgTcgCtTgagcAcGGTATGGAttCgTtTAGtTAcCccaAAACGATAtCtGAt +AttGcaCtAAcGGAAtCTaGCAGTaTggCGcAATaGtACagaccCCAAtAcGGaTcTactGGTaTCGcCacTtaGggcGt +tTcGACCGgaaTAtcaGTCAcTcCCagGTGCAcggTAgtacttaCgCAgctaaatAGGgAtcAaacaGcCtaCccGTGag +agAcaGcTAGTaaAGgtACgACGacGtctTagCgTaTtTCccCTCTCTTTTacGaaCGACGCCaaTGgTGtTGGCgATAC +AtAtGGCTCgAgTaCgCatGTccccACAaCCCAaaAggGtataCAcAAAtATAGctGgACcggGGCatGAGttGtTcTGg +GcaaGcaatTCTCAgtGcCCaTCtGtGcccacTTcAgcCgCtaAGCAGgTAatCcacgAacCGgcgcCGTaatGcaGacg +gCGGcgctCcAAaTGaGCtAcTcaagcgGTGaTAagCTCtCCcaaacAAaaGTatatttaGTtacaaGAtTCaCgGtTTA +TCACACcgccCCTCCcCgGGTTtcTTCATGgGGTAttGaGTGTGACAGacccgtAGcGAAGaGGgATAtgTAtcaagCGA +GcgCtAGTCATccgtatTtACCAcAGAtaCGAcaTAcGTAGaCaAtccCCGAccCAtctTTgGCcCGAaaataCGaTCCT +AaCttCAtggAgCTTCcaTGGTAgGCcGatcgTCaATTGAcAAGCgcTGgCCCtCtGaCGCGCAatcCTTAcACTgaGTg +ttcATCCAaaacAgatCaccaCtCgTTCTGaaGTgTCGGaGaGtatGcaAaagTgcatAagGgcgtCTGgGGtcGCcAAA +acTaGGaTaTataGTatTAGTaGCTCacCgCGCtTGGgtcGTGTgCCTttGAGcGggAGtTTgaCGcgccTcATTagtga +TGaGcCgcagCcgcaCcaTccaaggaAtcCaAaaGaGtGGTTCcgcacTTCGACcCGcaGatgGgGgaTgTgcCGacgCC +CCAatTCccGGtAgcacTGCacaTataGGTtGCagATtgcccCAGcggcgtgATTtTTgCCGaaagTcTtcCagTTaTTg +caTTCCGcgGcatacAgCTggccgTcgGaCGAGgaatcaggcagGGgGaggGGgAtggGtAtctatctACTTGgAggCcG +cTaAGACctTCtcggacCatattgcaAGGAGTTaTaccTcAAccCAAGTCTCgacCCTCAGccTaGCggCattGaTCGcc +tggGgCACtAaactGCctGggtgaCGAgAtaCgAaGcAgGTAcaCgcaGgATgtcGCTAtGgGaAACAacCacCTgcAGg +tATAtaACGAGAatAgGagTATTatGATgCcgCCGCggTaTATaCActTaaTGcaacgtTggTgcTaaAagaATGgCTTT +cgATgCCTgtaCagGGtaATAAGCgTcATCCaaCAttggtGCGgtgTCTTaTggccTacCAaGaTcggcgTGcTTttcGG +cGCCacTgtGccgTggaTTACtcACCaagAtAtTAgCgGGATcATctcgCtGAccCCGcCGGaCGcTcTtTaAGCCtaAT +CtTcTcctCacTtGtgGCtTgAtTTcTAGAAGgGGgcGTgAGcGtGcAAcgTcCTtAAaaactTGtttCGcctGagTCgC +AacGCacTTAGacCtaacCTcACTgGccGtgGGtTcTgAgatcgcAcAAAaCCagGAaCAtgtAaagAtccgGaCTaTAT +gGCaAagCgcaatAgCtcTcTTTGAGcgTCACACgtGACggcggTGtCCCgcgcCGtGcGTcGtcGGtcGcaagGTTcCg +AaGCtaGgCgccagCgTctaGcaCtcTtaTtgggtAATTTGGcGGAcacGgaGCagacTTGGtgaaGTGCAcgTtAAGcG +cgggCgaGTtATtAtTCAttgtTTTtcaGTcAgtTtATccATtgaCCAAa diff --git a/t/data/test3.fa.fai b/t/data/test3.fa.fai new file mode 100644 index 0000000..4eae6e0 --- /dev/null +++ b/t/data/test3.fa.fai @@ -0,0 +1,5 @@ +1 5869 3 80 81 +2 8417 5949 80 81 +3 8653 14475 80 81 +4 1808 23240 80 81 +5 3410 25074 80 81 diff --git a/t/data/test_analysis1122.yaml b/t/data/test_analysis1122.yaml new file mode 100644 index 0000000..4d11193 --- /dev/null +++ b/t/data/test_analysis1122.yaml @@ -0,0 +1,43 @@ +name: zmp_ph1 +chunk_total: 3 +read1_length: 30 +read2_length: 54 +mismatch_threshold: 2 +bin_size: 100 +peak_buffer_width: 100 +hmm_sig_level: 0.001 +hmm_binary: bin/quince_chiphmmnew +r_binary: R +deseq_script: script/run_deseq.R +output_sig_level: 0.05 +ref_fasta: 
t/data/test12.fa +ensembl_species: danio_rerio +samples: + - + name: zmp_ph1_1m + description: ZMP phenotype 1.1 mutant + condition: mutant + group: 1 + tag: NNNNBGAGGC + bam_file: t/data/test1.bam + - + name: zmp_ph1_1s + description: ZMP phenotype 1.1 sibling + condition: sibling + group: 1 + tag: NNNNBAGAAG + bam_file: t/data/test1.bam + - + name: zmp_ph1_2m + description: ZMP phenotype 1.2 mutant + condition: mutant + group: 2 + tag: NNNNBCAGAG + bam_file: t/data/test2.bam + - + name: zmp_ph1_2s + description: ZMP phenotype 1.2 sibling + condition: sibling + group: 2 + tag: NNNNBGCACG + bam_file: t/data/test2.bam diff --git a/t/data/test_analysis12.yaml b/t/data/test_analysis12.yaml new file mode 100644 index 0000000..d0adcba --- /dev/null +++ b/t/data/test_analysis12.yaml @@ -0,0 +1,28 @@ +name: zmp_ph1 +chunk_total: 3 +read1_length: 30 +read2_length: 54 +mismatch_threshold: 2 +bin_size: 100 +peak_buffer_width: 100 +hmm_sig_level: 0.001 +hmm_binary: bin/quince_chiphmmnew +r_binary: R +deseq_script: script/run_deseq.R +output_sig_level: 0.05 +ref_fasta: t/data/test12.fa +samples: + - + name: zmp_ph1_1m + description: ZMP phenotype 1.1 mutant + condition: mutant + group: 1 + tag: NNNNBGAGGC + bam_file: t/data/test1.bam + - + name: zmp_ph1_1s + description: ZMP phenotype 1.1 sibling + condition: sibling + group: 1 + tag: NNNNBCAGAG + bam_file: t/data/test2.bam diff --git a/t/data/test_analysis13.yaml b/t/data/test_analysis13.yaml new file mode 100644 index 0000000..78028f6 --- /dev/null +++ b/t/data/test_analysis13.yaml @@ -0,0 +1,28 @@ +name: zmp_ph1 +chunk_total: 3 +read1_length: 30 +read2_length: 54 +mismatch_threshold: 2 +bin_size: 100 +peak_buffer_width: 100 +hmm_sig_level: 0.001 +hmm_binary: bin/quince_chiphmmnew +r_binary: R +deseq_script: script/run_deseq.R +output_sig_level: 0.05 +ref_fasta: t/data/test12.fa +samples: + - + name: zmp_ph1_1m + description: ZMP phenotype 1.1 mutant + condition: mutant + group: 1 + tag: NNNNBGAGGC + bam_file: t/data/test1.bam + - + name: zmp_ph1_1s + description: ZMP phenotype 1.1 sibling + condition: sibling + group: 1 + tag: NNNNBCGCAA + bam_file: t/data/test3.bam diff --git a/t/data/test_de.yaml b/t/data/test_de.yaml new file mode 100644 index 0000000..4a47940 --- /dev/null +++ b/t/data/test_de.yaml @@ -0,0 +1,75 @@ +- + name: count_tags + default_memory: 3000 +- + name: bin_reads + default_memory: 3000 +- + name: get_read_peaks + default_memory: 3000 +- + name: merge_read_peaks + default_memory: 50 + prerequisites: + - get_read_peaks +- + name: summarise_read_peaks + default_memory: 200 + prerequisites: + - merge_read_peaks +- + name: run_peak_hmm + default_memory: 300 + prerequisites: + - bin_reads + - summarise_read_peaks +- + name: join_hmm_bins + default_memory: 50 + prerequisites: + - run_peak_hmm +- + name: get_three_prime_ends + default_memory: 1000 + prerequisites: + - join_hmm_bins +- + name: merge_three_prime_ends + default_memory: 50 + prerequisites: + - get_three_prime_ends +- + name: filter_three_prime_ends + default_memory: 50 + prerequisites: + - merge_three_prime_ends +- + name: choose_three_prime_end + default_memory: 50 + prerequisites: + - filter_three_prime_ends +- + name: count_reads + default_memory: 300 + prerequisites: + - choose_three_prime_end +- + name: merge_read_counts + default_memory: 50 + prerequisites: + - count_reads +- + name: run_deseq + default_memory: 2000 + prerequisites: + - merge_read_counts +- + name: add_gene_annotation + default_memory: 3000 + prerequisites: + - run_deseq +- + name: dump_as_table + 
default_memory: 3000 + prerequisites: + - add_gene_annotation diff --git a/t/gene.t b/t/gene.t new file mode 100644 index 0000000..57f1d56 --- /dev/null +++ b/t/gene.t @@ -0,0 +1,119 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 54; + +use DETCT::Gene; + +my $gene = DETCT::Gene->new( + { + genebuild_version => 'e61', + stable_id => 'ENSDARG00000095747', + biotype => 'protein_coding', + seq_name => '5', + start => 40352744, + end => 40354399, + strand => 1, + } +); + +isa_ok( $gene, 'DETCT::Gene' ); + +# Test genebuild version attribute +is( $gene->genebuild_version, 'e61', 'Get genebuild version' ); +is( $gene->set_genebuild_version('e62'), undef, 'Set genebuild version' ); +is( $gene->genebuild_version, 'e62', 'Get new genebuild version' ); +throws_ok { $gene->set_genebuild_version() } +qr/No genebuild version specified/ms, 'No genebuild version'; +throws_ok { $gene->set_genebuild_version('#invalid#') } +qr/Invalid genebuild version/ms, 'Invalid genebuild version'; + +# Test stable id attribute +is( $gene->stable_id, 'ENSDARG00000095747', 'Get stable id' ); +is( $gene->set_stable_id('ENSDARG00000024771'), undef, 'Set stable id' ); +is( $gene->stable_id, 'ENSDARG00000024771', 'Get new stable id' ); +throws_ok { $gene->set_stable_id() } qr/No stable id specified/ms, + 'No stable id'; +throws_ok { $gene->set_stable_id('#invalid#') } qr/Invalid stable id/ms, + 'Invalid stable id'; + +# Test name attribute +is( $gene->name, undef, 'Get name' ); +is( $gene->set_name('cxc64'), undef, 'Set name' ); +is( $gene->name, 'cxc64', 'Get new name' ); +is( $gene->set_name(), undef, 'Set undef name' ); +is( $gene->name, undef, 'Get undef name' ); +my $long_name = 'X' x ( $DETCT::Gene::MAX_NAME_LENGTH + 1 ); +throws_ok { $gene->set_name('') } qr/Name is empty/ms, 'Empty name'; +throws_ok { $gene->set_name($long_name) } qr/longer than \d+ characters/ms, + 'Invalid name'; + +# Test description attribute +is( $gene->description, undef, 'Get description' ); +is( $gene->set_description('CXC chemokine 64'), undef, 'Set description' ); +is( $gene->description, 'CXC chemokine 64', 'Get new description' ); +is( $gene->set_description(), undef, 'Set undef description' ); +is( $gene->description, undef, 'Get undef description' ); + +# Test biotype attribute +is( $gene->biotype, 'protein_coding', 'Get biotype' ); +is( $gene->set_biotype('nonsense_mediated_decay'), undef, 'Set biotype' ); +is( $gene->biotype, 'nonsense_mediated_decay', 'Get new biotype' ); +throws_ok { $gene->set_biotype() } qr/No biotype specified/ms, 'No biotype'; +throws_ok { $gene->set_biotype('#invalid#') } qr/Invalid biotype/ms, + 'Invalid biotype'; + +# Test sequence name attribute +is( $gene->seq_name, '5', 'Get sequence name' ); +is( $gene->set_seq_name('6'), undef, 'Set sequence name' ); +is( $gene->seq_name, '6', 'Get new sequence name' ); +throws_ok { $gene->set_seq_name() } qr/No sequence name specified/ms, + 'No sequence name'; +throws_ok { $gene->set_seq_name('#invalid#') } qr/Invalid sequence name/ms, + 'Invalid sequence name'; + +# Test start attribute +is( $gene->start, 40352744, 'Get start' ); +is( $gene->set_start(30352744), undef, 'Set start' ); +is( $gene->start, 30352744, 'Get new start' ); +throws_ok { $gene->set_start() } qr/No start specified/ms, 'No start'; +throws_ok { $gene->set_start(-1) } qr/Invalid start/ms, 'Invalid start'; + +# Test end attribute +is( $gene->end, 40354399, 'Get end' ); +is( $gene->set_end(30354399), undef, 'Set end' 
); +is( $gene->end, 30354399, 'Get new end' ); +throws_ok { $gene->set_end() } qr/No end specified/ms, 'No end'; +throws_ok { $gene->set_end(-2) } qr/Invalid end/ms, 'Invalid end'; + +# Test strand attribute +is( $gene->strand, 1, 'Get strand' ); +is( $gene->set_strand(-1), undef, 'Set strand' ); +is( $gene->strand, -1, 'Get new strand' ); +throws_ok { $gene->set_strand() } qr/No strand specified/ms, 'No strand'; +throws_ok { $gene->set_strand(0) } qr/Invalid strand/ms, 'Invalid strand'; + +# Mock transcript objects +my $transcript1 = Test::MockObject->new(); +$transcript1->set_isa('DETCT::Transcript'); +my $transcript2 = Test::MockObject->new(); +$transcript2->set_isa('DETCT::Transcript'); + +# Test adding and retrieving transcripts +my $transcripts; +$transcripts = $gene->get_all_transcripts(); +is( scalar @{$transcripts}, 0, 'No transcripts' ); +is( $gene->add_transcript($transcript1), undef, 'Add transcript' ); +$transcripts = $gene->get_all_transcripts(); +is( scalar @{$transcripts}, 1, 'Get one transcript' ); +$gene->add_transcript($transcript2); +is( scalar @{$transcripts}, 2, 'Get two transcripts' ); +throws_ok { $gene->add_transcript() } qr/No transcript specified/ms, + 'No transcript specified'; +throws_ok { $gene->add_transcript('invalid') } qr/Class of transcript/ms, + 'Invalid transcript'; + diff --git a/t/genefinder.t b/t/genefinder.t new file mode 100644 index 0000000..ccd7a86 --- /dev/null +++ b/t/genefinder.t @@ -0,0 +1,311 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 93; + +use DETCT::GeneFinder; + +# Mock genes +my @genes; +my @transcript_three_prime_ends = + ( [ 100, 1 ], [ 200, 1 ], [ 300, -1 ], [ 400, -1 ], ); +foreach my $transcript_three_prime_end (@transcript_three_prime_ends) { + my ( $pos, $strand ) = @{$transcript_three_prime_end}; + + # Construct start and end so $pos is always 3' end + my $start = $strand == 1 ? $pos - 50 : $pos; + my $end = $strand == 1 ? $pos : $pos + 50; + + # Create genes named by 3' end position + my $gene = Test::MockObject->new(); + $gene->set_always( 'stable_id', 'ENSDARG00000095747' ); + $gene->set_always( 'external_name', q{g} . $pos . q{:} . $strand ); + $gene->set_always( 'description', undef ); + $gene->set_always( 'biotype', 'protein_coding' ); + $gene->set_always( 'seq_region_start', $start ); + $gene->set_always( 'seq_region_end', $end ); + $gene->set_always( 'seq_region_strand', $strand ); + my $transcript = Test::MockObject->new(); + $transcript->set_always( 'stable_id', 'ENSDART00000133571' ); + $transcript->set_always( 'external_name', q{t} . $pos . q{:} . 
$strand ); + $transcript->set_always( 'description', undef ); + $transcript->set_always( 'biotype', 'protein_coding' ); + $transcript->set_always( 'seq_region_start', $start ); + $transcript->set_always( 'seq_region_end', $end ); + $transcript->set_always( 'seq_region_strand', $strand ); + my $transcript_far = Test::MockObject->new(); + $transcript_far->set_always( 'stable_id', 'ENSDART00000133572' ); + $transcript_far->set_always( 'external_name', 'cxc64-001' ); + $transcript_far->set_always( 'description', undef ); + $transcript_far->set_always( 'biotype', 'protein_coding' ); + $transcript_far->set_always( 'seq_region_start', 100_000 ); + $transcript_far->set_always( 'seq_region_end', 100_100 ); + $transcript_far->set_always( 'seq_region_strand', $strand ); + $gene->set_always( 'get_all_Transcripts', + [ $transcript, $transcript_far ] ); + push @genes, $gene; +} + +# Mock slice +my $slice = Test::MockObject->new(); +$slice->set_always( 'get_all_Genes', \@genes ); + +# Mock slice adaptor +my $slice_adaptor = Test::MockObject->new(); +$slice_adaptor->set_isa('Bio::EnsEMBL::DBSQL::SliceAdaptor'); +$slice_adaptor->set_always( 'fetch_by_region', $slice ); + +my $gene_finder = + DETCT::GeneFinder->new( { slice_adaptor => $slice_adaptor, } ); + +isa_ok( $gene_finder, 'DETCT::GeneFinder' ); + +# Test Ensembl slice adaptor attribute +isa_ok( $gene_finder->slice_adaptor, 'Bio::EnsEMBL::DBSQL::SliceAdaptor' ); +throws_ok { $gene_finder->set_slice_adaptor() } +qr/No Ensembl slice adaptor specified/ms, 'No Ensembl slice adaptor'; +throws_ok { $gene_finder->set_slice_adaptor('invalid') } +qr/Class of Ensembl slice adaptor/ms, 'Invalid Ensembl slice adaptor'; + +my $genes; +my $transcripts; +my $distance; +my $nearest_end_pos; + +# Near to one gene on forward strand +( $genes, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_genes( '1', 110, 1 ); +is( $genes->[0]->name, 'g100:1', + q{Gene with 3' end at 100 bp on forward strand} ); +is( $distance, 10, q{3' end is 10 bp downstream} ); +is( $nearest_end_pos, 100, q{3' end at 100 bp} ); +( $genes, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_genes( '1', 90, 1 ); +is( $genes->[0]->name, 'g100:1', + q{Gene with 3' end at 100 bp on forward strand} ); +is( $distance, -10, q{3' end is 10 bp upstream} ); +is( $nearest_end_pos, 100, q{3' end at 100 bp} ); + +# Near to one gene on reverse strand +( $genes, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_genes( '1', 290, -1 ); +is( $genes->[0]->name, 'g300:-1', + q{Gene with 3' end at 300 bp on reverse strand} ); +is( $distance, 10, q{3' end is 10 bp downstream} ); +is( $nearest_end_pos, 300, q{3' end at 300 bp} ); +( $genes, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_genes( '1', 310, -1 ); +is( $genes->[0]->name, 'g300:-1', + q{Gene with 3' end at 300 bp on reverse strand} ); +is( $distance, -10, q{3' end is 10 bp upstream} ); +is( $nearest_end_pos, 300, q{3' end at 300 bp} ); + +# Between two genes on forward strand +( $genes, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_genes( '1', 150, 1 ); +is( $genes->[0]->name, 'g100:1', + q{Gene with 3' end at 100 bp on forward strand} ); +is( $distance, 50, q{3' end is 50 bp upstream} ); +is( $nearest_end_pos, 100, q{3' end at 100 bp} ); + +# Between two genes on reverse strand +( $genes, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_genes( '1', 350, -1 ); +is( $genes->[0]->name, 'g400:-1', + q{Gene with 3' end at 400 bp on reverse strand} ); +is( $distance, 50, q{3' end is 50 bp upstream} ); +is( 
$nearest_end_pos, 400, q{3' end at 400 bp} ); + +# Near to one transcript on forward strand +( $transcripts, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_transcripts( '1', 110, 1 ); +is( $transcripts->[0]->name, + 't100:1', q{Transcript with 3' end at 100 bp on forward strand} ); +is( $distance, 10, q{3' end is 10 bp downstream} ); +is( $nearest_end_pos, 100, q{3' end at 100 bp} ); +( $transcripts, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_transcripts( '1', 90, 1 ); +is( $transcripts->[0]->name, + 't100:1', q{Transcript with 3' end at 100 bp on forward strand} ); +is( $distance, -10, q{3' end is 10 bp upstream} ); +is( $nearest_end_pos, 100, q{3' end at 100 bp} ); + +# Near to one transcript on reverse strand +( $transcripts, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_transcripts( '1', 290, -1 ); +is( $transcripts->[0]->name, + 't300:-1', q{Transcript with 3' end at 300 bp on reverse strand} ); +is( $distance, 10, q{3' end is 10 bp downstream} ); +is( $nearest_end_pos, 300, q{3' end at 300 bp} ); +( $transcripts, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_transcripts( '1', 310, -1 ); +is( $transcripts->[0]->name, + 't300:-1', q{Transcript with 3' end at 300 bp on reverse strand} ); +is( $distance, -10, q{3' end is 10 bp upstream} ); +is( $nearest_end_pos, 300, q{3' end at 300 bp} ); + +# Between two transcripts on forward strand +( $transcripts, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_transcripts( '1', 150, 1 ); +is( $transcripts->[0]->name, + 't100:1', q{Transcript with 3' end at 100 bp on forward strand} ); +is( $distance, 50, q{3' end is 50 bp upstream} ); +is( $nearest_end_pos, 100, q{3' end at 100 bp} ); + +# Between two transcripts on reverse strand +( $transcripts, $distance, $nearest_end_pos ) = + $gene_finder->get_nearest_transcripts( '1', 350, -1 ); +is( $transcripts->[0]->name, + 't400:-1', q{Transcript with 3' end at 400 bp on reverse strand} ); +is( $distance, 50, q{3' end is 50 bp upstream} ); +is( $nearest_end_pos, 400, q{3' end at 400 bp} ); + +# Check adding gene annotation required parameters +throws_ok { $gene_finder->add_gene_annotation() } qr/No regions specified/ms, + 'No regions'; + +my $regions; + +# Adding gene annotation +$regions = [ + [ '1', 1, 1000, 10, -10, '1', 110, 1, 10, [], undef, undef, [], [] ], + [ '1', 1, 1000, 10, -10, '1', 290, -1, 10, [], undef, undef, [], [] ], + [ '1', 1, 1000, 10, -10, '1', 100, 1, 10, [], undef, undef, [], [] ], + [ '1', 1, 1000, 10, -10, '1', 300, -1, 10, [], undef, undef, [], [] ], +]; +my $annotated_regions = $gene_finder->add_gene_annotation($regions); +my ($gv) = keys %{ $annotated_regions->[0]->[-1] }; # Genebuild version varies +is( scalar keys %{ $annotated_regions->[0]->[-1] }, 1, '1 genebuild' ); +is( scalar @{ $annotated_regions->[0]->[-1]->{$gv} }, 1, '1 gene' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[0], + 'ENSDARG00000095747', 'Stable id' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[1], + 'g100:1', q{3' end as name} ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[2], undef, 'Description' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[3], + 'protein_coding', 'Biotype' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[4], 10, 'Distance downstream' ); +is( scalar @{ $annotated_regions->[0]->[-1]->{$gv}->[0]->[5] }, + 1, '1 transcript' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[5]->[0]->[0], + 'ENSDART00000133571', 'Transcript stable id' ); +is( 
$annotated_regions->[0]->[-1]->{$gv}->[0]->[5]->[0]->[1], + 'protein_coding', 'Transcript biotype' ); +is( scalar keys %{ $annotated_regions->[1]->[-1] }, 1, '1 genebuild' ); +is( scalar @{ $annotated_regions->[1]->[-1]->{$gv} }, 1, '1 gene' ); +is( $annotated_regions->[1]->[-1]->{$gv}->[0]->[0], + 'ENSDARG00000095747', 'Stable id' ); +is( $annotated_regions->[1]->[-1]->{$gv}->[0]->[1], + 'g300:-1', q{3' end as name} ); +is( $annotated_regions->[1]->[-1]->{$gv}->[0]->[2], undef, 'Description' ); +is( $annotated_regions->[1]->[-1]->{$gv}->[0]->[3], + 'protein_coding', 'Biotype' ); +is( $annotated_regions->[1]->[-1]->{$gv}->[0]->[4], 10, 'Distance downstream' ); +is( scalar @{ $annotated_regions->[1]->[-1]->{$gv}->[0]->[5] }, + 1, '1 transcript' ); +is( $annotated_regions->[1]->[-1]->{$gv}->[0]->[5]->[0]->[0], + 'ENSDART00000133571', 'Transcript stable id' ); +is( $annotated_regions->[1]->[-1]->{$gv}->[0]->[5]->[0]->[1], + 'protein_coding', 'Transcript biotype' ); +is( scalar keys %{ $annotated_regions->[2]->[-1] }, 1, '1 genebuild' ); +is( scalar @{ $annotated_regions->[2]->[-1]->{$gv} }, 1, '1 gene' ); +is( $annotated_regions->[2]->[-1]->{$gv}->[0]->[0], + 'ENSDARG00000095747', 'Stable id' ); +is( $annotated_regions->[2]->[-1]->{$gv}->[0]->[1], + 'g100:1', q{3' end as name} ); +is( $annotated_regions->[2]->[-1]->{$gv}->[0]->[2], undef, 'Description' ); +is( $annotated_regions->[2]->[-1]->{$gv}->[0]->[3], + 'protein_coding', 'Biotype' ); +is( $annotated_regions->[2]->[-1]->{$gv}->[0]->[4], 0, 'Distance downstream' ); +is( scalar @{ $annotated_regions->[2]->[-1]->{$gv}->[0]->[5] }, + 1, '1 transcript' ); +is( $annotated_regions->[2]->[-1]->{$gv}->[0]->[5]->[0]->[0], + 'ENSDART00000133571', 'Transcript stable id' ); +is( $annotated_regions->[2]->[-1]->{$gv}->[0]->[5]->[0]->[1], + 'protein_coding', 'Transcript biotype' ); +is( scalar keys %{ $annotated_regions->[3]->[-1] }, 1, '1 genebuild' ); +is( scalar @{ $annotated_regions->[3]->[-1]->{$gv} }, 1, '1 gene' ); +is( $annotated_regions->[3]->[-1]->{$gv}->[0]->[0], + 'ENSDARG00000095747', 'Stable id' ); +is( $annotated_regions->[3]->[-1]->{$gv}->[0]->[1], + 'g300:-1', q{3' end as name} ); +is( $annotated_regions->[3]->[-1]->{$gv}->[0]->[2], undef, 'Description' ); +is( $annotated_regions->[3]->[-1]->{$gv}->[0]->[3], + 'protein_coding', 'Biotype' ); +is( $annotated_regions->[3]->[-1]->{$gv}->[0]->[4], 0, 'Distance downstream' ); +is( scalar @{ $annotated_regions->[3]->[-1]->{$gv}->[0]->[5] }, + 1, '1 transcript' ); +is( $annotated_regions->[3]->[-1]->{$gv}->[0]->[5]->[0]->[0], + 'ENSDART00000133571', 'Transcript stable id' ); +is( $annotated_regions->[3]->[-1]->{$gv}->[0]->[5]->[0]->[1], + 'protein_coding', 'Transcript biotype' ); + +# Mock genes all on one strand +@genes = (); +@transcript_three_prime_ends = + ( [ 100, 1 ], [ 200, 1 ], [ 300, 1 ], [ 400, 1 ], ); +foreach my $transcript_three_prime_end (@transcript_three_prime_ends) { + my ( $pos, $strand ) = @{$transcript_three_prime_end}; + + # Create genes named by 3' end position + my $gene = Test::MockObject->new(); + $gene->set_always( 'stable_id', 'ENSDARG00000095747' ); + $gene->set_always( 'external_name', q{g} . $pos . q{:} . 
$strand ); + $gene->set_always( 'description', undef ); + $gene->set_always( 'biotype', 'protein_coding' ); + $gene->set_always( 'seq_region_start', 1 ); + $gene->set_always( 'seq_region_end', $pos ); + $gene->set_always( 'seq_region_strand', $strand ); + my $transcript = Test::MockObject->new(); + $transcript->set_always( 'stable_id', 'ENSDART00000133571' ); + $transcript->set_always( 'external_name', 'cxc64-001' ); + $transcript->set_always( 'description', undef ); + $transcript->set_always( 'biotype', 'protein_coding' ); + $transcript->set_always( 'seq_region_start', $pos - 50 ); + $transcript->set_always( 'seq_region_end', $pos ); + $transcript->set_always( 'seq_region_strand', $strand ); + $gene->set_always( 'get_all_Transcripts', [$transcript] ); + push @genes, $gene; +} + +# Mock slice +$slice = Test::MockObject->new(); +$slice->set_always( 'get_all_Genes', \@genes ); + +# Mock slice adaptor +$slice_adaptor = Test::MockObject->new(); +$slice_adaptor->set_isa('Bio::EnsEMBL::DBSQL::SliceAdaptor'); +$slice_adaptor->set_always( 'fetch_by_region', $slice ); + +$gene_finder = DETCT::GeneFinder->new( { slice_adaptor => $slice_adaptor, } ); + +isa_ok( $gene_finder, 'DETCT::GeneFinder' ); + +# Adding gene annotation with genes only on one strand +$regions = [ + [ '1', 1, 1000, 10, -10, '1', 110, 1, 10, [], undef, undef, [], [] ], + [ '1', 1, 1000, 10, -10, '1', 290, -1, 10, [], undef, undef, [], [] ], +]; +my $annotated_regions = $gene_finder->add_gene_annotation($regions); +my ($gv) = keys %{ $annotated_regions->[0]->[-1] }; # Genebuild version varies +is( scalar keys %{ $annotated_regions->[0]->[-1] }, 1, '1 genebuild' ); +is( scalar @{ $annotated_regions->[0]->[-1]->{$gv} }, 1, '1 gene' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[0], + 'ENSDARG00000095747', 'Stable id' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[1], + 'g100:1', q{3' end as name} ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[2], undef, 'Description' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[3], + 'protein_coding', 'Biotype' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[4], 10, 'Distance downstream' ); +is( scalar @{ $annotated_regions->[0]->[-1]->{$gv}->[0]->[5] }, + 1, '1 transcript' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[5]->[0]->[0], + 'ENSDART00000133571', 'Transcript stable id' ); +is( $annotated_regions->[0]->[-1]->{$gv}->[0]->[5]->[0]->[1], + 'protein_coding', 'Transcript biotype' ); +is( scalar keys %{ $annotated_regions->[1]->[-1] }, + 0, 'No genes on reverse strand' ); diff --git a/t/misc-bam.t b/t/misc-bam.t new file mode 100644 index 0000000..86f41bf --- /dev/null +++ b/t/misc-bam.t @@ -0,0 +1,1652 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 323; + +use DETCT::Misc::BAM qw( + get_reference_sequence_lengths + get_sequence + count_tags + bin_reads + get_read_peaks + get_three_prime_ends + merge_three_prime_ends + filter_three_prime_ends + choose_three_prime_end + count_reads + merge_read_counts +); + +=for comment + +Test random BAM files can be regenerated using: + +perl script/make_test_sam.pl --seed 10 --seq_region_count 5 \ +--seq_region_max_length 10_000 --read_pair_count 100 \ +--read_tags NNNNBGAGGC NNNNBAGAAG | samtools view -bS - | samtools sort - test1 +perl script/make_test_sam.pl --seed 10 --seq_region_count 5 \ +--seq_region_max_length 10_000 --read_pair_count 100 \ +--read_tags NNNNBCAGAG NNNNBGCACG | samtools view -bS - | samtools sort - test2 +perl 
script/make_test_sam.pl --seed 20 --seq_region_count 5 \ +--seq_region_max_length 10_000 --read_pair_count 100 \ +--read_tags NNNNBCGCAA NNNNBCAAGA | samtools view -bS - | samtools sort - test3 +ls *.bam | xargs -n1 samtools index +mv test* t/data/ + +Some numbers in tests below will then need updating. Code to generate numbers +(using independent methods) is given before each test. + +Test random FASTA files can be regenerated using: + +perl script/make_test_fasta.pl --seed 10 --seq_region_count 5 \ +--seq_region_max_length 10_000 > test12.fa +perl script/make_test_fasta.pl --seed 20 --seq_region_count 5 \ +--seq_region_max_length 10_000 > test3.fa +ls *.fa | xargs -n1 samtools faidx +mv test* t/data/ + +=cut + +# Check reference sequence length returned by test BAM file +throws_ok { get_reference_sequence_lengths() } qr/No BAM file specified/ms, + 'No BAM file'; +my %bam_length = get_reference_sequence_lengths('t/data/test1.bam'); +is( $bam_length{1}, 8789, 'Chr 1 length' ); +is( $bam_length{2}, 7958, 'Chr 2 length' ); +is( $bam_length{3}, 4808, 'Chr 3 length' ); + +# Check getting sequence from test FASTA file +# First 10 bp of chromosome 1 should be CCAGGCGCGG according to: + +=for comment +head -2 t/data/test12.fa +=cut + +throws_ok { + get_sequence( + { + seq_name => '1', + start => 1, + end => 10, + strand => 1, + } + ); +} +qr/No FASTA index or FASTA file specified/ms, 'No FASTA index or file'; +throws_ok { + get_sequence( + { + ref_fasta => 't/data/test12.fa', + start => 1, + end => 10, + strand => 1, + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + get_sequence( + { + ref_fasta => 't/data/test12.fa', + seq_name => '1', + end => 10, + strand => 1, + } + ); +} +qr/No sequence start specified/ms, 'No sequence start'; +throws_ok { + get_sequence( + { + ref_fasta => 't/data/test12.fa', + seq_name => '1', + start => 1, + strand => 1, + } + ); +} +qr/No sequence end specified/ms, 'No sequence end'; +throws_ok { + get_sequence( + { + ref_fasta => 't/data/test12.fa', + seq_name => '1', + start => 1, + end => 10, + } + ); +} +qr/No sequence strand specified/ms, 'No sequence strand'; +my $seq; +$seq = get_sequence( + { + ref_fasta => 't/data/test12.fa', + seq_name => '1', + start => 1, + end => 10, + strand => 1, + } +); +is( length $seq, 10, 'Subsequence length' ); +is( $seq, 'CCAGGCGCGG', 'Subsequence' ); +$seq = get_sequence( + { + ref_fasta => 't/data/test12.fa', + seq_name => '1', + start => 1, + end => 10, + strand => -1, + } +); +is( length $seq, 10, 'Reverse complement subsequence length' ); +is( $seq, 'CCGCGCCTGG', 'Reverse complement subsequence' ); + +# Check counting tags required parameters +throws_ok { + count_tags( + { + mismatch_threshold => 2, + seq_name => '1', + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No BAM file specified/ms, 'No BAM file'; +throws_ok { + count_tags( + { + bam_file => 't/data/test1.bam', + seq_name => '1', + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No mismatch threshold specified/ms, 'No mismatch threshold'; +throws_ok { + count_tags( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 2, + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + count_tags( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 2, + seq_name => '1', + } + ); +} +qr/No tags specified/ms, 'No tags'; + +my $count; + +# Check tag counts returned by chromosome 1 of test BAM file +# Should be 50 random tags according to: + +=for comment +samtools view -f 128 -F 1028 
t/data/test1.bam 1 | awk '{ print $1 }' \ +| sed -e 's/.*#//' | grep GAGGC$ | sort -u | wc -l +=cut + +$count = count_tags( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 100, + seq_name => '1', + tags => ['NNNNBGAGGC'], + } +); +is( scalar keys %{$count}, 1, '1 tag' ); +is( scalar keys %{ $count->{NNNNBGAGGC} }, 50, '50 random tags' ); + +# Check tag counts returned in 1000 bp onwards of chromosome 1 of test BAM file +# Should be 45 random tags according to: + +=for comment +samtools view -f 128 -F 1028 t/data/test1.bam 1:1000 | awk '{ print $1 }' \ +| sed -e 's/.*#//' | grep GAGGC$ | sort -u | wc -l +=cut + +$count = count_tags( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 100, + seq_name => '1', + start => 1000, + tags => ['NNNNBGAGGC'], + } +); +is( scalar keys %{$count}, 1, '1 tag' ); +is( scalar keys %{ $count->{NNNNBGAGGC} }, 45, '45 random tags' ); + +# Check tag counts returned in first 1000 bp of chromosome 1 of test BAM file +# Should be 6 random tags according to: + +=for comment +samtools view -f 128 -F 1028 t/data/test1.bam 1:1-1000 | awk '{ print $1 }' \ +| sed -e 's/.*#//' | grep GAGGC$ | sort -u | wc -l +=cut + +$count = count_tags( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 100, + seq_name => '1', + start => 1, + end => 1000, + tags => ['NNNNBGAGGC'], + } +); +is( scalar keys %{$count}, 1, '1 tag' ); +is( scalar keys %{ $count->{NNNNBGAGGC} }, 6, '6 random tags' ); + +# Check tag counts returned with low mismatch threshold +# Should be 13 random tags according to: + +=for comment +samtools view -f 128 -F 1028 t/data/test1.bam 1 | grep NM:i:0 \ +| awk '{ if ($6 == "54M") print $1 }' \ +| sed -e 's/.*#//' | grep GAGGC$ | sort -u | wc -l +=cut + +$count = count_tags( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '1', + tags => ['NNNNBGAGGC'], + } +); +is( scalar keys %{$count}, 1, '1 tag' ); +is( scalar keys %{ $count->{NNNNBGAGGC} }, 13, '13 random tags' ); + +# Check binning reads required parameters +throws_ok { + bin_reads( + { + mismatch_threshold => 2, + bin_size => 100, + seq_name => '1', + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No BAM file specified/ms, 'No BAM file'; +throws_ok { + bin_reads( + { + bam_file => 't/data/test1.bam', + bin_size => 100, + seq_name => '1', + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No mismatch threshold specified/ms, 'No mismatch threshold'; +throws_ok { + bin_reads( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 2, + seq_name => '1', + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No bin size specified/ms, 'No bin size'; +throws_ok { + bin_reads( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 2, + bin_size => 100, + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + bin_reads( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 2, + bin_size => 100, + seq_name => '1', + } + ); +} +qr/No tags specified/ms, 'No tags'; + +# Check read bins returned by test BAM file +# Should be 35 bins according to: + +=for comment +(samtools view -f 16 -F 1028 t/data/test1.bam 2 | grep 54M | grep NM:i:0 \ +| awk '{ print ($4) / 100 "\t" ($4 + 53 - 1) / 100 }'; \ +samtools view -f 32 -F 1028 t/data/test1.bam 2 | grep 54M | grep NM:i:0 \ +| awk '{ print ($4) / 100 "\t" ($4 + 53 - 1) / 100 }') \ +| sed -e 's/\.[0-9]*//g' \ +| awk '{ if ($1 == $2) print $1; else print $1 "\n" $2 }' \ +| sort | uniq -c | wc -l +=cut + +$count = bin_reads( + { + bam_file => 't/data/test1.bam', + 
mismatch_threshold => 0, + bin_size => 100, + seq_name => '2', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + } +); +is( scalar keys %{$count}, 1, '1 sequence' ); +is( scalar keys %{ $count->{'2'} }, 35, '35 bins' ); + +# Check read bins returned with non-existent tag +$count = bin_reads( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + bin_size => 100, + seq_name => '1', + tags => ['NNNNTTTTTT'], + } +); +is( scalar keys %{$count}, 1, '1 sequence' ); +is( scalar keys %{ $count->{'1'} }, 0, '0 bins' ); + +# Check getting read peaks required parameters +throws_ok { + get_read_peaks( + { + mismatch_threshold => 0, + peak_buffer_width => 100, + seq_name => '1', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + } + ); +} +qr/No BAM file specified/ms, 'No BAM file'; +throws_ok { + get_read_peaks( + { + bam_file => 't/data/test1.bam', + peak_buffer_width => 100, + seq_name => '1', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + } + ); +} +qr/No mismatch threshold specified/ms, 'No mismatch threshold'; +throws_ok { + get_read_peaks( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '1', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + } + ); +} +qr/No peak buffer width specified/ms, 'No peak buffer width'; +throws_ok { + get_read_peaks( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + peak_buffer_width => 100, + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + get_read_peaks( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + peak_buffer_width => 100, + seq_name => '1', + } + ); +} +qr/No tags specified/ms, 'No tags'; + +my $peaks; + +# Check read peaks returned by test BAM file +# First peak should be 262 - 350 (2 reads) according to: + +=for comment +samtools view -f 128 -F 1028 t/data/test1.bam 2 | grep 54M | grep NM:i:0 \ +| awk '{ print $4 "\t" $4 + 53 }' | head -4 +=cut + +# Last peak should be 7399 - 7452 (1 read) according to: + +=for comment +samtools view -f 128 -F 1028 t/data/test1.bam 2 | grep 54M | grep NM:i:0 \ +| awk '{ print $4 "\t" $4 + 53 }' | tail -4 +=cut + +$peaks = get_read_peaks( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + peak_buffer_width => 100, + seq_name => '2', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + } +); +is( scalar keys %{$peaks}, 1, '1 sequence' ); +is( $peaks->{'2'}->[0]->[0], 262, 'Start of first peak' ); +is( $peaks->{'2'}->[0]->[1], 350, 'End of first peak' ); +is( $peaks->{'2'}->[0]->[2], 2, 'First peak read count' ); +is( $peaks->{'2'}->[-1]->[0], 7399, 'Start of last peak' ); +is( $peaks->{'2'}->[-1]->[1], 7452, 'End of last peak' ); +is( $peaks->{'2'}->[-1]->[2], 1, 'Last peak read count' ); + +# Check read peaks returned by test BAM file +# First peak should be 78 - 131 (1 read) according to: + +=for comment +samtools view -f 128 -F 1028 t/data/test2.bam 1 | grep 54M | grep NM:i:0 \ +| awk '{ print $4 "\t" $4 + 53 }' | head -4 +=cut + +# Last peak should be 8666 - 8719 (1 read) according to: + +=for comment +samtools view -f 128 -F 1028 t/data/test2.bam 1 | grep 54M | grep NM:i:0 \ +| awk '{ print $4 "\t" $4 + 53 }' | tail -4 +=cut + +$peaks = get_read_peaks( + { + bam_file => 't/data/test2.bam', + mismatch_threshold => 0, + peak_buffer_width => 100, + seq_name => '1', + tags => [ 'NNNNBCAGAG', 'NNNNBGCACG' ], + } +); +is( scalar keys %{$peaks}, 1, '1 sequence' ); +is( $peaks->{'1'}->[0]->[0], 78, 'Start of first peak' ); +is( $peaks->{'1'}->[0]->[1], 131, 'End of first peak' ); +is( 
$peaks->{'1'}->[0]->[2], 1, 'First peak read count' ); +is( $peaks->{'1'}->[-1]->[0], 8666, 'Start of last peak' ); +is( $peaks->{'1'}->[-1]->[1], 8719, 'End of last peak' ); +is( $peaks->{'1'}->[-1]->[2], 1, 'Last peak read count' ); + +# Check read peaks returned with non-existent tag +$peaks = get_read_peaks( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + peak_buffer_width => 100, + seq_name => '1', + tags => ['NNNNTTTTTT'], + } +); +is( scalar keys %{$peaks}, 1, '1 sequence' ); +is( scalar @{ $peaks->{'1'} }, 0, '0 peaks' ); + +# Check getting 3' ends required parameters +throws_ok { + get_three_prime_ends( + { + mismatch_threshold => 0, + seq_name => '1', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + regions => [ [ 1, 1000, 10, -10 ] ], + } + ); +} +qr/No BAM file specified/ms, 'No BAM file'; +throws_ok { + get_three_prime_ends( + { + bam_file => 't/data/test1.bam', + seq_name => '1', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + regions => [ [ 1, 1000, 10, -10 ] ], + } + ); +} +qr/No mismatch threshold specified/ms, 'No mismatch threshold'; +throws_ok { + get_three_prime_ends( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + regions => [ [ 1, 1000, 10, -10 ] ], + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + get_three_prime_ends( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '1', + regions => [ [ 1, 1000, 10, -10 ] ], + } + ); +} +qr/No tags specified/ms, 'No tags'; +throws_ok { + get_three_prime_ends( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '1', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + } + ); +} +qr/No regions specified/ms, 'No regions'; + +my $three_prime_ends; + +# Check 3' ends returned in first 2000 bp of chromosome 1 of test BAM file +# Should be 9 3' ends according to: + +=for comment +(samtools view -f 160 -F 1036 t/data/test1.bam 1:1-2000 \ +| grep NM:i:0 | grep 54M | awk '{ print "1:" $8 + 29 ":1" }'; \ +samtools view -f 128 -F 1068 t/data/test1.bam 1:1-2000 \ +| grep NM:i:0 | grep 54M | awk '{ print "1:" $8 ":-1" }') \ +| wc -l +=cut + +# One forward strand 3' end should be 1:2642:1 with 1 read according to: + +=for comment +samtools view -f 160 -F 1036 t/data/test1.bam 1:1-2000 \ +| grep NM:i:0 | grep 54M | awk '{ print "1:" $8 + 29 ":1" }' | sort | uniq -c +=cut + +# One reverse strand 3' end should be 1:632:-1 with 1 read according to: + +=for comment +samtools view -f 128 -F 1068 t/data/test1.bam 1:1-2000 \ +| grep NM:i:0 | grep 54M | awk '{ print "1:" $8 ":-1" }' | sort | uniq -c +=cut + +$three_prime_ends = get_three_prime_ends( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '1', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + regions => [ [ 1, 2000, 10, -10 ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( scalar @{ $three_prime_ends->{'1'}->[0]->[4] }, 9, q{9 3' ends} ); +my $got_forward = 0; +my $got_reverse = 0; +foreach my $three_prime_end ( @{ $three_prime_ends->{'1'}->[0]->[4] } ) { + my ( $seq, $pos, $strand, $read_count ) = @{$three_prime_end}; + my $string_form = join q{:}, $seq, $pos, $strand; + if ( $string_form eq '1:2642:1' ) { + $got_forward = 1; + } + if ( $string_form eq '1:632:-1' ) { + $got_reverse = 1; + } +} +ok( $got_forward, q{1 forward strand 3' end} ); +ok( $got_reverse, q{1 reverse strand 3' end} ); + +# Get 3' ends returned with non-existent tag 
+$three_prime_ends = get_three_prime_ends( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '1', + tags => ['NNNNTTTTTT'], + regions => [ [ 1, 2000, 10, -10 ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( scalar @{ $three_prime_ends->{'1'}->[0]->[4] }, 0, q{0 3' ends} ); + +# Get 3' ends for sequence name with a peak +$three_prime_ends = get_three_prime_ends( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '3', + tags => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ], + regions => [ [ 1, 10000, 10, -10 ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'3'} }, 1, '1 region' ); +my $max_read_count = 0; +foreach my $three_prime_end ( @{ $three_prime_ends->{'3'}->[0]->[4] } ) { + my ( $seq, $pos, $strand, $read_count ) = @{$three_prime_end}; + if ( $read_count > $max_read_count ) { + $max_read_count = $read_count; + } +} +ok( $max_read_count > 1, q{Read count for 3' end of peak} ); + +# Check merging 3' ends required parameters +throws_ok { + merge_three_prime_ends( { regions => [ [ [ 1, 1000, 10, -10, [] ] ] ], } ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + merge_three_prime_ends( { seq_name => '1', } ); +} +qr/No regions specified/ms, 'No regions'; + +# Test lists with different number of regions +throws_ok { + merge_three_prime_ends( + { + seq_name => '1', + regions => [ + [ [ 1, 1000, 10, -10, [] ], ], + [ [ 1, 1000, 10, -10, [] ], [ 2000, 3000, 10, -10, [] ], ], + ], + } + ); +} +qr/Number of regions does not match in all lists/ms, + 'Different number of regions'; + +# Test lists with different regions +throws_ok { + merge_three_prime_ends( + { + seq_name => '1', + regions => [ + [ [ 1, 1000, 10, -10, [] ], [ 2000, 4000, 10, -10, [] ], ], + [ [ 1, 1000, 10, -10, [] ], [ 3000, 4000, 10, -10, [] ], ], + ], + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + 'Different region start'; +throws_ok { + merge_three_prime_ends( + { + seq_name => '1', + regions => [ + [ [ 1, 1000, 10, -10, [] ], [ 2000, 4000, 10, -10, [] ], ], + [ [ 1, 1000, 10, -10, [] ], [ 2000, 5000, 10, -10, [] ], ], + ], + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + 'Different region end'; +throws_ok { + merge_three_prime_ends( + { + seq_name => '1', + regions => [ + [ [ 1, 1000, 10, -10, [] ], [ 2000, 4000, 10, -10, [] ], ], + [ [ 1, 1000, 10, -10, [] ], [ 2000, 4000, 20, -10, [] ], ], + ], + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + 'Different region maximum read count'; +throws_ok { + merge_three_prime_ends( + { + seq_name => '1', + regions => [ + [ [ 1, 1000, 10, -10, [] ], [ 2000, 4000, 10, -10, [] ], ], + [ [ 1, 1000, 10, -10, [] ], [ 2000, 4000, 10, -20, [] ], ], + ], + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + 'Different region log probability sum'; + +# Test one list of regions +$three_prime_ends = merge_three_prime_ends( + { + seq_name => '1', + regions => [ [ [ 1, 1000, 10, -10, [] ] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region 
log probability sum' ); +is( @{ $three_prime_ends->{'1'}->[0]->[4] }, 0, q{No 3' ends} ); + +# Test two lists of regions +$three_prime_ends = merge_three_prime_ends( + { + seq_name => '1', + regions => [ + [ + [ + 1, 1000, 10, -10, + [ [ '1', 2000, 1, 10 ], [ '1', 3000, 1, 10 ], ] + ] + ], + [ [ 1, 1000, 10, -10, [ [ '1', 3000, 1, 10 ], ] ] ], + ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( @{ $three_prime_ends->{'1'}->[0]->[4] }, 2, q{2 3' ends} ); +is( $three_prime_ends->{'1'}->[0]->[4]->[0]->[0], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[4]->[0]->[1], 3000, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[4]->[0]->[2], 1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[4]->[0]->[3], 20, q{3' end read count} ); + +# Test different strands +$three_prime_ends = merge_three_prime_ends( + { + seq_name => '1', + regions => [ + [ [ 1, 1000, 10, -10, [ [ '1', 2000, 1, 10 ], ] ] ], + [ [ 1, 1000, 10, -10, [ [ '1', 2000, -1, 10 ], ] ] ], + ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( @{ $three_prime_ends->{'1'}->[0]->[4] }, 2, q{2 3' ends} ); + +my $analysis; + +# Mock analysis object returning non-polyA +$analysis = Test::MockObject->new(); +$analysis->set_isa('DETCT::Analysis'); +$analysis->set_always( 'get_subsequence', 'TTTTTTTTTT' ); + +# Check filtering 3' ends required parameters +throws_ok { + filter_three_prime_ends( + { + seq_name => '1', + regions => [ [ [ 1, 1000, 10, -10, [] ] ] ], + } + ); +} +qr/No analysis specified/ms, 'No analysis'; +throws_ok { + filter_three_prime_ends( + { + analysis => $analysis, + regions => [ [ [ 1, 1000, 10, -10, [] ] ] ], + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + filter_three_prime_ends( + { + analysis => $analysis, + seq_name => '1', + } + ); +} +qr/No regions specified/ms, 'No regions'; + +# Test filtering 3' ends +$three_prime_ends = filter_three_prime_ends( + { + analysis => $analysis, + seq_name => '1', + regions => [ + [ + 1, 1000, 10, -10, + [ + [ '1', 1000, 1, 20 ], + [ '1', 2000, -1, 10 ], + [ '1', 3000, 1, 1 ], + [ '1', 4000, 1, 3 ], + ] + ] + ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( @{ $three_prime_ends->{'1'}->[0]->[4] }, 2, q{2 3' ends} ); +is( $three_prime_ends->{'1'}->[0]->[4]->[0]->[0], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[4]->[0]->[1], 1000, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[4]->[0]->[2], 1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[4]->[0]->[3], 20, q{3' end read count} ); + +# Mock analysis object returning polyA +$analysis = Test::MockObject->new(); +$analysis->set_isa('DETCT::Analysis'); +$analysis->set_always( 'get_subsequence', 'AAAATTTTTT' ); + +# Test filtering 3' ends 
+$three_prime_ends = filter_three_prime_ends( + { + analysis => $analysis, + seq_name => '1', + regions => [ + [ + 1, 1000, 10, -10, + [ + [ '1', 1000, 1, 20 ], + [ '1', 2000, -1, 10 ], + [ '1', 3000, 1, 1 ], + [ '1', 4000, 1, 3 ], + ] + ] + ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( @{ $three_prime_ends->{'1'}->[0]->[4] }, 0, q{0 3' ends} ); + +# Check choosing 3' end required parameters +throws_ok { + choose_three_prime_end( { regions => [ [ [ 1, 1000, 10, -10, [] ] ] ], } ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + choose_three_prime_end( { seq_name => '1', } ); +} +qr/No regions specified/ms, 'No regions'; + +# Test choosing 3' end +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ + [ + 1, 1000, 10, -10, + [ + [ '1', 1000, 1, 20 ], + [ '1', 2000, -1, 10 ], + [ '1', 3000, 1, 1 ], + [ '1', 4000, 1, 3 ], + ] + ] + ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 1000, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], 1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); + +# Test choosing 3' end with no 3' ends +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ [ 1, 1000, 10, -10, [] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], undef, q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], undef, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], undef, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], undef, q{3' end read count} ); + +# Test choosing 3' end with reduced region end +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ [ 1, 1000, 10, -10, [ [ '1', 900, 1, 20 ], ] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 900, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 900, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], 1, q{3' end 
strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); + +# Test choosing 3' end with reduced region start +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ [ 1, 1000, 10, -10, [ [ '1', 100, -1, 20 ], ] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 100, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 100, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], -1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); + +# Test choosing 3' end with different sequence name +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ [ 1, 1000, 10, -10, [ [ '2', 100, -1, 20 ], ] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '2', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 100, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], -1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); + +# Test choosing 3' end beyond region start +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ [ 1000, 2000, 10, -10, [ [ '1', 900, 1, 20 ], ] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1000, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 2000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 900, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], 1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ [ 1000, 2000, 10, -10, [ [ '1', 900, -1, 20 ], ] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1000, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 2000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 900, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], -1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); + +# Test choosing 3' end beyond region end +$three_prime_ends = 
choose_three_prime_end( + { + seq_name => '1', + regions => [ [ 1000, 2000, 10, -10, [ [ '1', 2100, -1, 20 ], ] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1000, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 2000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 2100, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], -1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ [ 1000, 2000, 10, -10, [ [ '1', 2100, 1, 20 ], ] ] ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1000, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 2000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 2100, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], 1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); + +# Test choosing 3' end with same read count +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ + [ + 1000, 2000, 10, -10, + [ [ '1', 900, -1, 20 ], [ '1', 2200, -1, 20 ], ] + ] + ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1000, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 2000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 900, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], -1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ + [ + 1000, 2000, 10, -10, + [ [ '1', 900, -1, 20 ], [ '1', 2100, -1, 20 ], ] + ] + ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1000, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 2000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[6], -1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); +$three_prime_ends = choose_three_prime_end( + { + seq_name => '1', + regions => [ + [ + 1000, 2000, 10, -10, + [ [ '2', 900, -1, 20 ], [ '2', 2100, -1, 20 ], ] + ] + ], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ 
$three_prime_ends->{'1'} }, 1, '1 region' ); +is( $three_prime_ends->{'1'}->[0]->[0], 1000, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 2000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '2', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[6], -1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 20, q{3' end read count} ); + +# Test checking for polyA +is( DETCT::Misc::BAM::is_polya('TTTTTTTTTT'), 0, 'PolyT' ); +is( DETCT::Misc::BAM::is_polya('AAAATTTTTT'), 1, '>3 As at start' ); +is( DETCT::Misc::BAM::is_polya('TTTTTTAAAA'), 0, '>3 As at end' ); +is( DETCT::Misc::BAM::is_polya('TAAAATAAAT'), 1, '>6 As' ); +is( DETCT::Misc::BAM::is_polya('AAATAAATTT'), 1, 'AAA.AAA... regexp' ); +is( DETCT::Misc::BAM::is_polya('AAATAATATT'), 1, 'AAA.AA.A.. regexp' ); +is( DETCT::Misc::BAM::is_polya('AAATATAATT'), 1, 'AAA.A.AA.. regexp' ); +is( DETCT::Misc::BAM::is_polya('AATAAAATTT'), 1, 'AA.AAAA... regexp' ); +is( DETCT::Misc::BAM::is_polya('AATAAATATT'), 1, 'AA.AAA.A.. regexp' ); +is( DETCT::Misc::BAM::is_polya('AATATAAATT'), 1, 'AA.A.AAA.. regexp' ); +is( DETCT::Misc::BAM::is_polya('ATAAAAATTT'), 1, 'A.AAAAA... regexp' ); +is( DETCT::Misc::BAM::is_polya('ATAAAATATT'), 1, 'A.AAAA.A.. regexp' ); +is( DETCT::Misc::BAM::is_polya('ATAAATAATT'), 1, 'A.AAA.AA.. regexp' ); +is( DETCT::Misc::BAM::is_polya('ATAATAAATT'), 1, 'A.AA.AAA.. regexp' ); +is( DETCT::Misc::BAM::is_polya('ATATAAAATT'), 1, 'A.A.AAAA.. regexp' ); +is( DETCT::Misc::BAM::is_polya('AATAATAATT'), 1, 'AA.AA.AA.. regexp' ); +is( DETCT::Misc::BAM::is_polya('TATAATAATA'), 0, '6 As' ); + +# Check counting reads required parameters +throws_ok { + count_reads( + { + mismatch_threshold => 2, + seq_name => '1', + regions => [ [ 1000, 2000, 10, -10, '1', 2000, 1, 10 ], ], + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No BAM file specified/ms, 'No BAM file'; +throws_ok { + count_reads( + { + bam_file => 't/data/test1.bam', + seq_name => '1', + regions => [ [ 1000, 2000, 10, -10, '1', 2000, 1, 10 ], ], + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No mismatch threshold specified/ms, 'No mismatch threshold'; +throws_ok { + count_reads( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 2, + regions => [ [ 1000, 2000, 10, -10, '1', 2000, 1, 10 ], ], + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + count_reads( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 2, + seq_name => '1', + tags => ['NNNNBGAGGC'], + } + ); +} +qr/No regions specified/ms, 'No regions'; +throws_ok { + count_reads( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '1', + regions => [ [ 1, 2000, 10, -10, '1', 2000, 1, 10 ], ], + } + ); +} +qr/No tags specified/ms, 'No tags'; + +# Check read counts returned by test BAM file +# Should be 11 reads according to: + +=for comment +samtools view -f 128 -F 1028 t/data/test1.bam 1:1-2000 \ +| grep 54M | grep NM:i:0 | awk '{ print $1 }' \ +| sed -e 's/.*#//' | grep GAGGC$ | wc -l +=cut + +$three_prime_ends = count_reads( + { + bam_file => 't/data/test1.bam', + mismatch_threshold => 0, + seq_name => '1', + regions => [ [ 1, 2000, 10, -10, '1', 2000, 1, 10 ], ], + tags => ['NNNNBGAGGC'], + } +); +is( scalar keys %{$three_prime_ends}, 1, '1 sequence' ); +is( scalar @{ $three_prime_ends->{'1'} }, 1, '1 region' ); +is( 
$three_prime_ends->{'1'}->[0]->[0], 1, 'Region start' ); +is( $three_prime_ends->{'1'}->[0]->[1], 2000, 'Region end' ); +is( $three_prime_ends->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $three_prime_ends->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $three_prime_ends->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $three_prime_ends->{'1'}->[0]->[5], 2000, q{3' end position} ); +is( $three_prime_ends->{'1'}->[0]->[6], 1, q{3' end strand} ); +is( $three_prime_ends->{'1'}->[0]->[7], 10, q{3' end read count} ); +is( scalar keys %{ $three_prime_ends->{'1'}->[0]->[8] }, 1, '1 tag' ); +is( $three_prime_ends->{'1'}->[0]->[8]->{NNNNBGAGGC}, 4, '4 reads' ); + +# Mock sample objects +my $sample1 = Test::MockObject->new(); +$sample1->set_isa('DETCT::Sample'); +$sample1->set_always( 'bam_file', '1.bam' ); +$sample1->set_always( 'tag', 'AA' ); +my $sample2 = Test::MockObject->new(); +$sample2->set_isa('DETCT::Sample'); +$sample2->set_always( 'bam_file', '2.bam' ); +$sample2->set_always( 'tag', 'TT' ); +my $samples = [ $sample1, $sample2 ]; + +# Check merging read counts required parameters +throws_ok { + merge_read_counts( + { + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + merge_read_counts( + { + seq_name => '1', + samples => $samples, + } + ); +} +qr/No regions specified/ms, 'No regions'; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + }, + } + ); +} +qr/No samples specified/ms, 'No samples'; + +# Test lists with different number of regions +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => [ + [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ], + [ 3000, 4000, 10, -10, '1', 5000, 1, 10, { AA => 10 } ], + ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Number of regions does not match in all lists/ms, + 'Different number of regions'; + +# Test lists with different regions +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 2, 1000, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + 'Different region start'; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1001, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + 'Different region end'; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 11, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + 'Different region maximum read count'; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -11, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the 
same in each list/ms, + 'Different region log probability sum'; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '2', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{Different 3' end sequence}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2001, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{Different 3' end position}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, -1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{Different 3' end strand}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 11, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{Different 3' end read count}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, undef, 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{3' end sequence undefined}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, undef, 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{Other 3' end sequence undefined}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', undef, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{3' end position undefined}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', undef, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{Other 3' end position undefined}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, undef, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{3' end strand undefined}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, undef, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + 
); +} +qr/Regions not in the same order or not the same in each list/ms, + q{Other 3' end strand undefined}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, undef, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{3' end read count undefined}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, undef, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Regions not in the same order or not the same in each list/ms, + q{Other 3' end read count undefined}; + +# Test unknown BAM file and/or tag +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '3.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Unknown BAM file/ms, q{BAM file not in samples}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { CC => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Unknown BAM file/ms, q{Tag not in samples}; +throws_ok { + merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { TT => 10 } ] ], + '2.bam' => + [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + }, + samples => $samples, + } + ); +} +qr/Unknown BAM file/ms, q{Combination of BAM file and tag not in samples}; + +my $read_counts; + +$read_counts = merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { AA => 10 } ] ], + '2.bam' => [ [ 1, 1000, 10, -10, '1', 2000, 1, 10, { TT => 20 } ] ], + }, + samples => $samples, + } +); +is( scalar keys %{$read_counts}, 1, '1 sequence' ); +is( scalar @{ $read_counts->{'1'} }, 1, '1 region' ); +is( $read_counts->{'1'}->[0]->[0], 1, 'Region start' ); +is( $read_counts->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $read_counts->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $read_counts->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( $read_counts->{'1'}->[0]->[4], '1', q{3' end sequence} ); +is( $read_counts->{'1'}->[0]->[5], 2000, q{3' end position} ); +is( $read_counts->{'1'}->[0]->[6], 1, q{3' end strand} ); +is( $read_counts->{'1'}->[0]->[7], 10, q{3' end read count} ); +is( scalar @{ $read_counts->{'1'}->[0]->[8] }, 2, '2 samples' ); +is( $read_counts->{'1'}->[0]->[8]->[0], 10, '10 reads' ); +is( $read_counts->{'1'}->[0]->[8]->[1], 20, '20 reads' ); + +$read_counts = merge_read_counts( + { + seq_name => '1', + regions => { + '1.bam' => [ + [ 1, 1000, 10, -10, undef, undef, undef, undef, { AA => 10 } ] + ], + '2.bam' => [ + [ 1, 1000, 10, -10, undef, undef, undef, undef, { TT => 20 } ] + ], + }, + samples => $samples, + } +); +is( scalar keys %{$read_counts}, 1, '1 sequence' ); +is( scalar @{ $read_counts->{'1'} }, 1, '1 region' ); +is( $read_counts->{'1'}->[0]->[0], 1, 'Region start' ); +is( $read_counts->{'1'}->[0]->[1], 1000, 'Region end' ); +is( $read_counts->{'1'}->[0]->[2], 10, 'Region maximum read count' ); +is( $read_counts->{'1'}->[0]->[3], -10, 'Region log probability sum' ); +is( 
$read_counts->{'1'}->[0]->[4], undef, q{3' end sequence} ); +is( $read_counts->{'1'}->[0]->[5], undef, q{3' end position} ); +is( $read_counts->{'1'}->[0]->[6], undef, q{3' end strand} ); +is( $read_counts->{'1'}->[0]->[7], undef, q{3' end read count} ); +is( scalar @{ $read_counts->{'1'}->[0]->[8] }, 2, '2 samples' ); +is( $read_counts->{'1'}->[0]->[8]->[0], 10, '10 reads' ); +is( $read_counts->{'1'}->[0]->[8]->[1], 20, '20 reads' ); diff --git a/t/misc-output.t b/t/misc-output.t new file mode 100644 index 0000000..555bfd8 --- /dev/null +++ b/t/misc-output.t @@ -0,0 +1,98 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 13; + +use DETCT::Misc::Output qw( + dump_as_table +); + +use File::Temp qw( tempdir ); +use File::Spec; + +my $tmp_dir = tempdir( CLEANUP => 1 ); + +# Mock sample objects +my $sample1 = Test::MockObject->new(); +$sample1->set_isa('DETCT::Sample'); +$sample1->set_always( 'name', 'wt1' ); +$sample1->set_always( 'condition', 'sibling' ); +$sample1->set_always( 'group', '1' ); +my $sample2 = Test::MockObject->new(); +$sample2->set_isa('DETCT::Sample'); +$sample2->set_always( 'name', 'wt2' ); +$sample2->set_always( 'condition', 'sibling' ); +$sample2->set_always( 'group', '2' ); +my $sample3 = Test::MockObject->new(); +$sample3->set_isa('DETCT::Sample'); +$sample3->set_always( 'name', 'mut1' ); +$sample3->set_always( 'condition', 'mutant' ); +$sample3->set_always( 'group', '1' ); +my $sample4 = Test::MockObject->new(); +$sample4->set_isa('DETCT::Sample'); +$sample4->set_always( 'name', 'mut2' ); +$sample4->set_always( 'condition', 'mutant' ); +$sample4->set_always( 'group', '2' ); +my $samples = [ $sample1, $sample2, $sample3, $sample4 ]; + +# Mock analysis object +my $analysis = Test::MockObject->new(); +$analysis->set_isa('DETCT::Analysis'); +$analysis->set_always( 'get_all_samples', $samples ); +$analysis->set_always( 'ensembl_species', 'danio_rerio' ); + +my $regions = [ + [ + '1', 1, 110, 10, -10, '1', 110, + 1, 10, + [ 4, 1, 2, 7 ], + [ 4.6, 1.1, 2.1, 4.6 ], + undef, undef, + [ 1.18, 0.233 ], + [ [ 0.46, -1.13 ], [ 4.18, 2.06 ] ], + { + e61 => [ + [ + 'ENSDARG00000095747', + 'cxc64', + 'CXC chemokine 64', + 'protein_coding', + 5, + [ [ 'ENSDART00000133571', 'protein_coding' ] ] + ] + ] + } + ], + [ + '1', 1, + 1000, 10, + -10, undef, + undef, undef, + undef, [ 4, 1, 2, 7 ], + [ 4.6, 1.1, 2.1, 4.6 ], undef, + undef, [ 1.18, 0.233 ], + [ [ 0.46, -1.13 ], [ 4.18, 2.06 ] ], {} + ], +]; + +is( + dump_as_table( + { analysis => $analysis, dir => $tmp_dir, regions => $regions, } + ), + undef, 'Dump' +); + +foreach my $format ( 'csv', 'tsv', 'html' ) { + foreach my $level ( 'all', 'sig' ) { + my $file = $level . q{.} . $format; + my $filepath = File::Spec->catfile( $tmp_dir, $file ); + ok( -e $filepath, $file . ' exists' ); + ok( !-z $filepath, $file . 
' is not empty' ); + } +} + +# TODO: Actually test output diff --git a/t/misc-peakhmm.t b/t/misc-peakhmm.t new file mode 100644 index 0000000..3764343 --- /dev/null +++ b/t/misc-peakhmm.t @@ -0,0 +1,572 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 103; + +use DETCT::Misc::PeakHMM qw( + merge_read_peaks + summarise_read_peaks + run_peak_hmm + join_hmm_bins +); + +use File::Temp qw( tempdir ); +use File::Path qw( make_path ); +use POSIX qw( WIFEXITED); + +# Compile quince_chiphmmnew if necessary +if ( !-r 'bin/quince_chiphmmnew' ) { + make_path('bin'); + my $cmd = 'g++ -o bin/quince_chiphmmnew src/quince_chiphmmnew.cpp'; + WIFEXITED( system $cmd) or confess "Couldn't run $cmd"; +} + +my $input_peaks; +my $output_peaks; + +# Check merging read peaks required parameters +throws_ok { + merge_read_peaks( + { + seq_name => 1, + peaks => [ [ 1, 2, 1 ] ], + } + ); +} +qr/No peak buffer width specified/ms, 'No peak buffer width'; +throws_ok { + merge_read_peaks( + { + peak_buffer_width => 100, + peaks => [ [ 1, 2, 1 ] ], + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + } + ); +} +qr/No peaks specified/ms, 'No peaks'; + +# Two peaks but no merging +$input_peaks = [ [ 100, 200, 1 ], [ 500, 600, 1 ], ]; +$output_peaks = merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + peaks => $input_peaks, + } +); +is( scalar keys %{$output_peaks}, 1, '1 sequence' ); +is( scalar @{ $output_peaks->{'1'} }, 2, '2 peaks' ); +is( $output_peaks->{'1'}->[0]->[0], 100, 'Start of first peak' ); +is( $output_peaks->{'1'}->[0]->[1], 200, 'End of first peak' ); +is( $output_peaks->{'1'}->[0]->[2], 1, 'First peak read count' ); +is( $output_peaks->{'1'}->[-1]->[0], 500, 'Start of last peak' ); +is( $output_peaks->{'1'}->[-1]->[1], 600, 'End of last peak' ); +is( $output_peaks->{'1'}->[-1]->[2], 1, 'Last peak read count' ); + +# Two peaks merged into one +$input_peaks = [ [ 100, 200, 1 ], [ 250, 350, 1 ], ]; +$output_peaks = merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + peaks => $input_peaks, + } +); +is( scalar keys %{$output_peaks}, 1, '1 sequence' ); +is( scalar @{ $output_peaks->{'1'} }, 1, '1 peak' ); +is( $output_peaks->{'1'}->[0]->[0], 100, 'Start of first peak' ); +is( $output_peaks->{'1'}->[0]->[1], 350, 'End of first peak' ); +is( $output_peaks->{'1'}->[0]->[2], 2, 'First peak read count' ); + +# Three peaks with first two merged +$input_peaks = [ [ 100, 200, 1 ], [ 250, 350, 1 ], [ 500, 600, 1 ], ]; +$output_peaks = merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + peaks => $input_peaks, + } +); +is( scalar keys %{$output_peaks}, 1, '1 sequence' ); +is( scalar @{ $output_peaks->{'1'} }, 2, '2 peaks' ); +is( $output_peaks->{'1'}->[0]->[0], 100, 'Start of first peak' ); +is( $output_peaks->{'1'}->[0]->[1], 350, 'End of first peak' ); +is( $output_peaks->{'1'}->[0]->[2], 2, 'First peak read count' ); +is( $output_peaks->{'1'}->[-1]->[0], 500, 'Start of last peak' ); +is( $output_peaks->{'1'}->[-1]->[1], 600, 'End of last peak' ); +is( $output_peaks->{'1'}->[-1]->[2], 1, 'Last peak read count' ); + +# Three peaks with second two merged +$input_peaks = [ [ 100, 200, 1 ], [ 500, 600, 1 ], [ 550, 650, 1 ], ]; +$output_peaks = merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + peaks => $input_peaks, + } +); +is( scalar keys %{$output_peaks}, 1, '1 sequence' ); 
+is( scalar @{ $output_peaks->{'1'} }, 2, '2 peaks' ); +is( $output_peaks->{'1'}->[0]->[0], 100, 'Start of first peak' ); +is( $output_peaks->{'1'}->[0]->[1], 200, 'End of first peak' ); +is( $output_peaks->{'1'}->[0]->[2], 1, 'First peak read count' ); +is( $output_peaks->{'1'}->[-1]->[0], 500, 'Start of last peak' ); +is( $output_peaks->{'1'}->[-1]->[1], 650, 'End of last peak' ); +is( $output_peaks->{'1'}->[-1]->[2], 2, 'Last peak read count' ); + +# Two peaks separated by buffer width +$input_peaks = [ [ 100, 200, 1 ], [ 300, 400, 1 ], ]; +$output_peaks = merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + peaks => $input_peaks, + } +); +is( scalar keys %{$output_peaks}, 1, '1 sequence' ); +is( scalar @{ $output_peaks->{'1'} }, 2, '2 peaks' ); +is( $output_peaks->{'1'}->[0]->[0], 100, 'Start of first peak' ); +is( $output_peaks->{'1'}->[0]->[1], 200, 'End of first peak' ); +is( $output_peaks->{'1'}->[0]->[2], 1, 'First peak read count' ); +is( $output_peaks->{'1'}->[-1]->[0], 300, 'Start of last peak' ); +is( $output_peaks->{'1'}->[-1]->[1], 400, 'End of last peak' ); +is( $output_peaks->{'1'}->[-1]->[2], 1, 'Last peak read count' ); + +# Two peaks separated by just under buffer width +$input_peaks = [ [ 100, 200, 1 ], [ 299, 400, 1 ], ]; +$output_peaks = merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + peaks => $input_peaks, + } +); +is( scalar keys %{$output_peaks}, 1, '1 sequence' ); +is( scalar @{ $output_peaks->{'1'} }, 1, '1 peak' ); +is( $output_peaks->{'1'}->[0]->[0], 100, 'Start of first peak' ); +is( $output_peaks->{'1'}->[0]->[1], 400, 'End of first peak' ); +is( $output_peaks->{'1'}->[0]->[2], 2, 'First peak read count' ); + +# Two peaks with same start +$input_peaks = [ [ 100, 200, 1 ], [ 100, 300, 1 ], ]; +$output_peaks = merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + peaks => $input_peaks, + } +); +is( scalar keys %{$output_peaks}, 1, '1 sequence' ); +is( scalar @{ $output_peaks->{'1'} }, 1, '1 peak' ); +is( $output_peaks->{'1'}->[0]->[0], 100, 'Start of first peak' ); +is( $output_peaks->{'1'}->[0]->[1], 300, 'End of first peak' ); +is( $output_peaks->{'1'}->[0]->[2], 2, 'First peak read count' ); + +# No peaks +$input_peaks = []; +$output_peaks = merge_read_peaks( + { + peak_buffer_width => 100, + seq_name => 1, + peaks => $input_peaks, + } +); +is( scalar keys %{$output_peaks}, 1, '1 sequence' ); +is( scalar @{ $output_peaks->{'1'} }, 0, '0 peaks' ); + +my $summary; + +# Check summarising read peaks required parameters +throws_ok { + summarise_read_peaks( + { + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 1000, + read_length => 54, + peaks => [ [ 1, 2, 1 ] ], + } + ); +} +qr/No bin size specified/ms, 'No bin size'; +throws_ok { + summarise_read_peaks( + { + bin_size => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 1000, + read_length => 54, + peaks => [ [ 1, 2, 1 ] ], + } + ); +} +qr/No peak buffer width specified/ms, 'No peak buffer width'; +throws_ok { + summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + seq_name => '1', + seq_bp => 1000, + read_length => 54, + peaks => [ [ 1, 2, 1 ] ], + } + ); +} +qr/No HMM significance level specified/ms, 'No HMM significance level'; +throws_ok { + summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_bp => 1000, + read_length => 54, + peaks => [ [ 1, 2, 1 ] ], + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + 
summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + read_length => 54, + peaks => [ [ 1, 2, 1 ] ], + } + ); +} +qr/No sequence bp specified/ms, 'No sequence bp'; +throws_ok { + summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 1000, + peaks => [ [ 1, 2, 1 ] ], + } + ); +} +qr/No read length specified/ms, 'No read length'; +throws_ok { + summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 1000, + read_length => 54, + } + ); +} +qr/No peaks specified/ms, 'No peaks'; + +# Two peaks, one significant +$input_peaks = [ [ 100, 199, 5 ], [ 300, 399, 1 ], ]; +$summary = summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 1000, + read_length => 54, + peaks => $input_peaks, + } +); +is( scalar keys %{$summary}, 1, '1 sequence' ); +is( scalar keys %{ $summary->{'1'} }, 9, '9 keys' ); +is( + $summary->{'1'}->{total_read_count_per_mb}, + 6 / 1_000_000, + 'Total read count per Mb' +); +is( + $summary->{'1'}->{total_sig_read_count_per_mb}, + 5 / 1_000_000, + 'Total significant read count per Mb' +); +is( + $summary->{'1'}->{total_sig_peak_width_in_mb}, + 100 / 1_000_000, + 'Total significant peak width in Mb' +); +is( $summary->{'1'}->{median_sig_peak_width}, + 100, 'Median significant peak width' ); +is( $summary->{'1'}->{total_sig_peaks}, 1, 'Total significant peaks' ); +is( $summary->{'1'}->{peak_buffer_width}, 100, 'Peak buffer width' ); +ok( $summary->{'1'}->{read_threshold} < 5, 'Read threshold' ); +is( $summary->{'1'}->{bin_size}, 100, 'Bin size' ); +is( $summary->{'1'}->{num_bins}, 10, 'Number of bins' ); + +# No significant peaks +$input_peaks = [ [ 300, 399, 1 ], ]; +$summary = summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 1000, + read_length => 54, + peaks => $input_peaks, + } +); +is( $summary->{'1'}->{median_sig_peak_width}, + 0, 'Median significant peak width' ); + +# Three significant peaks +$input_peaks = [ [ 100, 149, 500 ], [ 300, 399, 500 ], [ 600, 759, 500 ], ]; +$summary = summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 1000, + read_length => 54, + peaks => $input_peaks, + } +); +is( $summary->{'1'}->{median_sig_peak_width}, + 100, 'Median significant peak width' ); + +# No peaks +$summary = summarise_read_peaks( + { + bin_size => 100, + peak_buffer_width => 100, + hmm_sig_level => 0.001, + seq_name => '1', + seq_bp => 1000, + read_length => 54, + peaks => [], + } +); +is( scalar keys %{ $summary->{'1'} }, 0, 'No summary' ); + +my $tmp_dir = tempdir( CLEANUP => 1 ); + +# Check running peak HMM required parameters +my $read_bins = { + 1 => 500, + 3 => 1, +}; +$summary = { + total_read_count_per_mb => 501 / 1_000_000, + total_sig_read_count_per_mb => 500 / 1_000_000, + total_sig_peak_width_in_mb => 100 / 1_000_000, + median_sig_peak_width => 100, + total_sig_peaks => 1, + peak_buffer_width => 100, + read_threshold => 3, + bin_size => 100, + num_bins => 10, +}; +throws_ok { + run_peak_hmm( + { + hmm_sig_level => 0.001, + seq_name => '1', + read_bins => $read_bins, + summary => $summary, + hmm_binary => 'bin/quince_chiphmmnew', + } + ); +} +qr/No directory specified/ms, 'No directory'; +throws_ok { + run_peak_hmm( + { + dir => 
$tmp_dir, + seq_name => '1', + read_bins => $read_bins, + summary => $summary, + hmm_binary => 'bin/quince_chiphmmnew', + } + ); +} +qr/No HMM significance level specified/ms, 'No HMM significance level'; +throws_ok { + run_peak_hmm( + { + dir => $tmp_dir, + hmm_sig_level => 0.001, + read_bins => $read_bins, + summary => $summary, + hmm_binary => 'bin/quince_chiphmmnew', + } + ); +} +qr/No sequence name specified/ms, 'No sequence name'; +throws_ok { + run_peak_hmm( + { + dir => $tmp_dir, + hmm_sig_level => 0.001, + seq_name => '1', + summary => $summary, + hmm_binary => 'bin/quince_chiphmmnew', + } + ); +} +qr/No read bins specified/ms, 'No read bins'; +throws_ok { + run_peak_hmm( + { + dir => $tmp_dir, + hmm_sig_level => 0.001, + seq_name => '1', + read_bins => $read_bins, + hmm_binary => 'bin/quince_chiphmmnew', + } + ); +} +qr/No summary specified/ms, 'No summary'; +throws_ok { + run_peak_hmm( + { + dir => $tmp_dir, + hmm_sig_level => 0.001, + seq_name => '1', + read_bins => $read_bins, + summary => $summary, + } + ); +} +qr/No HMM binary specified/ms, 'No HMM binary'; + +my $hmm; + +# Run peak HMM +$hmm = run_peak_hmm( + { + dir => $tmp_dir, + hmm_sig_level => 0.001, + seq_name => '1', + read_bins => $read_bins, + summary => $summary, + hmm_binary => 'bin/quince_chiphmmnew', + } +); +is( scalar keys %{$hmm}, 1, '1 sequence' ); +is( scalar @{ $hmm->{'1'} }, 1, '1 peak' ); +is( $hmm->{'1'}->[0]->[0], 1, 'Bin 1' ); +is( $hmm->{'1'}->[0]->[1], 500, '500 reads' ); +ok( $hmm->{'1'}->[0]->[2] < 0, 'Log probability negative' ); + +# Run peak HMM with no summary +$hmm = run_peak_hmm( + { + dir => $tmp_dir, + hmm_sig_level => 0.001, + seq_name => '1', + read_bins => $read_bins, + summary => {}, + hmm_binary => 'bin/quince_chiphmmnew', + } +); +is( scalar keys %{$hmm}, 1, '1 sequence' ); +is( scalar @{ $hmm->{'1'} }, 0, '0 peaks' ); + +# Run peak HMM with non-existent working directory +$hmm = run_peak_hmm( + { + dir => $tmp_dir . 
'/test',
+        hmm_sig_level => 0.001,
+        seq_name      => '1',
+        read_bins     => $read_bins,
+        summary       => $summary,
+        hmm_binary    => 'bin/quince_chiphmmnew',
+    }
+);
+is( scalar keys %{$hmm}, 1, '1 sequence' );
+is( scalar @{ $hmm->{'1'} }, 1, '1 peak' );
+
+# Check joining HMM bins required parameters
+my $hmm_bins = [
+    [ 1, 10, -2.3 ],
+    [ 2, 20, -2.3 ],
+    [ 4, 10, -2.3 ],
+    [ 5, 30, -2.3 ],
+    [ 6, 20, -2.3 ],
+];
+throws_ok {
+    join_hmm_bins(
+        {
+            seq_name => '1',
+            hmm_bins => $hmm_bins,
+        }
+    );
+}
+qr/No bin size specified/ms, 'No bin size';
+throws_ok {
+    join_hmm_bins(
+        {
+            bin_size => 100,
+            hmm_bins => $hmm_bins,
+        }
+    );
+}
+qr/No sequence name specified/ms, 'No sequence name';
+throws_ok {
+    join_hmm_bins(
+        {
+            bin_size => 100,
+            seq_name => '1',
+        }
+    );
+}
+qr/No HMM bins specified/ms, 'No HMM bins';
+
+my $regions;
+
+# Five bins joined into two regions
+$regions = join_hmm_bins(
+    {
+        bin_size => 100,
+        seq_name => '1',
+        hmm_bins => $hmm_bins,
+    }
+);
+is( scalar keys %{$regions}, 1, '1 sequence' );
+is( scalar @{ $regions->{'1'} }, 2, '2 regions' );
+is( $regions->{'1'}->[0]->[0], 101, 'Region 1 start' );
+is( $regions->{'1'}->[0]->[1], 300, 'Region 1 end' );
+is( $regions->{'1'}->[0]->[2], 20, 'Region 1 max read count' );
+is( $regions->{'1'}->[0]->[3], -4.6, 'Region 1 log probability sum' );
+is( $regions->{'1'}->[1]->[0], 401, 'Region 2 start' );
+is( $regions->{'1'}->[1]->[1], 700, 'Region 2 end' );
+is( $regions->{'1'}->[1]->[2], 30, 'Region 2 max read count' );
+is( $regions->{'1'}->[1]->[3], -6.9, 'Region 2 log probability sum' );
+
+# No bins
+$regions = join_hmm_bins(
+    {
+        bin_size => 100,
+        seq_name => '1',
+        hmm_bins => [],
+    }
+);
+is( scalar keys %{$regions}, 1, '1 sequence' );
+is( scalar @{ $regions->{'1'} }, 0, '0 regions' );
diff --git a/t/misc-tag.t b/t/misc-tag.t
new file mode 100644
index 0000000..2cf6a8a
--- /dev/null
+++ b/t/misc-tag.t
@@ -0,0 +1,260 @@
+use Test::More;
+use Test::Exception;
+use Test::Warn;
+use Test::DatabaseRow;
+use Test::MockObject;
+use Carp;
+
+plan tests => 104;
+
+use DETCT::Misc::Tag qw(
+    detag_trim_fastq
+    convert_tag_to_regexp
+);
+
+use File::Temp qw( tempdir );
+use File::Slurp;
+
+=for comment
+
+Test random FASTQ files can be regenerated using:
+
+perl script/make_test_fastq.pl --seed 1 --output_prefix test1 \
+--read_tags NNNNBGAGGC NNNNBAGAAG
+perl script/make_test_fastq.pl --seed 2 --output_prefix test2 \
+--read_tags NNNNBGAGGC NNNNBAGAAG --read_length 54
+mv test* t/data/
+
+Some numbers in tests below will then need updating.
+
+test1 NNNNBGAGGC: 25
+test1 NNNNBAGAAG: 24
+test1 XXXXXXXXXX: 51
+
+test2 NNNNBGAGGC: 24
+test2 NNNNBAGAAG: 35
+test2 XXXXXXXXXX: 41
+
+=cut
+
+my $tmp_dir = tempdir( CLEANUP => 1 );
+
+# Check detagging and trimming FASTQ files
+is(
+    detag_trim_fastq(
+        {
+            fastq_read1_input     => 't/data/test1_1.fastq',
+            fastq_read2_input     => 't/data/test1_2.fastq',
+            fastq_output_prefix   => $tmp_dir . '/test1',
+            pre_detag_trim_length => 54,
+            polyt_trim_length     => 14,
+            polyt_min_length      => 10,
+            read_tags             => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ],
+        }
+    ),
+    undef,
+    'Detag and trim FASTQ'
+);
+
+my @fastq;
+@fastq = read_file( $tmp_dir . '/test1_NNNNBGAGGC_1.fastq' );
+is( scalar @fastq / 4, 25, '25 read 1s' );
+@fastq = read_file( $tmp_dir . '/test1_NNNNBGAGGC_2.fastq' );
+is( scalar @fastq / 4, 25, '25 read 2s' );
+@fastq = read_file( $tmp_dir . '/test1_NNNNBAGAAG_1.fastq' );
+is( scalar @fastq / 4, 24, '24 read 1s' );
+@fastq = read_file( $tmp_dir .
'/test1_NNNNBAGAAG_2.fastq' );
+is( scalar @fastq / 4, 24, '24 read 2s' );
+@fastq = read_file( $tmp_dir . '/test1_XXXXXXXXXX_1.fastq' );
+is( scalar @fastq / 4, 51, '51 read 1s' );
+@fastq = read_file( $tmp_dir . '/test1_XXXXXXXXXX_2.fastq' );
+is( scalar @fastq / 4, 51, '51 read 2s' );
+
+@fastq = read_file( $tmp_dir . '/test1_NNNNBGAGGC_1.fastq' );
+my $read_name = $fastq[0];
+chomp $read_name;
+is( substr( $read_name, -7 ), 'GAGGC/1', 'Tag added to read name' );
+my $read_seq = $fastq[1];
+chomp $read_seq;
+is( length $read_seq, 30, 'Sequence trimmed to 30 bp' );
+my $read_qual = $fastq[3];
+chomp $read_qual;
+is( length $read_qual, 30, 'Quality trimmed to 30 bp' );
+
+@fastq = read_file( $tmp_dir . '/test1_NNNNBGAGGC_2.fastq' );
+my $read_name = $fastq[0];
+chomp $read_name;
+is( substr( $read_name, -7 ), 'GAGGC/2', 'Tag added to read name' );
+my $read_seq = $fastq[1];
+chomp $read_seq;
+is( length $read_seq, 54, 'Sequence trimmed to 54 bp' );
+my $read_qual = $fastq[3];
+chomp $read_qual;
+is( length $read_qual, 54, 'Quality trimmed to 54 bp' );
+
+@fastq = read_file( $tmp_dir . '/test1_XXXXXXXXXX_1.fastq' );
+my $read_name = $fastq[0];
+chomp $read_name;
+is( substr( $read_name, -13 ), '#XXXXXXXXXX/1', 'Tag added to read name' );
+my $read_seq = $fastq[1];
+chomp $read_seq;
+is( length $read_seq, 54, 'Sequence trimmed to 54 bp' );
+my $read_qual = $fastq[3];
+chomp $read_qual;
+is( length $read_qual, 54, 'Quality trimmed to 54 bp' );
+
+@fastq = read_file( $tmp_dir . '/test1_XXXXXXXXXX_2.fastq' );
+my $read_name = $fastq[0];
+chomp $read_name;
+is( substr( $read_name, -13 ), '#XXXXXXXXXX/2', 'Tag added to read name' );
+my $read_seq = $fastq[1];
+chomp $read_seq;
+is( length $read_seq, 54, 'Sequence trimmed to 54 bp' );
+my $read_qual = $fastq[3];
+chomp $read_qual;
+is( length $read_qual, 54, 'Quality trimmed to 54 bp' );
+
+# Check detagging and trimming FASTQ files
+is(
+    detag_trim_fastq(
+        {
+            fastq_read1_input     => 't/data/test2_1.fastq',
+            fastq_read2_input     => 't/data/test2_2.fastq',
+            fastq_output_prefix   => $tmp_dir . '/test2',
+            pre_detag_trim_length => 54,
+            polyt_trim_length     => 14,
+            polyt_min_length      => 10,
+            read_tags             => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ],
+        }
+    ),
+    undef,
+    'Detag and trim FASTQ'
+);
+
+my @fastq;
+@fastq = read_file( $tmp_dir . '/test2_NNNNBGAGGC_1.fastq' );
+is( scalar @fastq / 4, 24, '24 read 1s' );
+@fastq = read_file( $tmp_dir . '/test2_NNNNBGAGGC_2.fastq' );
+is( scalar @fastq / 4, 24, '24 read 2s' );
+@fastq = read_file( $tmp_dir . '/test2_NNNNBAGAAG_1.fastq' );
+is( scalar @fastq / 4, 35, '35 read 1s' );
+@fastq = read_file( $tmp_dir . '/test2_NNNNBAGAAG_2.fastq' );
+is( scalar @fastq / 4, 35, '35 read 2s' );
+@fastq = read_file( $tmp_dir . '/test2_XXXXXXXXXX_1.fastq' );
+is( scalar @fastq / 4, 41, '41 read 1s' );
+@fastq = read_file( $tmp_dir . '/test2_XXXXXXXXXX_2.fastq' );
+is( scalar @fastq / 4, 41, '41 read 2s' );
+
+throws_ok {
+    detag_trim_fastq(
+        {
+            fastq_read1_input     => 't/data/test1_1.fastq',
+            fastq_read2_input     => 't/data/test2_2.fastq',
+            fastq_output_prefix   => $tmp_dir .
'/test',
+            pre_detag_trim_length => 54,
+            polyt_trim_length     => 14,
+            polyt_min_length      => 10,
+            read_tags             => [ 'NNNNBGAGGC', 'NNNNBAGAAG' ],
+        }
+    );
+}
+qr/Read order does not match in input/ms, 'FASTQ files not matched';
+
+# Check converting tags to regular expressions
+
+my @tags = qw( N B D H V R Y K M S W A G C T AA );
+
+my %re_for = convert_tag_to_regexp(@tags);
+
+ok( q{A} =~ $re_for{A}->[0], 'A matches A' );
+ok( q{G} !~ $re_for{A}->[0], 'A does not match G' );
+ok( q{C} !~ $re_for{A}->[0], 'A does not match C' );
+ok( q{T} !~ $re_for{A}->[0], 'A does not match T' );
+ok( q{N} !~ $re_for{A}->[0], 'A does not match N' );
+
+ok( q{A} !~ $re_for{G}->[0], 'G does not match A' );
+ok( q{G} =~ $re_for{G}->[0], 'G matches G' );
+ok( q{C} !~ $re_for{G}->[0], 'G does not match C' );
+ok( q{T} !~ $re_for{G}->[0], 'G does not match T' );
+ok( q{N} !~ $re_for{G}->[0], 'G does not match N' );
+
+ok( q{A} !~ $re_for{C}->[0], 'C does not match A' );
+ok( q{G} !~ $re_for{C}->[0], 'C does not match G' );
+ok( q{C} =~ $re_for{C}->[0], 'C matches C' );
+ok( q{T} !~ $re_for{C}->[0], 'C does not match T' );
+ok( q{N} !~ $re_for{C}->[0], 'C does not match N' );
+
+ok( q{A} !~ $re_for{T}->[0], 'T does not match A' );
+ok( q{G} !~ $re_for{T}->[0], 'T does not match G' );
+ok( q{C} !~ $re_for{T}->[0], 'T does not match C' );
+ok( q{T} =~ $re_for{T}->[0], 'T matches T' );
+ok( q{N} !~ $re_for{T}->[0], 'T does not match N' );
+
+ok( q{A} =~ $re_for{R}->[0], 'R matches A' );
+ok( q{G} =~ $re_for{R}->[0], 'R matches G' );
+ok( q{C} !~ $re_for{R}->[0], 'R does not match C' );
+ok( q{T} !~ $re_for{R}->[0], 'R does not match T' );
+ok( q{N} !~ $re_for{R}->[0], 'R does not match N' );
+
+ok( q{A} !~ $re_for{Y}->[0], 'Y does not match A' );
+ok( q{G} !~ $re_for{Y}->[0], 'Y does not match G' );
+ok( q{C} =~ $re_for{Y}->[0], 'Y matches C' );
+ok( q{T} =~ $re_for{Y}->[0], 'Y matches T' );
+ok( q{N} !~ $re_for{Y}->[0], 'Y does not match N' );
+
+ok( q{A} !~ $re_for{S}->[0], 'S does not match A' );
+ok( q{G} =~ $re_for{S}->[0], 'S matches G' );
+ok( q{C} =~ $re_for{S}->[0], 'S matches C' );
+ok( q{T} !~ $re_for{S}->[0], 'S does not match T' );
+ok( q{N} !~ $re_for{S}->[0], 'S does not match N' );
+
+ok( q{A} =~ $re_for{W}->[0], 'W matches A' );
+ok( q{G} !~ $re_for{W}->[0], 'W does not match G' );
+ok( q{C} !~ $re_for{W}->[0], 'W does not match C' );
+ok( q{T} =~ $re_for{W}->[0], 'W matches T' );
+ok( q{N} !~ $re_for{W}->[0], 'W does not match N' );
+
+ok( q{A} !~ $re_for{K}->[0], 'K does not match A' );
+ok( q{G} =~ $re_for{K}->[0], 'K matches G' );
+ok( q{C} !~ $re_for{K}->[0], 'K does not match C' );
+ok( q{T} =~ $re_for{K}->[0], 'K matches T' );
+ok( q{N} !~ $re_for{K}->[0], 'K does not match N' );
+
+ok( q{A} =~ $re_for{M}->[0], 'M matches A' );
+ok( q{G} !~ $re_for{M}->[0], 'M does not match G' );
+ok( q{C} =~ $re_for{M}->[0], 'M matches C' );
+ok( q{T} !~ $re_for{M}->[0], 'M does not match T' );
+ok( q{N} !~ $re_for{M}->[0], 'M does not match N' );
+
+ok( q{A} !~ $re_for{B}->[0], 'B does not match A' );
+ok( q{G} =~ $re_for{B}->[0], 'B matches G' );
+ok( q{C} =~ $re_for{B}->[0], 'B matches C' );
+ok( q{T} =~ $re_for{B}->[0], 'B matches T' );
+ok( q{N} !~ $re_for{B}->[0], 'B does not match N' );
+
+ok( q{A} =~ $re_for{D}->[0], 'D matches A' );
+ok( q{G} =~ $re_for{D}->[0], 'D matches G' );
+ok( q{C} !~ $re_for{D}->[0], 'D does not match C' );
+ok( q{T} =~ $re_for{D}->[0], 'D matches T' );
+ok( q{N} !~ $re_for{D}->[0], 'D does not match N' );
+
+ok( q{A} =~ $re_for{H}->[0], 'H matches A' );
+ok( q{G} !~
$re_for{H}->[0], 'H does not match G' );
+ok( q{C} =~ $re_for{H}->[0], 'H matches C' );
+ok( q{T} =~ $re_for{H}->[0], 'H matches T' );
+ok( q{N} !~ $re_for{H}->[0], 'H does not match N' );
+
+ok( q{A} =~ $re_for{V}->[0], 'V matches A' );
+ok( q{G} =~ $re_for{V}->[0], 'V matches G' );
+ok( q{C} =~ $re_for{V}->[0], 'V matches C' );
+ok( q{T} !~ $re_for{V}->[0], 'V does not match T' );
+ok( q{N} !~ $re_for{V}->[0], 'V does not match N' );
+
+ok( q{A} =~ $re_for{N}->[0], 'N matches A' );
+ok( q{G} =~ $re_for{N}->[0], 'N matches G' );
+ok( q{C} =~ $re_for{N}->[0], 'N matches C' );
+ok( q{T} =~ $re_for{N}->[0], 'N matches T' );
+ok( q{N} =~ $re_for{N}->[0], 'N matches N' );
+
+ok( q{A} !~ $re_for{AA}->[0], 'AA does not match A' );
+ok( q{AA} =~ $re_for{AA}->[0], 'AA matches AA' );
diff --git a/t/pipeline-job.t b/t/pipeline-job.t
new file mode 100644
index 0000000..3458a56
--- /dev/null
+++ b/t/pipeline-job.t
@@ -0,0 +1,102 @@
+use Test::More;
+use Test::Exception;
+use Test::Warn;
+use Test::DatabaseRow;
+use Test::MockObject;
+use Carp;
+
+plan tests => 48;
+
+use DETCT::Pipeline::Job;
+
+# Mock stage objects with different names
+my $stage1 = Test::MockObject->new();
+$stage1->set_isa('DETCT::Pipeline::Stage');
+$stage1->set_always( 'name', 'get_read_peaks' );
+my $stage2 = Test::MockObject->new();
+$stage2->set_isa('DETCT::Pipeline::Stage');
+$stage2->set_always( 'name', 'merge_read_peaks' );
+
+my $job = DETCT::Pipeline::Job->new(
+    {
+        stage         => $stage1,
+        component     => 2,
+        scheduler     => 'local',
+        base_filename => './run_deseq/1',
+    }
+);
+
+isa_ok( $job, 'DETCT::Pipeline::Job' );
+
+# Test stage attribute
+is( $job->stage->name, 'get_read_peaks', 'Get stage' );
+is( $job->set_stage($stage2), undef, 'Set stage' );
+is( $job->stage->name, 'merge_read_peaks', 'Get new stage' );
+throws_ok { $job->set_stage() } qr/No stage specified/ms, 'No stage';
+throws_ok { $job->set_stage('invalid') } qr/Class of stage/ms, 'Invalid stage';
+
+# Test component attribute
+is( $job->component, 2, 'Get component' );
+is( $job->set_component(3), undef, 'Set component' );
+is( $job->component, 3, 'Get new component' );
+throws_ok { $job->set_component() } qr/No component specified/ms,
+  'No component';
+throws_ok { $job->set_component(-1) } qr/Invalid component/ms,
+  'Invalid component';
+
+# Test scheduler attribute
+is( $job->scheduler, 'local', 'Get scheduler' );
+is( $job->set_scheduler('lsf'), undef, 'Set scheduler' );
+is( $job->scheduler, 'lsf', 'Get new scheduler' );
+throws_ok { $job->set_scheduler() } qr/Invalid scheduler specified/ms,
+  'No scheduler';
+throws_ok { $job->set_scheduler('invalid') } qr/Invalid scheduler specified/ms,
+  'Invalid scheduler';
+
+# Test base_filename attribute
+is( $job->base_filename, './run_deseq/1', 'Get base filename' );
+is( $job->set_base_filename('./count_reads/2'), undef, 'Set base filename' );
+is( $job->base_filename, './count_reads/2', 'Get new base filename' );
+throws_ok { $job->set_base_filename() } qr/No base filename specified/ms,
+  'No base filename';
+throws_ok { $job->set_base_filename('') } qr/No base filename specified/ms,
+  'Empty base filename';
+
+# Test parameters attribute
+is( $job->parameters, undef, 'Get parameters' );
+is( $job->set_parameters('test'), undef, 'Set parameters' );
+is( $job->parameters, 'test', 'Get new parameters' );
+is( $job->set_parameters(), undef, 'Set undef parameters' );
+is( $job->parameters, undef, 'Get undef parameters' );
+
+# Test memory attribute
+is( $job->memory, undef, 'Get memory' );
+is( $job->set_memory(1000),
undef, 'Set memory' ); +is( $job->memory, 1000, 'Get new memory' ); +throws_ok { $job->set_memory(-1) } qr/Invalid memory/ms, 'Invalid memory'; + +# Test retries attribute +is( $job->retries, undef, 'Get retries' ); +is( $job->set_retries(5), undef, 'Set retries' ); +is( $job->retries, 5, 'Get new retries' ); +throws_ok { $job->set_retries(-1) } qr/Invalid retries/ms, 'Invalid retries'; + +# Test status code attribute +is( $job->status_code, 'NOT_RUN', 'Get not run status code' ); +is( $job->set_status_code('DONE'), undef, 'Set done status code' ); +is( $job->status_code, 'DONE', 'Get done status code' ); +is( $job->set_status_code('RUNNING'), undef, 'Set running status code' ); +is( $job->status_code, 'RUNNING', 'Get new running status code' ); +is( $job->set_status_code('FAILED'), undef, 'Set failed status code' ); +is( $job->status_code, 'FAILED', 'Get new failed status code' ); +throws_ok { $job->set_status_code() } qr/No status code specified/ms, + 'No status code'; +throws_ok { $job->set_status_code('invalid') } qr/Invalid status code/ms, + 'Invalid status code'; + +# Test status text attribute +is( $job->status_text, undef, 'Get status text' ); +is( $job->set_status_text('Job killed by owner'), undef, 'Set status text' ); +is( $job->status_text, 'Job killed by owner', 'Get new status text' ); +is( $job->set_status_text(), undef, 'Set undef status text' ); +is( $job->status_text, undef, 'Get undef status text' ); diff --git a/t/pipeline-stage.t b/t/pipeline-stage.t new file mode 100644 index 0000000..56dfaff --- /dev/null +++ b/t/pipeline-stage.t @@ -0,0 +1,64 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 22; + +use DETCT::Pipeline::Stage; + +my $stage = DETCT::Pipeline::Stage->new( + { + name => 'count_tags', + default_memory => 3000, + } +); + +isa_ok( $stage, 'DETCT::Pipeline::Stage' ); + +# Test name attribute +is( $stage->name, 'count_tags', 'Get name' ); +is( $stage->set_name('bin_reads'), undef, 'Set name' ); +is( $stage->name, 'bin_reads', 'Get new name' ); +throws_ok { $stage->set_name() } qr/No name specified/ms, 'No name'; +throws_ok { $stage->set_name('/') } qr/Invalid name/ms, 'Invalid name'; + +# Test default memory attribute +is( $stage->default_memory, 3000, 'Get default memory' ); +is( $stage->set_default_memory(2000), undef, 'Set default memory' ); +is( $stage->default_memory, 2000, 'Get new default memory' ); +throws_ok { $stage->set_default_memory() } qr/No default memory specified/ms, + 'No default memory'; +throws_ok { $stage->set_default_memory(-1) } qr/Invalid default memory/ms, + 'Invalid default memory'; + +# Test all jobs run attribute +is( $stage->all_jobs_run, 0, 'Get all jobs run' ); +is( $stage->set_all_jobs_run(10), undef, 'Set all jobs run to true' ); +is( $stage->all_jobs_run, 1, 'Get new true all jobs run' ); +is( $stage->set_all_jobs_run(), undef, 'Set all jobs run to false' ); +is( $stage->all_jobs_run, 0, 'Get new false all jobs run' ); + +# Mock stage object with different name +my $stage1 = Test::MockObject->new(); +$stage1->set_isa('DETCT::Pipeline::Stage'); +$stage1->set_always( 'name', 'get_read_peaks' ); +my $stage2 = Test::MockObject->new(); +$stage2->set_isa('DETCT::Pipeline::Stage'); +$stage2->set_always( 'name', 'merge_read_peaks' ); + +# Test adding and retrieving prerequisites +my $prerequisites; +$prerequisites = $stage->get_all_prerequisites(); +is( scalar @{$prerequisites}, 0, 'No prerequisites' ); +is( $stage->add_prerequisite($stage1), undef, 
'Add prerequisite' ); +$prerequisites = $stage->get_all_prerequisites(); +is( scalar @{$prerequisites}, 1, 'Get one prerequisite' ); +$stage->add_prerequisite($stage2); +is( scalar @{$prerequisites}, 2, 'Get two prerequisites' ); +throws_ok { $stage->add_prerequisite() } qr/No prerequisite specified/ms, + 'No prerequisite specified'; +throws_ok { $stage->add_prerequisite('invalid') } qr/Class of prerequisite/ms, + 'Invalid prerequisite'; diff --git a/t/sample.t b/t/sample.t new file mode 100644 index 0000000..16b3bc4 --- /dev/null +++ b/t/sample.t @@ -0,0 +1,81 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 36; + +use DETCT::Sample; + +my $sample = DETCT::Sample->new( + { + name => 'zmp_ph1_1m', + condition => 'mutant', + group => '1', + tag => 'NNNNBGAGGC', + bam_file => 't/data/test1.bam', + } +); + +isa_ok( $sample, 'DETCT::Sample' ); + +# Test name attribute +is( $sample->name, 'zmp_ph1_1m', 'Get name' ); +is( $sample->set_name('zmp_ph1_1s'), undef, 'Set name' ); +is( $sample->name, 'zmp_ph1_1s', 'Get new name' ); +throws_ok { $sample->set_name() } qr/No name specified/ms, 'No name'; +my $long_name = 'X' x ( $DETCT::Sample::MAX_NAME_LENGTH + 1 ); +throws_ok { $sample->set_name(' ') } qr/Invalid name specified/ms, + 'Invalid name'; +throws_ok { $sample->set_name('') } qr/Empty name specified/ms, 'Empty name'; +throws_ok { $sample->set_name($long_name) } qr/longer than \d+ characters/ms, + 'Long name'; + +# Test description attribute +is( $sample->description, undef, 'Get description' ); +is( $sample->set_description('ZMP phenotype 1.1 mutant'), + undef, 'Set description' ); +is( $sample->description, 'ZMP phenotype 1.1 mutant', 'Get new description' ); +is( $sample->set_description(), undef, 'Set undef description' ); +is( $sample->description, undef, 'Get undef description' ); + +# Test condition attribute +is( $sample->condition, 'mutant', 'Get condition' ); +is( $sample->set_condition('sibling'), undef, 'Set condition' ); +is( $sample->condition, 'sibling', 'Get new condition' ); +throws_ok { $sample->set_condition() } qr/No condition specified/ms, + 'No condition'; +my $long_condition = 'X' x ( $DETCT::Sample::MAX_CONDITION_LENGTH + 1 ); +throws_ok { $sample->set_condition('') } qr/Empty condition specified/ms, + 'Empty condition'; +throws_ok { $sample->set_condition($long_condition) } +qr/longer than \d+ characters/ms, 'Long condition'; + +# Test group attribute +is( $sample->group, '1', 'Get group' ); +is( $sample->set_group('2'), undef, 'Set group' ); +is( $sample->group, '2', 'Get new group' ); +is( $sample->set_group(), undef, 'Set undefined group' ); +is( $sample->group, undef, 'Get undefined group' ); +my $long_group = 'X' x ( $DETCT::Sample::MAX_GROUP_LENGTH + 1 ); +throws_ok { $sample->set_group('') } qr/Empty group specified/ms, 'Empty group'; +throws_ok { $sample->set_group($long_group) } qr/longer than \d+ characters/ms, + 'Long group'; + +# Test tag attribute +is( $sample->tag, 'NNNNBGAGGC', 'Get tag' ); +is( $sample->set_tag('NNNNBCAGAG'), undef, 'Set tag' ); +is( $sample->tag, 'NNNNBCAGAG', 'Get new tag' ); +throws_ok { $sample->set_tag() } qr/No tag specified/ms, 'No tag'; +throws_ok { $sample->set_tag('NNNNBCAGAN') } qr/Invalid tag/ms, 'Invalid tag'; + +# Test bam file attribute +is( $sample->bam_file, 't/data/test1.bam', 'Get BAM file' ); +is( $sample->set_bam_file('t/data/test2.bam'), undef, 'Set BAM file' ); +is( $sample->bam_file, 't/data/test2.bam', 'Get new BAM file' ); 
+throws_ok { $sample->set_bam_file() } qr/No BAM file specified/ms, + 'No BAM file'; +throws_ok { $sample->set_bam_file('nonexistent.bam') } +qr/does not exist or cannot be read/ms, 'Missing BAM file'; diff --git a/t/sequence.t b/t/sequence.t new file mode 100644 index 0000000..7aa985d --- /dev/null +++ b/t/sequence.t @@ -0,0 +1,36 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 12; + +use DETCT::Sequence; + +my $sequence = DETCT::Sequence->new( + { + name => '1', + bp => 60_348_388, + } +); + +isa_ok( $sequence, 'DETCT::Sequence' ); + +# Test name attribute +is( $sequence->name, '1', 'Get name' ); +is( $sequence->set_name('2'), undef, 'Set name' ); +is( $sequence->name, '2', 'Get new name' ); +throws_ok { $sequence->set_name() } qr/No name specified/ms, 'No name'; +my $long_name = 'X' x ( $DETCT::Sequence::MAX_NAME_LENGTH + 1 ); +throws_ok { $sequence->set_name('') } qr/Empty name specified/ms, 'Empty name'; +throws_ok { $sequence->set_name($long_name) } qr/longer than \d+ characters/ms, + 'Long name'; + +# Test bp attribute +is( $sequence->bp, 60_348_388, 'Get bp' ); +is( $sequence->set_bp(60_300_536), undef, 'Set bp' ); +is( $sequence->bp, 60_300_536, 'Get new bp' ); +throws_ok { $sequence->set_bp() } qr/No bp specified/ms, 'No bp'; +throws_ok { $sequence->set_bp(-1) } qr/Invalid bp/ms, 'Invalid bp'; diff --git a/t/transcript.t b/t/transcript.t new file mode 100644 index 0000000..87fbca3 --- /dev/null +++ b/t/transcript.t @@ -0,0 +1,102 @@ +use Test::More; +use Test::Exception; +use Test::Warn; +use Test::DatabaseRow; +use Test::MockObject; +use Carp; + +plan tests => 47; + +use DETCT::Transcript; + +my $transcript = DETCT::Transcript->new( + { + stable_id => 'ENSDART00000133571', + biotype => 'protein_coding', + seq_name => '5', + start => 40352744, + end => 40354399, + strand => 1, + } +); + +isa_ok( $transcript, 'DETCT::Transcript' ); + +# Test stable id attribute +is( $transcript->stable_id, 'ENSDART00000133571', 'Get stable id' ); +is( $transcript->set_stable_id('ENSDART00000033574'), undef, 'Set stable id' ); +is( $transcript->stable_id, 'ENSDART00000033574', 'Get new stable id' ); +throws_ok { $transcript->set_stable_id() } qr/No stable id specified/ms, + 'No stable id'; +throws_ok { $transcript->set_stable_id('#invalid#') } qr/Invalid stable id/ms, + 'Invalid stable id'; + +# Test name attribute +is( $transcript->name, undef, 'Get name' ); +is( $transcript->set_name('cxc64-001'), undef, 'Set name' ); +is( $transcript->name, 'cxc64-001', 'Get new name' ); +is( $transcript->set_name(), undef, 'Set undef name' ); +is( $transcript->name, undef, 'Get undef name' ); +my $long_name = 'X' x ( $DETCT::Transcript::MAX_NAME_LENGTH + 1 ); +throws_ok { $transcript->set_name('') } qr/Name is empty/ms, 'Empty name'; +throws_ok { $transcript->set_name($long_name) } +qr/longer than \d+ characters/ms, 'Invalid name'; + +# Test description attribute +is( $transcript->description, undef, 'Get description' ); +is( $transcript->set_description('CXC chemokine 64'), undef, + 'Set description' ); +is( $transcript->description, 'CXC chemokine 64', 'Get new description' ); +is( $transcript->set_description(), undef, 'Set undef description' ); +is( $transcript->description, undef, 'Get undef description' ); + +# Test biotype attribute +is( $transcript->biotype, 'protein_coding', 'Get biotype' ); +is( $transcript->set_biotype('nonsense_mediated_decay'), undef, 'Set biotype' ); +is( $transcript->biotype, 
'nonsense_mediated_decay', 'Get new biotype' ); +throws_ok { $transcript->set_biotype() } qr/No biotype specified/ms, + 'No biotype'; +throws_ok { $transcript->set_biotype('#invalid#') } qr/Invalid biotype/ms, + 'Invalid biotype'; + +# Test sequence name attribute +is( $transcript->seq_name, '5', 'Get sequence name' ); +is( $transcript->set_seq_name('6'), undef, 'Set sequence name' ); +is( $transcript->seq_name, '6', 'Get new sequence name' ); +throws_ok { $transcript->set_seq_name() } qr/No sequence name specified/ms, + 'No sequence name'; +throws_ok { $transcript->set_seq_name('#invalid#') } +qr/Invalid sequence name/ms, 'Invalid sequence name'; + +# Test start attribute +is( $transcript->start, 40352744, 'Get start' ); +is( $transcript->set_start(30352744), undef, 'Set start' ); +is( $transcript->start, 30352744, 'Get new start' ); +throws_ok { $transcript->set_start() } qr/No start specified/ms, 'No start'; +throws_ok { $transcript->set_start(-1) } qr/Invalid start/ms, 'Invalid start'; + +# Test end attribute +is( $transcript->end, 40354399, 'Get end' ); +is( $transcript->set_end(30354399), undef, 'Set end' ); +is( $transcript->end, 30354399, 'Get new end' ); +throws_ok { $transcript->set_end() } qr/No end specified/ms, 'No end'; +throws_ok { $transcript->set_end(-2) } qr/Invalid end/ms, 'Invalid end'; + +# Test strand attribute +is( $transcript->strand, 1, 'Get strand' ); +is( $transcript->set_strand(-1), undef, 'Set strand' ); +is( $transcript->strand, -1, 'Get new strand' ); +throws_ok { $transcript->set_strand() } qr/No strand specified/ms, 'No strand'; +throws_ok { $transcript->set_strand(0) } qr/Invalid strand/ms, 'Invalid strand'; + +# Mock gene object +my $gene = Test::MockObject->new(); +$gene->set_isa('DETCT::Gene'); +$gene->set_always( 'name', 'cxc64' ); + +# Test gene attribute +is( $transcript->gene, undef, 'Get gene' ); +is( $transcript->set_gene($gene), undef, 'Set gene' ); +is( $transcript->gene->name, 'cxc64', 'Get new gene' ); +throws_ok { $transcript->set_gene('invalid') } qr/Class of gene/ms, + 'Invalid gene';