Skip to content

Commit

Permalink
Fix base class for genome window functions
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Jul 31, 2024
1 parent 94001ab commit 8c419c4
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 55 deletions.
57 changes: 57 additions & 0 deletions lib/genesis/population/window/base_window.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

#include <stdexcept>
#include <string>
#include <unordered_map>

#include "genesis/population/genome_region.hpp"

Expand All @@ -47,6 +48,13 @@ namespace population {
* @brief Base class for Window and WindowView, to share common functionality.
*
* See Window for usage and details on the functions offered here.
*
* The class also adds a special case for when we are streaming over a whole genome.
* In that case, we cannot use our usual notation of first and last positions on a chromosome,
* as we are using the whole genome instead. As the notation is however useful in all other cases,
* we want to keep it. A cleaner approach from a software design perspective would be to have base
* classes for both cases, but that would lead to having incompatible types of WindowStream classes,
* which would add too much complexity for the current use case.
*/
template<class D>
class BaseWindow
Expand Down Expand Up @@ -174,6 +182,48 @@ class BaseWindow
return last_position_ - first_position_ + 1;
}

// -------------------------------------------------------------------------
// Whole Genome
// -------------------------------------------------------------------------

/**
* @brief Return whether this instance is intended to be used for a whole genome stream.
*
* This reports the stored flag, which is initialized to `false`.
*
* @return The current value of the whole genome flag.
*/
bool is_whole_genome() const
{
return is_whole_genome_;
}


/**
* @brief Set whether this instance is intended to be used for a whole genome stream.
*
* Only the flag is stored here; no other member is changed by this call.
*
* @param value New value of the whole genome flag.
*/
void is_whole_genome( bool value )
{
is_whole_genome_ = value;
}

/**
* @brief Get the list of all chromosomes along the genome, with their length.
*
* This is based on the chromosomes and their lengths as encountered in the input data,
* or on the sequence dictionary if provided to the GenomeWindowStream.
*
* Usage of this member is only valid if is_whole_genome() is set.
*
* @return Read-only reference to the map from chromosome name to chromosome length.
*/
std::unordered_map<std::string, size_t> const& chromosomes() const
{
return chromosomes_;
}

/**
* @brief Get the list of all chromosomes along the genome, with their length.
*
* Non-const overload: the returned reference allows the caller to modify the stored map.
* NOTE(review): presumably used by the stream that produces the window to fill in the
* chromosomes - confirm against the callers.
*
* @return Mutable reference to the map from chromosome name to chromosome length.
*/
std::unordered_map<std::string, size_t>& chromosomes()
{
return chromosomes_;
}

// -------------------------------------------------------------------------
// Modifiers and Helpers
// -------------------------------------------------------------------------
Expand All @@ -186,6 +236,8 @@ class BaseWindow
chromosome_ = "";
first_position_ = 0;
last_position_ = 0;
is_whole_genome_ = false;
chromosomes_.clear();
clear_();
}

Expand All @@ -209,10 +261,15 @@ class BaseWindow

private:

// Normal case of window within chromosome
std::string chromosome_;
size_t first_position_ = 0;
size_t last_position_ = 0;

// Special case of window over whole genome
bool is_whole_genome_ = false;
std::unordered_map<std::string, size_t> chromosomes_;

};

} // namespace population
Expand Down
55 changes: 0 additions & 55 deletions lib/genesis/population/window/window_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include <stdexcept>
#include <string>
#include <vector>
#include <unordered_map>

namespace genesis {
namespace population {
Expand Down Expand Up @@ -72,14 +71,6 @@ namespace population {
* Because of its streaming approach, its memory footprint is smaller than that of a Window,
* and hence allows iterating over whole chromosomes or genomes. On the flip side, it is a single-pass
* iterator with no random access to the data in the window.
*
*
* The class also adds a special case for when we are streaming over a whole genome.
* In that case, we cannot use our usual notation of first and last positions on a chromosome,
* as we are using the whole genome instead. As the notation is however useful in all other cases,
* we want to keep it. A cleaner approach from a software design perspective would be to have base
* classes for both cases, but that would lead to having incompatible types of WindowStream classes,
* which would add too much complexity for the current use case.
*/
template<class D>
class WindowView final : public BaseWindow<D>
Expand Down Expand Up @@ -322,48 +313,6 @@ class WindowView final : public BaseWindow<D>
return Iterator( nullptr );
}

// -------------------------------------------------------------------------
// Genome Window View
// -------------------------------------------------------------------------

/**
* @brief Return whether this instance is intended to be used for a whole genome stream.
*
* This reports the stored flag, which is initialized to `false`.
*
* @return The current value of the whole genome flag.
*/
bool is_whole_genome() const
{
return is_whole_genome_;
}


/**
* @brief Set whether this instance is intended to be used for a whole genome stream.
*
* Only the flag is stored here; no other member is changed by this call.
*
* @param value New value of the whole genome flag.
*/
void is_whole_genome( bool value )
{
is_whole_genome_ = value;
}

/**
* @brief Get the list of all chromosomes along the genome, with their length.
*
* This is based on the chromosomes and their lengths as encountered in the input data,
* or on the sequence dictionary if provided to the GenomeWindowStream.
*
* Usage of this member is only valid if is_whole_genome() is set.
*
* @return Read-only reference to the map from chromosome name to chromosome length.
*/
std::unordered_map<std::string, size_t> const& chromosomes() const
{
return chromosomes_;
}

/**
* @brief Get the list of all chromosomes along the genome, with their length.
*
* Non-const overload: the returned reference allows the caller to modify the stored map.
* NOTE(review): presumably used by the stream that produces the window to fill in the
* chromosomes - confirm against the callers.
*
* @return Mutable reference to the map from chromosome name to chromosome length.
*/
std::unordered_map<std::string, size_t>& chromosomes()
{
return chromosomes_;
}

// -------------------------------------------------------------------------
// Data Members
// -------------------------------------------------------------------------
Expand All @@ -384,10 +333,6 @@ class WindowView final : public BaseWindow<D>

mutable bool started_ = false;

// Genome window view
bool is_whole_genome_ = false;
std::unordered_map<std::string, size_t> chromosomes_;

};

} // namespace population
Expand Down

0 comments on commit 8c419c4

Please sign in to comment.