Skip to content

Commit

Permalink
metadata: file struct to read and write to files, reading bag-info.txt
Browse files Browse the repository at this point in the history
need to do write
need to write some tests
  • Loading branch information
deadbaed committed Jul 31, 2024
1 parent ed6197d commit d410c24
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 24 deletions.
2 changes: 2 additions & 0 deletions src/metadata.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
mod file;

#[cfg(feature = "date")]
use jiff::civil::Date;

Expand Down
59 changes: 59 additions & 0 deletions src/metadata/file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use super::{Metadata, MetadataError};
use std::path::Path;
use std::str::FromStr;
use tokio::fs;
use tokio::io::{AsyncBufReadExt, BufReader};

/// Ordered collection of [`Metadata`] tags backed by a text file holding one
/// tag per line.
///
/// Tags are stored in a `Vec`, so they keep the order in which they were read
/// from the file or added with `add`.
#[derive(Debug, PartialEq, Default)]
pub struct MetadataFile(Vec<Metadata>);

/// Errors that can occur while loading a metadata file from disk.
#[derive(thiserror::Error, Debug, PartialEq)]
pub enum MetadataFileError {
/// Metadata errors: a line of the file could not be turned into a
/// `Metadata` tag. The inner error is displayed as-is (`transparent`).
#[error(transparent)]
Metadata(#[from] MetadataError),
/// Read file error: opening the file or reading one of its lines failed.
///
/// Only the `std::io::ErrorKind` is stored; `std::io::Error` itself does not
/// implement `PartialEq`, which this enum derives.
#[error("Failed to read file: `{0}`")]
ReadFile(std::io::ErrorKind),
}

impl MetadataFile {
pub async fn read(path: impl AsRef<Path>) -> Result<Self, MetadataFileError> {
let file = fs::File::open(path.as_ref())
.await
.map_err(|e| MetadataFileError::ReadFile(e.kind()))?;
let file = BufReader::new(file);
let mut lines = file.lines();

let mut tags = Vec::new();

while let Some(line) = lines
.next_line()
.await
.map_err(|e| MetadataFileError::ReadFile(e.kind()))?
{
tags.push(Metadata::from_str(&line)?);
}

Ok(Self(tags))
}

pub async fn write(&self, path: impl AsRef<Path>) -> Result<(), std::io::Error> {
let contents = self
.0
.iter()
.map(|tag| tag.to_string())
.collect::<Vec<_>>()
.join("\n");

fs::write(path.as_ref(), contents).await
}

pub fn add(&mut self, tag: Metadata) {
self.0.push(tag);
}

pub fn tags(&self) -> impl Iterator<Item = &Metadata> {
self.0.iter()
}
}
87 changes: 63 additions & 24 deletions src/read.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,40 @@
use crate::error::PayloadError;
use crate::manifest::Manifest;
use crate::metadata::{Metadata, KEY_ENCODING, KEY_VERSION};
use crate::metadata::{Metadata, MetadataFile, MetadataFileError, KEY_ENCODING, KEY_VERSION};
use crate::{BagIt, ChecksumAlgorithm};
use digest::Digest;
use std::path::Path;
use std::str::FromStr;
use tokio::fs;

#[derive(thiserror::Error, Debug, PartialEq)]
/// Possible errors when reading bag declaration file `bagit.txt`
///
/// The declaration is expected to hold exactly two tags: the BagIt version
/// followed by the encoding. Each variant describes one way this can fail.
pub enum BagDeclarationError {
/// Required metadata file is not present
#[error("Missing `bagit.txt` file")]
Missing,
/// Error when parsing file: reading `bagit.txt` or parsing one of its
/// lines failed. The inner error is displayed as-is (`transparent`).
#[error(transparent)]
Metadata(#[from] MetadataFileError),
/// Got wrong tag: the tag found at a given position is absent or is not the
/// expected one. Carries the key of the tag that was expected there.
#[error("Wrong tag {0}")]
Tag(&'static str),
/// Wrongly formatted `bagit.txt`: more tags are present after the two
/// expected ones.
#[error("Wrong number of tags for `bagit.txt` file")]
NumberTags,
}

#[derive(thiserror::Error, Debug, PartialEq)]
/// Possible errors when reading a bagit container
pub enum ReadError {
/// Specified path is not a directory
#[error("Path is not a directory")]
NotDirectory,
/// Required metadata file is not present
#[error("Missing `bagit.txt` file")]
MissingBagItTxt,
/// Got wrong tag inside `bagit.txt`
#[error("Wrong bad declaration `bagit.txt` file on key {0}")]
BagDeclarationKey(&'static str),
/// Wrongly formatted `bagit.txt`
#[error("Wrong number of lines for `bagit.txt` file")]
BagDeclarationLines,
/// Error related to `bagit.txt`
#[error("Bag declaration `bagit.txt`: {0}")]
BagDeclaration(#[from] BagDeclarationError),
/// Error related to `bag-info.txt`
#[error("Bag info `bag-info.txt`: {0}")]
BagInfo(#[from] MetadataFileError),
/// Failed to gather list of potential checksum files
#[error("Listing checksum files")]
ListChecksumFiles(std::io::ErrorKind),
Expand Down Expand Up @@ -70,36 +83,42 @@ impl<'a, 'algo> BagIt<'a, 'algo> {
// Read `bagit.txt`
let path_bagit = bag_it_directory.as_ref().join("bagit.txt");
if !path_bagit.exists() {
return Err(ReadError::MissingBagItTxt);
return Err(ReadError::BagDeclaration(BagDeclarationError::Missing));
}

// Read whole file (it is supposed to be 2 small lines)
let bagit_file = fs::read_to_string(path_bagit)
let bagit_file = MetadataFile::read(path_bagit)
.await
.map_err(|e| ReadError::OpenFile(e.kind()))?;

let mut bagit_file = bagit_file
.lines()
// Attempt to parse metadata tags, keep only successful ones
.filter_map(|line| Metadata::from_str(line).ok());
.map_err(|e| ReadError::BagDeclaration(e.into()))?;
let mut bagit_file = bagit_file.tags();

// Expecting first tag to be BagIt version
match bagit_file.next() {
Some(Metadata::BagitVersion { .. }) => (),
_ => return Err(ReadError::BagDeclarationKey(KEY_VERSION)),
_ => return Err(BagDeclarationError::Tag(KEY_VERSION).into()),
}

// Expecting second tag to be Encoding (utf-8)
match bagit_file.next() {
Some(Metadata::Encoding) => (),
_ => return Err(ReadError::BagDeclarationKey(KEY_ENCODING)),
_ => return Err(BagDeclarationError::Tag(KEY_ENCODING).into()),
}

// Expecting no more tags
if bagit_file.next().is_some() {
return Err(ReadError::BagDeclarationLines);
return Err(BagDeclarationError::NumberTags.into());
}

// Get optional `bag-info.txt`
let path_baginfo = bag_it_directory.as_ref().join("bag-info.txt");
let bag_info = if path_baginfo.exists() {
Some(
MetadataFile::read(path_baginfo)
.await
.map_err(ReadError::BagInfo)?,
)
} else {
None
};

// Get all files in directory
let mut dir = fs::read_dir(bag_it_directory.as_ref())
.await
Expand All @@ -121,6 +140,26 @@ impl<'a, 'algo> BagIt<'a, 'algo> {
.get_validate_payloads::<ChecksumAlgo>(bag_it_directory.as_ref())
.await?;

// Optional if present: validate number of payload files and total file size
if let Some(bag_info) = bag_info {
for tag in bag_info.tags() {
if let Metadata::PayloadOctetStreamSummary {
octet_count,
stream_count,
} = tag
{
if *stream_count != payloads.len() {
// TODO: error
}

let payload_bytes_sum = payloads.iter().map(|payload| payload.bytes()).sum();
if *octet_count != payload_bytes_sum {
// TODO: error
}
}
}
}

// Optional if present: validate checksums from tag manifest
if let Some(tag_manifest) =
Manifest::find_tag_manifest(files_in_dir.as_ref(), checksum_algorithm).await?
Expand Down

0 comments on commit d410c24

Please sign in to comment.