TransWikia.com

Tar file utility - Rust

Code Review Asked by Darendal on August 4, 2020

I’ve been working on building a Rust version of GNU tar following the spec here.

As of right now, I’ve got the binary accepting a single argument, the path to the file/directory to tar, which is passed to a separate library which does the heavy lifting.

The library handles building the Tar record (1 per file), which consists of the header and the file data, if it’s not a directory. So far, I can successfully tar single files and multiple files in nested directories, and these tar files can be read by GNU tar as valid (and extracted). I don’t have any support for handling sym-links as of yet.

Next step is handling untar-ing a given archive.

I’m new to Rust, having only done small toy projects in the past, and I’d like feedback on whether I’m following Rust conventions and whether there are more idiomatic ways to write some of my code before I dive headlong into the next piece of functionality.

tar.rs

use crate::tar::tar_record::TarRecord;
use std::fs::File;
use std::io;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
use walkdir::WalkDir;

/// Magic string stored in the header's `magic` field.
const TAR_MAGIC: &str = "ustar";
/// Format version stored in the header's `version` field.
const TAR_VERSION: u32 = 0;
/// Value written to the header's device-major field.
const DEV_MAJOR_VERSION: u64 = 0;
/// Value written to the header's device-minor field.
const DEV_MINOR_VERSION: u64 = 0;

/// Size in bytes of one archive block; headers and file data are padded to it.
const BLOCK_SIZE: usize = 512;

/// Width in bytes of the header's `name` field.
const NAME_SIZE: usize = 100;
/// Width in bytes of the header's `prefix` field.
const PREFIX_SIZE: usize = 155;

mod tar_record;

/// An archive under construction: one [`TarRecord`] per file or directory
/// that will be written out.
#[derive(Debug)]
pub struct Tar {
    /// Records in the order they will be written to the archive.
    files: Vec<TarRecord>,
}

impl Tar {
    /// Collects the records for `path`.
    ///
    /// If `path` is a directory it is walked recursively; entries for which
    /// `crate::is_hidden` returns `true` are filtered out, and entries the
    /// walker fails to read are silently dropped. Otherwise a single record is
    /// created for `path` itself.
    ///
    /// # Panics
    ///
    /// Panics if [`TarRecord::new`] panics for any collected entry.
    pub fn new(path: PathBuf) -> Tar {
        // Entry names are stored relative to the parent of `path`, so the
        // archived names start with the last component of `path`.
        let mut root = path.clone();
        root.pop();
        let root = root.as_path();

        let files = if path.is_dir() {
            WalkDir::new(path)
                .into_iter()
                .filter_entry(|e| !crate::is_hidden(e))
                .filter_map(|e| e.ok())
                .map(|file| TarRecord::new(file.into_path(), root))
                .collect()
        } else {
            vec![TarRecord::new(path, root)]
        };

        Tar { files }
    }

    /// Writes every record to the file named by `path`, followed by the
    /// end-of-archive marker.
    ///
    /// The extension of `path` is replaced with `tar` in place, so the caller
    /// can read the final output location back from `path` afterwards.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while creating the output file, writing a
    /// record, or flushing the buffered writer.
    pub fn write_tar(&self, path: &mut PathBuf) -> Result<(), io::Error> {
        path.set_extension("tar");
        let mut writer = BufWriter::new(File::create(&*path)?);

        for record in &self.files {
            record.write_record(&mut writer)?;
        }

        // The end of an archive is marked by two blocks of zero bytes.
        // Space padding would be read back as a header with a bad checksum.
        writer.write_all(&[0u8; BLOCK_SIZE * 2])?;

        // Flush explicitly: dropping a BufWriter swallows flush errors.
        writer.flush()
    }
}

tar_record

use crate::tar::{
    BLOCK_SIZE, DEV_MAJOR_VERSION, DEV_MINOR_VERSION, NAME_SIZE, PREFIX_SIZE, TAR_MAGIC,
    TAR_VERSION,
};
use std::fs::File;
use std::io;
use std::io::{BufRead, BufReader, Write};
use std::os::macos::fs::MetadataExt;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use users::{get_group_by_gid, get_user_by_uid};

/// One archive entry: the metadata written to the header block plus an open
/// handle to the file whose contents follow it.
#[derive(Debug)]
pub struct TarRecord {
    /// Entry path with the archive root prefix stripped.
    name: String,
    /// Permission bits, masked with `0o7777` when the record is built.
    mode: u32,
    /// Numeric id of the owning user.
    user_id: u64,
    /// Numeric id of the owning group.
    group_id: u64,
    // Set to 0 for directories.
    size: u64,          // size of the file in bytes
    modified_time: i64, // Unix time file modified
    /// Kind of entry; directories get a header but no data blocks.
    type_flag: TypeFlag,
    /// Link target written to the header; currently always empty.
    linkname: String,
    /// Name of the owning user.
    username: String,
    /// Name of the owning group.
    group_name: String,
    /// Open handle the entry's contents are read from when the record is written.
    file: File,
}

impl TarRecord {
    pub fn new(path: PathBuf, root: &Path) -> TarRecord {
        let name = path
            .strip_prefix(root)
            .unwrap()
            .to_str()
            .unwrap()
            .to_string();

        println!("a {}", name);

        let file = File::open(path.clone()).unwrap();
        let metadata = file.metadata().unwrap();

        let user_id = metadata.st_uid();
        let group_id = metadata.st_gid();
        let modified_time = metadata.st_mtime();

        let type_flag;
        let size;
        if path.is_dir() {
            size = 0;
            type_flag = TypeFlag::Directory;
        } else {
            size = metadata.len();
            type_flag = TypeFlag::ARegFile;
        }

        let username = get_user_by_uid(user_id).unwrap();
        let group_name = get_group_by_gid(group_id).unwrap();

        TarRecord {
            name,
            mode: (metadata.permissions().mode() & 0o07777),
            user_id: user_id as u64,
            group_id: group_id as u64,
            size,
            modified_time,
            type_flag,
            linkname: "".to_string(),
            username: username.name().to_str().unwrap().to_string(),
            group_name: group_name.name().to_str().unwrap().to_string(),
            file,
        }
    }

    pub fn write_record(&self, writer: &mut impl Write) -> Result<(), io::Error> {
        self.write_header(writer)?;

        if self.type_flag != TypeFlag::Directory {
            self.write_file(writer)
        } else {
            Ok(())
        }
    }

    fn write_file(&self, writer: &mut impl Write) -> Result<(), io::Error> {
        let mut reader = BufReader::new(&self.file);
        loop {
            let buf = reader.fill_buf()?;
            let len = buf.len();
            if buf.is_empty() {
                break;
            }
            writer.write_all(buf)?;

            reader.consume(len)
        }
        let residual = BLOCK_SIZE - (self.size as usize % BLOCK_SIZE);
        if residual != BLOCK_SIZE {
            write!(writer, "{:<size$}", "", size = residual)?;
        }

        Ok(())
    }

    fn write_header(&self, writer: &mut impl Write) -> Result<(), io::Error> {
        let mut vec_writer: Vec<u8> = Vec::new();

        // Write all elements of the header to the vector
        write!(
            vec_writer,
            "{name:<name_size$}{mode:06o} {user_id:06o} {group_id:06o} {size:011o} {modified_time:011o} {checksum}{typeflag}{linkname:<100}{magic:<6}{version:02}{username:<32}{group_name:<32}{dev_major:06o} {dev_minor:06o} {prefix:<prefix_size$}",
            name = self.name,
            name_size = NAME_SIZE,
            mode = self.mode,
            user_id = self.user_id,
            group_id = self.group_id,
            size = self.size,
            modified_time = self.modified_time,
            checksum ="        ",
            typeflag = self.type_flag as u8,
            linkname = self.linkname,
            magic = TAR_MAGIC,
            version = TAR_VERSION,
            username = self.username,
            group_name = self.group_name,
            dev_major = DEV_MAJOR_VERSION,
            dev_minor = DEV_MINOR_VERSION,
            prefix = "",
            prefix_size = PREFIX_SIZE,
        )?;

        let sum: u64 = vec_writer.iter().map(|&x| x as u64).sum();

        let mut checksum: Vec<u8> = Vec::new();
        write!(checksum, "{:06o} ", sum)?;

        println!("Length is {}", vec_writer[148..156].len());
        println!("Length is {}", checksum[0..].len());

        vec_writer[148..156].swap_with_slice(&mut checksum[0..]);
        writer.write_all(&vec_writer)?;

        // Header is exactly 12 bytes shy of a single block.
        // Write 12 nulls to fill the block before moving on.
        write!(writer, "{:<size$}", "", size = 12)
    }
}

/// Kind of archive entry.
///
/// The discriminant is the digit written to the header's type-flag byte: the
/// header writer formats `type_flag as u8` in decimal, so `0`, `1` and `5`
/// become the characters `'0'`, `'1'` and `'5'`.
#[repr(u8)]
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
#[allow(dead_code)] // `Link` is never constructed yet: links are not supported.
enum TypeFlag {
    /// Regular file.
    ARegFile = 0,
    /// Link entry. NOTE(review): in ustar `'1'` is a hard link and `'2'` a
    /// symbolic link; confirm which is intended once link support is added.
    Link = 1,
    /// Directory; has a header but no data blocks.
    Directory = 5,
}

Utilities (library root)

use walkdir::DirEntry;

pub mod tar;

/// Returns `true` when `entry` is a dot-file or dot-directory found below the
/// root of the walk.
///
/// The root itself (depth 0) is never reported as hidden: the caller asked for
/// it explicitly, and a root such as `.` or `.config` would otherwise be
/// filtered out together with everything beneath it. Names that are not valid
/// UTF-8 are treated as not hidden.
fn is_hidden(entry: &DirEntry) -> bool {
    entry.depth() > 0
        && entry
            .file_name()
            .to_str()
            .map_or(false, |name| name.starts_with('.'))
}

Add your own answers!

Ask a Question

Get help from others!

© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP