1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
use std::ffi::OsString;
use std::fs;
use std::io::Write;

/// Highest Brotli compression level.
///
/// Higher values provide better file size but slower compression.
/// Passed as the quality argument to `brotli::CompressorWriter::new`
/// in `LazyStream::done`.
// FIXME: Should probably become a compression parameter.
const BROTLI_QUALITY: u32 = 11;

/// An arbitrary window size for Brotli compression.
///
/// Higher values provide better file size but slower compression.
/// Passed as the window-size argument to `brotli::CompressorWriter::new`
/// in `LazyStream::done`.
// NOTE(review): the `LG` in the name suggests this is the base-2 logarithm
// of the window size rather than a byte count -- confirm against the
// `brotli` crate documentation.
// FIXME: Should probably become a compression parameter.
const BROTLI_LG_WINDOW_SIZE: u32 = 20;

/// An in-memory byte stream that is only compressed (and optionally dumped
/// to disk) when `done()` is called.
///
/// Deferring all work until `done()` ensures that a stream which never
/// received any data produces nothing at all: no empty section in a
/// compressed file (the section is skipped entirely) and no empty dump
/// file on disk.
#[derive(Debug)]
pub struct LazyStream {
    /// The uncompressed bytes written to this stream so far.
    buffer: Vec<u8>,

    /// An optional path to which to write an uncompressed copy
    /// of the data. If unspecified, nothing is written to disk.
    dump_path: Option<std::path::PathBuf>,
}

impl LazyStream {
    /// Create a new, empty `LazyStream`.
    ///
    /// If `dump_path.is_some()`, a later call to `done()` on a non-empty
    /// stream also dumps the data to disk: the raw bytes go to `dump_path`
    /// and the Brotli-compressed bytes go to the same path with an extra
    /// `.bro` extension.
    pub fn new(dump_path: Option<std::path::PathBuf>) -> Self {
        LazyStream {
            dump_path,
            buffer: Vec::new(),
        }
    }

    /// Return the number of (uncompressed) bytes written so far.
    pub fn bytes_written(&self) -> usize {
        self.buffer.len()
    }

    /// Consume the stream and get the data that needs to be actually
    /// written to disk.
    ///
    /// Returns `Ok(None)` if nothing was ever written to the stream; in that
    /// case no compression happens and no dump file is created. Otherwise
    /// returns `Ok(Some(bytes))` where `bytes` is the Brotli-compressed
    /// content of the stream.
    ///
    /// If a dump path was supplied to `new()`, the parent directory is
    /// created if needed, the raw bytes are written to that path, and the
    /// compressed bytes are written next to it with `.bro` appended to the
    /// extension (`foo.bar` becomes `foo.bar.bro`, `foo` becomes `foo.bro`).
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while compressing, while creating the
    /// dump directory, or while writing either dump file. A dump path with
    /// no parent component (an empty path or a filesystem root) is reported
    /// through the error of the subsequent file write rather than by
    /// panicking.
    pub fn done(mut self) -> std::io::Result<Option<Vec<u8>>> {
        if self.buffer.is_empty() {
            return Ok(None);
        }

        // Actually compress the data.
        // Note that the only way I have found to ensure that the `CompressorWriter` completely
        // flushes, regardless of the data, is to drop the `CompressorWriter`. The simplest way
        // to implement this is to create the compressor in the call to `done()`.
        let mut brotli_compressed = Vec::new();
        {
            let mut brotli = brotli::CompressorWriter::new(
                &mut brotli_compressed,
                self.buffer.len(),
                BROTLI_QUALITY,
                BROTLI_LG_WINDOW_SIZE,
            );
            // Propagate compressor failures instead of panicking: this
            // function already reports I/O problems through its `Result`.
            brotli.write_all(&self.buffer)?;
        }

        // Now dump to files if necessary.
        if let Some(mut path) = self.dump_path.take() {
            // `parent()` is `None` for an empty path or a root; there is no
            // directory to create then, and the write below will surface the
            // problem as a regular I/O error.
            if let Some(dir) = path.parent() {
                fs::DirBuilder::new().recursive(true).create(dir)?;
            }

            // Raw (uncompressed) dump.
            fs::write(&path, &self.buffer)?;

            // Brotli-compressed dump, using a double extension ".foo.bro".
            let extension = match path.extension() {
                None => OsString::from("bro"),
                Some(ext) => {
                    let mut as_os_string = ext.to_os_string();
                    as_os_string.push(".bro");
                    as_os_string
                }
            };
            path.set_extension(extension);
            fs::write(&path, &brotli_compressed)?;
        }

        Ok(Some(brotli_compressed))
    }
}
impl std::io::Write for LazyStream {
    /// Append `data` to the in-memory buffer.
    ///
    /// The whole slice is always accepted, so the returned count equals
    /// `data.len()`. Nothing is compressed or written to disk at this
    /// point; that only happens in `done()`.
    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
        let accepted = data.len();
        self.buffer.extend(data);
        Ok(accepted)
    }

    /// No-op: the data only lives in memory until `done()` is called,
    /// so there is nothing to flush.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}