chore: add vendor dependencies for kauma build

This commit is contained in:
0xalivecow 2024-10-23 10:20:38 +02:00
parent 7c94e5d8fb
commit 067ef6141c
No known key found for this signature in database
1758 changed files with 398473 additions and 0 deletions

285
vendor/base64/src/alphabet.rs vendored Normal file
View file

@ -0,0 +1,285 @@
//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
use crate::PAD_BYTE;
use core::{convert, fmt};
#[cfg(any(feature = "std", test))]
use std::error;
const ALPHABET_SIZE: usize = 64;
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via `from_str` or the `TryFrom<str>` implementation.
///
/// # Examples
///
/// Building and using a custom Alphabet:
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
/// &custom,
/// base64::engine::general_purpose::PAD);
/// ```
///
/// Building a const:
///
/// ```
/// use base64::alphabet::Alphabet;
///
/// static CUSTOM: Alphabet = {
/// // Result::unwrap() isn't const yet, but panic!() is OK
/// match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
/// Ok(x) => x,
/// Err(_) => panic!("creation of alphabet failed"),
/// }
/// };
/// ```
///
/// Building lazily:
///
/// ```
/// use base64::{
/// alphabet::Alphabet,
/// engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
/// };
/// use once_cell::sync::Lazy;
///
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
/// Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
pub(crate) symbols: [u8; ALPHABET_SIZE],
}
impl Alphabet {
/// Performs no checks so that it can be const.
/// Used only for known-valid strings.
const fn from_str_unchecked(alphabet: &str) -> Self {
let mut symbols = [0_u8; ALPHABET_SIZE];
let source_bytes = alphabet.as_bytes();
// a way to copy that's allowed in const fn
let mut index = 0;
while index < ALPHABET_SIZE {
symbols[index] = source_bytes[index];
index += 1;
}
Self { symbols }
}
/// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
///
/// The `=` byte is not allowed as it is used for padding.
pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
let bytes = alphabet.as_bytes();
if bytes.len() != ALPHABET_SIZE {
return Err(ParseAlphabetError::InvalidLength);
}
{
let mut index = 0;
while index < ALPHABET_SIZE {
let byte = bytes[index];
// must be ascii printable. 127 (DEL) is commonly considered printable
// for some reason but clearly unsuitable for base64.
if !(byte >= 32_u8 && byte <= 126_u8) {
return Err(ParseAlphabetError::UnprintableByte(byte));
}
// = is assumed to be padding, so cannot be used as a symbol
if byte == PAD_BYTE {
return Err(ParseAlphabetError::ReservedByte(byte));
}
// Check for duplicates while staying within what const allows.
// It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
// microsecond range.
let mut probe_index = 0;
while probe_index < ALPHABET_SIZE {
if probe_index == index {
probe_index += 1;
continue;
}
let probe_byte = bytes[probe_index];
if byte == probe_byte {
return Err(ParseAlphabetError::DuplicatedByte(byte));
}
probe_index += 1;
}
index += 1;
}
}
Ok(Self::from_str_unchecked(alphabet))
}
/// Create a `&str` from the symbols in the `Alphabet`
pub fn as_str(&self) -> &str {
core::str::from_utf8(&self.symbols).unwrap()
}
}
impl convert::TryFrom<&str> for Alphabet {
type Error = ParseAlphabetError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
Self::new(value)
}
}
/// Possible errors when constructing an [Alphabet] from a `str`.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
/// Alphabets must be 64 ASCII bytes
InvalidLength,
/// All bytes must be unique
DuplicatedByte(u8),
/// All bytes must be printable (in the range `[32, 126]`).
UnprintableByte(u8),
/// `=` cannot be used
ReservedByte(u8),
}
impl fmt::Display for ParseAlphabetError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
}
}
}
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
///
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
);
/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
///
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
);
/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
///
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
);
/// The bcrypt alphabet.
pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
);
/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
///
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
);
/// The alphabet used in BinHex 4.0 files.
///
/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
);
#[cfg(test)]
mod tests {
use crate::alphabet::*;
use core::convert::TryFrom as _;
#[test]
fn detects_duplicate_start() {
assert_eq!(
ParseAlphabetError::DuplicatedByte(b'A'),
Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
.unwrap_err()
);
}
#[test]
fn detects_duplicate_end() {
assert_eq!(
ParseAlphabetError::DuplicatedByte(b'/'),
Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
.unwrap_err()
);
}
#[test]
fn detects_duplicate_middle() {
assert_eq!(
ParseAlphabetError::DuplicatedByte(b'Z'),
Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
.unwrap_err()
);
}
#[test]
fn detects_length() {
assert_eq!(
ParseAlphabetError::InvalidLength,
Alphabet::new(
"xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
)
.unwrap_err()
);
}
#[test]
fn detects_padding() {
assert_eq!(
ParseAlphabetError::ReservedByte(b'='),
Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
.unwrap_err()
);
}
#[test]
fn detects_unprintable() {
// form feed
assert_eq!(
ParseAlphabetError::UnprintableByte(0xc),
Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
.unwrap_err()
);
}
#[test]
fn same_as_unchecked() {
assert_eq!(
STANDARD,
Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
.unwrap()
);
}
#[test]
fn str_same_as_input() {
let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
let a = Alphabet::try_from(alphabet).unwrap();
assert_eq!(alphabet, a.as_str())
}
}

172
vendor/base64/src/chunked_encoder.rs vendored Normal file
View file

@ -0,0 +1,172 @@
use crate::{
encode::add_padding,
engine::{Config, Engine},
};
#[cfg(any(feature = "alloc", test))]
use alloc::string::String;
#[cfg(any(feature = "alloc", test))]
use core::str;
/// The output mechanism for ChunkedEncoder's encoded bytes.
pub trait Sink {
type Error;
/// Handle a chunk of encoded base64 data (as UTF-8 bytes)
fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
}
/// A base64 encoder that emits encoded bytes in chunks without heap allocation.
pub struct ChunkedEncoder<'e, E: Engine + ?Sized> {
engine: &'e E,
}
impl<'e, E: Engine + ?Sized> ChunkedEncoder<'e, E> {
pub fn new(engine: &'e E) -> ChunkedEncoder<'e, E> {
ChunkedEncoder { engine }
}
pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
const BUF_SIZE: usize = 1024;
const CHUNK_SIZE: usize = BUF_SIZE / 4 * 3;
let mut buf = [0; BUF_SIZE];
for chunk in bytes.chunks(CHUNK_SIZE) {
let mut len = self.engine.internal_encode(chunk, &mut buf);
if chunk.len() != CHUNK_SIZE && self.engine.config().encode_padding() {
// Final, potentially partial, chunk.
// Only need to consider if padding is needed on a partial chunk since full chunk
// is a multiple of 3, which therefore won't be padded.
// Pad output to multiple of four bytes if required by config.
len += add_padding(len, &mut buf[len..]);
}
sink.write_encoded_bytes(&buf[..len])?;
}
Ok(())
}
}
// A really simple sink that just appends to a string
#[cfg(any(feature = "alloc", test))]
pub(crate) struct StringSink<'a> {
string: &'a mut String,
}
#[cfg(any(feature = "alloc", test))]
impl<'a> StringSink<'a> {
pub(crate) fn new(s: &mut String) -> StringSink {
StringSink { string: s }
}
}
#[cfg(any(feature = "alloc", test))]
impl<'a> Sink for StringSink<'a> {
type Error = ();
fn write_encoded_bytes(&mut self, s: &[u8]) -> Result<(), Self::Error> {
self.string.push_str(str::from_utf8(s).unwrap());
Ok(())
}
}
#[cfg(test)]
pub mod tests {
use rand::{
distributions::{Distribution, Uniform},
Rng, SeedableRng,
};
use crate::{
alphabet::STANDARD,
engine::general_purpose::{GeneralPurpose, GeneralPurposeConfig, PAD},
tests::random_engine,
};
use super::*;
#[test]
fn chunked_encode_empty() {
assert_eq!("", chunked_encode_str(&[], PAD));
}
#[test]
fn chunked_encode_intermediate_fast_loop() {
// > 8 bytes input, will enter the pretty fast loop
assert_eq!("Zm9vYmFyYmF6cXV4", chunked_encode_str(b"foobarbazqux", PAD));
}
#[test]
fn chunked_encode_fast_loop() {
// > 32 bytes input, will enter the uber fast loop
assert_eq!(
"Zm9vYmFyYmF6cXV4cXV1eGNvcmdlZ3JhdWx0Z2FycGx5eg==",
chunked_encode_str(b"foobarbazquxquuxcorgegraultgarplyz", PAD)
);
}
#[test]
fn chunked_encode_slow_loop_only() {
// < 8 bytes input, slow loop only
assert_eq!("Zm9vYmFy", chunked_encode_str(b"foobar", PAD));
}
#[test]
fn chunked_encode_matches_normal_encode_random_string_sink() {
let helper = StringSinkTestHelper;
chunked_encode_matches_normal_encode_random(&helper);
}
pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) {
let mut input_buf: Vec<u8> = Vec::new();
let mut output_buf = String::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
let input_len_range = Uniform::new(1, 10_000);
for _ in 0..20_000 {
input_buf.clear();
output_buf.clear();
let buf_len = input_len_range.sample(&mut rng);
for _ in 0..buf_len {
input_buf.push(rng.gen());
}
let engine = random_engine(&mut rng);
let chunk_encoded_string = sink_test_helper.encode_to_string(&engine, &input_buf);
engine.encode_string(&input_buf, &mut output_buf);
assert_eq!(output_buf, chunk_encoded_string, "input len={}", buf_len);
}
}
fn chunked_encode_str(bytes: &[u8], config: GeneralPurposeConfig) -> String {
let mut s = String::new();
let mut sink = StringSink::new(&mut s);
let engine = GeneralPurpose::new(&STANDARD, config);
let encoder = ChunkedEncoder::new(&engine);
encoder.encode(bytes, &mut sink).unwrap();
s
}
// An abstraction around sinks so that we can have tests that easily to any sink implementation
pub trait SinkTestHelper {
fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String;
}
struct StringSinkTestHelper;
impl SinkTestHelper for StringSinkTestHelper {
fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String {
let encoder = ChunkedEncoder::new(engine);
let mut s = String::new();
let mut sink = StringSink::new(&mut s);
encoder.encode(bytes, &mut sink).unwrap();
s
}
}
}

386
vendor/base64/src/decode.rs vendored Normal file
View file

@ -0,0 +1,386 @@
use crate::engine::{general_purpose::STANDARD, DecodeEstimate, Engine};
#[cfg(any(feature = "alloc", test))]
use alloc::vec::Vec;
use core::fmt;
#[cfg(any(feature = "std", test))]
use std::error;
/// Errors that can occur while decoding.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DecodeError {
/// An invalid byte was found in the input. The offset and offending byte are provided.
///
/// Padding characters (`=`) interspersed in the encoded form are invalid, as they may only
/// be present as the last 0-2 bytes of input.
///
/// This error may also indicate that extraneous trailing input bytes are present, causing
/// otherwise valid padding to no longer be the last bytes of input.
InvalidByte(usize, u8),
/// The length of the input, as measured in valid base64 symbols, is invalid.
/// There must be 2-4 symbols in the last input quad.
InvalidLength(usize),
/// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded.
/// This is indicative of corrupted or truncated Base64.
/// Unlike [DecodeError::InvalidByte], which reports symbols that aren't in the alphabet,
/// this error is for symbols that are in the alphabet but represent nonsensical encodings.
InvalidLastSymbol(usize, u8),
/// The nature of the padding was not as configured: absent or incorrect when it must be
/// canonical, or present when it must be absent, etc.
InvalidPadding,
}
impl fmt::Display for DecodeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Self::InvalidByte(index, byte) => {
write!(f, "Invalid symbol {}, offset {}.", byte, index)
}
Self::InvalidLength(len) => write!(f, "Invalid input length: {}", len),
Self::InvalidLastSymbol(index, byte) => {
write!(f, "Invalid last symbol {}, offset {}.", byte, index)
}
Self::InvalidPadding => write!(f, "Invalid padding"),
}
}
}
#[cfg(any(feature = "std", test))]
impl error::Error for DecodeError {}
/// Errors that can occur while decoding into a slice.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DecodeSliceError {
/// A [DecodeError] occurred
DecodeError(DecodeError),
/// The provided slice is too small.
OutputSliceTooSmall,
}
impl fmt::Display for DecodeSliceError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::DecodeError(e) => write!(f, "DecodeError: {}", e),
Self::OutputSliceTooSmall => write!(f, "Output slice too small"),
}
}
}
#[cfg(any(feature = "std", test))]
impl error::Error for DecodeSliceError {
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match self {
DecodeSliceError::DecodeError(e) => Some(e),
DecodeSliceError::OutputSliceTooSmall => None,
}
}
}
impl From<DecodeError> for DecodeSliceError {
fn from(e: DecodeError) -> Self {
DecodeSliceError::DecodeError(e)
}
}
/// Decode base64 using the [`STANDARD` engine](STANDARD).
///
/// See [Engine::decode].
#[deprecated(since = "0.21.0", note = "Use Engine::decode")]
#[cfg(any(feature = "alloc", test))]
pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> {
STANDARD.decode(input)
}
/// Decode from string reference as octets using the specified [Engine].
///
/// See [Engine::decode].
///Returns a `Result` containing a `Vec<u8>`.
#[deprecated(since = "0.21.0", note = "Use Engine::decode")]
#[cfg(any(feature = "alloc", test))]
pub fn decode_engine<E: Engine, T: AsRef<[u8]>>(
input: T,
engine: &E,
) -> Result<Vec<u8>, DecodeError> {
engine.decode(input)
}
/// Decode from string reference as octets.
///
/// See [Engine::decode_vec].
#[cfg(any(feature = "alloc", test))]
#[deprecated(since = "0.21.0", note = "Use Engine::decode_vec")]
pub fn decode_engine_vec<E: Engine, T: AsRef<[u8]>>(
input: T,
buffer: &mut Vec<u8>,
engine: &E,
) -> Result<(), DecodeError> {
engine.decode_vec(input, buffer)
}
/// Decode the input into the provided output slice.
///
/// See [Engine::decode_slice].
#[deprecated(since = "0.21.0", note = "Use Engine::decode_slice")]
pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
input: T,
output: &mut [u8],
engine: &E,
) -> Result<usize, DecodeSliceError> {
engine.decode_slice(input, output)
}
/// Returns a conservative estimate of the decoded size of `encoded_len` base64 symbols (rounded up
/// to the next group of 3 decoded bytes).
///
/// The resulting length will be a safe choice for the size of a decode buffer, but may have up to
/// 2 trailing bytes that won't end up being needed.
///
/// # Examples
///
/// ```
/// use base64::decoded_len_estimate;
///
/// assert_eq!(3, decoded_len_estimate(1));
/// assert_eq!(3, decoded_len_estimate(2));
/// assert_eq!(3, decoded_len_estimate(3));
/// assert_eq!(3, decoded_len_estimate(4));
/// // start of the next quad of encoded symbols
/// assert_eq!(6, decoded_len_estimate(5));
/// ```
pub fn decoded_len_estimate(encoded_len: usize) -> usize {
STANDARD
.internal_decoded_len_estimate(encoded_len)
.decoded_len_estimate()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
alphabet,
engine::{general_purpose, Config, GeneralPurpose},
tests::{assert_encode_sanity, random_engine},
};
use rand::{
distributions::{Distribution, Uniform},
Rng, SeedableRng,
};
#[test]
fn decode_into_nonempty_vec_doesnt_clobber_existing_prefix() {
let mut orig_data = Vec::new();
let mut encoded_data = String::new();
let mut decoded_with_prefix = Vec::new();
let mut decoded_without_prefix = Vec::new();
let mut prefix = Vec::new();
let prefix_len_range = Uniform::new(0, 1000);
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
encoded_data.clear();
decoded_with_prefix.clear();
decoded_without_prefix.clear();
prefix.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut encoded_data);
assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len);
let prefix_len = prefix_len_range.sample(&mut rng);
// fill the buf with a prefix
for _ in 0..prefix_len {
prefix.push(rng.gen());
}
decoded_with_prefix.resize(prefix_len, 0);
decoded_with_prefix.copy_from_slice(&prefix);
// decode into the non-empty buf
engine
.decode_vec(&encoded_data, &mut decoded_with_prefix)
.unwrap();
// also decode into the empty buf
engine
.decode_vec(&encoded_data, &mut decoded_without_prefix)
.unwrap();
assert_eq!(
prefix_len + decoded_without_prefix.len(),
decoded_with_prefix.len()
);
assert_eq!(orig_data, decoded_without_prefix);
// append plain decode onto prefix
prefix.append(&mut decoded_without_prefix);
assert_eq!(prefix, decoded_with_prefix);
}
}
#[test]
fn decode_slice_doesnt_clobber_existing_prefix_or_suffix() {
do_decode_slice_doesnt_clobber_existing_prefix_or_suffix(|e, input, output| {
e.decode_slice(input, output).unwrap()
})
}
#[test]
fn decode_slice_unchecked_doesnt_clobber_existing_prefix_or_suffix() {
do_decode_slice_doesnt_clobber_existing_prefix_or_suffix(|e, input, output| {
e.decode_slice_unchecked(input, output).unwrap()
})
}
#[test]
fn decode_engine_estimation_works_for_various_lengths() {
let engine = GeneralPurpose::new(&alphabet::STANDARD, general_purpose::NO_PAD);
for num_prefix_quads in 0..100 {
for suffix in &["AA", "AAA", "AAAA"] {
let mut prefix = "AAAA".repeat(num_prefix_quads);
prefix.push_str(suffix);
// make sure no overflow (and thus a panic) occurs
let res = engine.decode(prefix);
assert!(res.is_ok());
}
}
}
#[test]
fn decode_slice_output_length_errors() {
for num_quads in 1..100 {
let input = "AAAA".repeat(num_quads);
let mut vec = vec![0; (num_quads - 1) * 3];
assert_eq!(
DecodeSliceError::OutputSliceTooSmall,
STANDARD.decode_slice(&input, &mut vec).unwrap_err()
);
vec.push(0);
assert_eq!(
DecodeSliceError::OutputSliceTooSmall,
STANDARD.decode_slice(&input, &mut vec).unwrap_err()
);
vec.push(0);
assert_eq!(
DecodeSliceError::OutputSliceTooSmall,
STANDARD.decode_slice(&input, &mut vec).unwrap_err()
);
vec.push(0);
// now it works
assert_eq!(
num_quads * 3,
STANDARD.decode_slice(&input, &mut vec).unwrap()
);
}
}
fn do_decode_slice_doesnt_clobber_existing_prefix_or_suffix<
F: Fn(&GeneralPurpose, &[u8], &mut [u8]) -> usize,
>(
call_decode: F,
) {
let mut orig_data = Vec::new();
let mut encoded_data = String::new();
let mut decode_buf = Vec::new();
let mut decode_buf_copy: Vec<u8> = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
encoded_data.clear();
decode_buf.clear();
decode_buf_copy.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut encoded_data);
assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len);
// fill the buffer with random garbage, long enough to have some room before and after
for _ in 0..5000 {
decode_buf.push(rng.gen());
}
// keep a copy for later comparison
decode_buf_copy.extend(decode_buf.iter());
let offset = 1000;
// decode into the non-empty buf
let decode_bytes_written =
call_decode(&engine, encoded_data.as_bytes(), &mut decode_buf[offset..]);
assert_eq!(orig_data.len(), decode_bytes_written);
assert_eq!(
orig_data,
&decode_buf[offset..(offset + decode_bytes_written)]
);
assert_eq!(&decode_buf_copy[0..offset], &decode_buf[0..offset]);
assert_eq!(
&decode_buf_copy[offset + decode_bytes_written..],
&decode_buf[offset + decode_bytes_written..]
);
}
}
}
#[allow(deprecated)]
#[cfg(test)]
mod coverage_gaming {
use super::*;
use std::error::Error;
#[test]
fn decode_error() {
let _ = format!("{:?}", DecodeError::InvalidPadding.clone());
let _ = format!(
"{} {} {} {}",
DecodeError::InvalidByte(0, 0),
DecodeError::InvalidLength(0),
DecodeError::InvalidLastSymbol(0, 0),
DecodeError::InvalidPadding,
);
}
#[test]
fn decode_slice_error() {
let _ = format!("{:?}", DecodeSliceError::OutputSliceTooSmall.clone());
let _ = format!(
"{} {}",
DecodeSliceError::OutputSliceTooSmall,
DecodeSliceError::DecodeError(DecodeError::InvalidPadding)
);
let _ = DecodeSliceError::OutputSliceTooSmall.source();
let _ = DecodeSliceError::DecodeError(DecodeError::InvalidPadding).source();
}
#[test]
fn deprecated_fns() {
let _ = decode("");
let _ = decode_engine("", &crate::prelude::BASE64_STANDARD);
let _ = decode_engine_vec("", &mut Vec::new(), &crate::prelude::BASE64_STANDARD);
let _ = decode_engine_slice("", &mut [], &crate::prelude::BASE64_STANDARD);
}
#[test]
fn decoded_len_est() {
assert_eq!(3, decoded_len_estimate(4));
}
}

88
vendor/base64/src/display.rs vendored Normal file
View file

@ -0,0 +1,88 @@
//! Enables base64'd output anywhere you might use a `Display` implementation, like a format string.
//!
//! ```
//! use base64::{display::Base64Display, engine::general_purpose::STANDARD};
//!
//! let data = vec![0x0, 0x1, 0x2, 0x3];
//! let wrapper = Base64Display::new(&data, &STANDARD);
//!
//! assert_eq!("base64: AAECAw==", format!("base64: {}", wrapper));
//! ```
use super::chunked_encoder::ChunkedEncoder;
use crate::engine::Engine;
use core::fmt::{Display, Formatter};
use core::{fmt, str};
/// A convenience wrapper for base64'ing bytes into a format string without heap allocation.
pub struct Base64Display<'a, 'e, E: Engine> {
bytes: &'a [u8],
chunked_encoder: ChunkedEncoder<'e, E>,
}
impl<'a, 'e, E: Engine> Base64Display<'a, 'e, E> {
/// Create a `Base64Display` with the provided engine.
pub fn new(bytes: &'a [u8], engine: &'e E) -> Base64Display<'a, 'e, E> {
Base64Display {
bytes,
chunked_encoder: ChunkedEncoder::new(engine),
}
}
}
impl<'a, 'e, E: Engine> Display for Base64Display<'a, 'e, E> {
fn fmt(&self, formatter: &mut Formatter) -> Result<(), fmt::Error> {
let mut sink = FormatterSink { f: formatter };
self.chunked_encoder.encode(self.bytes, &mut sink)
}
}
struct FormatterSink<'a, 'b: 'a> {
f: &'a mut Formatter<'b>,
}
impl<'a, 'b: 'a> super::chunked_encoder::Sink for FormatterSink<'a, 'b> {
type Error = fmt::Error;
fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error> {
// Avoid unsafe. If max performance is needed, write your own display wrapper that uses
// unsafe here to gain about 10-15%.
self.f
.write_str(str::from_utf8(encoded).expect("base64 data was not utf8"))
}
}
#[cfg(test)]
mod tests {
use super::super::chunked_encoder::tests::{
chunked_encode_matches_normal_encode_random, SinkTestHelper,
};
use super::*;
use crate::engine::general_purpose::STANDARD;
#[test]
fn basic_display() {
assert_eq!(
"~$Zm9vYmFy#*",
format!("~${}#*", Base64Display::new(b"foobar", &STANDARD))
);
assert_eq!(
"~$Zm9vYmFyZg==#*",
format!("~${}#*", Base64Display::new(b"foobarf", &STANDARD))
);
}
#[test]
fn display_encode_matches_normal_encode() {
let helper = DisplaySinkTestHelper;
chunked_encode_matches_normal_encode_random(&helper);
}
struct DisplaySinkTestHelper;
impl SinkTestHelper for DisplaySinkTestHelper {
fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String {
format!("{}", Base64Display::new(bytes, engine))
}
}
}

492
vendor/base64/src/encode.rs vendored Normal file
View file

@ -0,0 +1,492 @@
#[cfg(any(feature = "alloc", test))]
use alloc::string::String;
use core::fmt;
#[cfg(any(feature = "std", test))]
use std::error;
#[cfg(any(feature = "alloc", test))]
use crate::engine::general_purpose::STANDARD;
use crate::engine::{Config, Engine};
use crate::PAD_BYTE;
/// Encode arbitrary octets as base64 using the [`STANDARD` engine](STANDARD).
///
/// See [Engine::encode].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode")]
#[cfg(any(feature = "alloc", test))]
pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
STANDARD.encode(input)
}
///Encode arbitrary octets as base64 using the provided `Engine` into a new `String`.
///
/// See [Engine::encode].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode")]
#[cfg(any(feature = "alloc", test))]
pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String {
engine.encode(input)
}
///Encode arbitrary octets as base64 into a supplied `String`.
///
/// See [Engine::encode_string].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode_string")]
#[cfg(any(feature = "alloc", test))]
pub fn encode_engine_string<E: Engine, T: AsRef<[u8]>>(
input: T,
output_buf: &mut String,
engine: &E,
) {
engine.encode_string(input, output_buf)
}
/// Encode arbitrary octets as base64 into a supplied slice.
///
/// See [Engine::encode_slice].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode_slice")]
pub fn encode_engine_slice<E: Engine, T: AsRef<[u8]>>(
input: T,
output_buf: &mut [u8],
engine: &E,
) -> Result<usize, EncodeSliceError> {
engine.encode_slice(input, output_buf)
}
/// B64-encode and pad (if configured).
///
/// This helper exists to avoid recalculating encoded_size, which is relatively expensive on short
/// inputs.
///
/// `encoded_size` is the encoded size calculated for `input`.
///
/// `output` must be of size `encoded_size`.
///
/// All bytes in `output` will be written to since it is exactly the size of the output.
pub(crate) fn encode_with_padding<E: Engine + ?Sized>(
input: &[u8],
output: &mut [u8],
engine: &E,
expected_encoded_size: usize,
) {
debug_assert_eq!(expected_encoded_size, output.len());
let b64_bytes_written = engine.internal_encode(input, output);
let padding_bytes = if engine.config().encode_padding() {
add_padding(b64_bytes_written, &mut output[b64_bytes_written..])
} else {
0
};
let encoded_bytes = b64_bytes_written
.checked_add(padding_bytes)
.expect("usize overflow when calculating b64 length");
debug_assert_eq!(expected_encoded_size, encoded_bytes);
}
/// Calculate the base64 encoded length for a given input length, optionally including any
/// appropriate padding bytes.
///
/// Returns `None` if the encoded length can't be represented in `usize`. This will happen for
/// input lengths in approximately the top quarter of the range of `usize`.
pub const fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
let rem = bytes_len % 3;
let complete_input_chunks = bytes_len / 3;
// `?` is disallowed in const, and `let Some(_) = _ else` requires 1.65.0, whereas this
// messier syntax works on 1.48
let complete_chunk_output =
if let Some(complete_chunk_output) = complete_input_chunks.checked_mul(4) {
complete_chunk_output
} else {
return None;
};
if rem > 0 {
if padding {
complete_chunk_output.checked_add(4)
} else {
let encoded_rem = match rem {
1 => 2,
// only other possible remainder is 2
// can't use a separate _ => unreachable!() in const fns in ancient rust versions
_ => 3,
};
complete_chunk_output.checked_add(encoded_rem)
}
} else {
Some(complete_chunk_output)
}
}
/// Write padding characters.
/// `unpadded_output_len` is the size of the unpadded but base64 encoded data.
/// `output` is the slice where padding should be written, of length at least 2.
///
/// Returns the number of padding bytes written.
pub(crate) fn add_padding(unpadded_output_len: usize, output: &mut [u8]) -> usize {
let pad_bytes = (4 - (unpadded_output_len % 4)) % 4;
// for just a couple bytes, this has better performance than using
// .fill(), or iterating over mutable refs, which call memset()
#[allow(clippy::needless_range_loop)]
for i in 0..pad_bytes {
output[i] = PAD_BYTE;
}
pad_bytes
}
/// Errors that can occur while encoding into a slice.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum EncodeSliceError {
/// The provided slice is too small.
OutputSliceTooSmall,
}
impl fmt::Display for EncodeSliceError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::OutputSliceTooSmall => write!(f, "Output slice too small"),
}
}
}
#[cfg(any(feature = "std", test))]
impl error::Error for EncodeSliceError {}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
alphabet,
engine::general_purpose::{GeneralPurpose, NO_PAD, STANDARD},
tests::{assert_encode_sanity, random_config, random_engine},
};
use rand::{
distributions::{Distribution, Uniform},
Rng, SeedableRng,
};
use std::str;
const URL_SAFE_NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);
#[test]
fn encoded_size_correct_standard() {
assert_encoded_length(0, 0, &STANDARD, true);
assert_encoded_length(1, 4, &STANDARD, true);
assert_encoded_length(2, 4, &STANDARD, true);
assert_encoded_length(3, 4, &STANDARD, true);
assert_encoded_length(4, 8, &STANDARD, true);
assert_encoded_length(5, 8, &STANDARD, true);
assert_encoded_length(6, 8, &STANDARD, true);
assert_encoded_length(7, 12, &STANDARD, true);
assert_encoded_length(8, 12, &STANDARD, true);
assert_encoded_length(9, 12, &STANDARD, true);
assert_encoded_length(54, 72, &STANDARD, true);
assert_encoded_length(55, 76, &STANDARD, true);
assert_encoded_length(56, 76, &STANDARD, true);
assert_encoded_length(57, 76, &STANDARD, true);
assert_encoded_length(58, 80, &STANDARD, true);
}
#[test]
fn encoded_size_correct_no_pad() {
assert_encoded_length(0, 0, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(1, 2, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(2, 3, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(3, 4, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(4, 6, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(5, 7, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(6, 8, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(7, 10, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(8, 11, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(9, 12, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(54, 72, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(55, 74, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(56, 75, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(57, 76, &URL_SAFE_NO_PAD_ENGINE, false);
assert_encoded_length(58, 78, &URL_SAFE_NO_PAD_ENGINE, false);
}
#[test]
fn encoded_size_overflow() {
assert_eq!(None, encoded_len(usize::MAX, true));
}
#[test]
fn encode_engine_string_into_nonempty_buffer_doesnt_clobber_prefix() {
let mut orig_data = Vec::new();
let mut prefix = String::new();
let mut encoded_data_no_prefix = String::new();
let mut encoded_data_with_prefix = String::new();
let mut decoded = Vec::new();
let prefix_len_range = Uniform::new(0, 1000);
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
prefix.clear();
encoded_data_no_prefix.clear();
encoded_data_with_prefix.clear();
decoded.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
let prefix_len = prefix_len_range.sample(&mut rng);
for _ in 0..prefix_len {
// getting convenient random single-byte printable chars that aren't base64 is
// annoying
prefix.push('#');
}
encoded_data_with_prefix.push_str(&prefix);
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut encoded_data_no_prefix);
engine.encode_string(&orig_data, &mut encoded_data_with_prefix);
assert_eq!(
encoded_data_no_prefix.len() + prefix_len,
encoded_data_with_prefix.len()
);
assert_encode_sanity(
&encoded_data_no_prefix,
engine.config().encode_padding(),
input_len,
);
assert_encode_sanity(
&encoded_data_with_prefix[prefix_len..],
engine.config().encode_padding(),
input_len,
);
// append plain encode onto prefix
prefix.push_str(&encoded_data_no_prefix);
assert_eq!(prefix, encoded_data_with_prefix);
engine
.decode_vec(&encoded_data_no_prefix, &mut decoded)
.unwrap();
assert_eq!(orig_data, decoded);
}
}
#[test]
fn encode_engine_slice_into_nonempty_buffer_doesnt_clobber_suffix() {
let mut orig_data = Vec::new();
let mut encoded_data = Vec::new();
let mut encoded_data_original_state = Vec::new();
let mut decoded = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
orig_data.clear();
encoded_data.clear();
encoded_data_original_state.clear();
decoded.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
orig_data.push(rng.gen());
}
// plenty of existing garbage in the encoded buffer
for _ in 0..10 * input_len {
encoded_data.push(rng.gen());
}
encoded_data_original_state.extend_from_slice(&encoded_data);
let engine = random_engine(&mut rng);
let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap();
assert_eq!(
encoded_size,
engine.encode_slice(&orig_data, &mut encoded_data).unwrap()
);
assert_encode_sanity(
str::from_utf8(&encoded_data[0..encoded_size]).unwrap(),
engine.config().encode_padding(),
input_len,
);
assert_eq!(
&encoded_data[encoded_size..],
&encoded_data_original_state[encoded_size..]
);
engine
.decode_vec(&encoded_data[0..encoded_size], &mut decoded)
.unwrap();
assert_eq!(orig_data, decoded);
}
}
#[test]
fn encode_to_slice_random_valid_utf8() {
let mut input = Vec::new();
let mut output = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
input.clear();
output.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
input.push(rng.gen());
}
let config = random_config(&mut rng);
let engine = random_engine(&mut rng);
// fill up the output buffer with garbage
let encoded_size = encoded_len(input_len, config.encode_padding()).unwrap();
for _ in 0..encoded_size {
output.push(rng.gen());
}
let orig_output_buf = output.clone();
let bytes_written = engine.internal_encode(&input, &mut output);
// make sure the part beyond bytes_written is the same garbage it was before
assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);
// make sure the encoded bytes are UTF-8
let _ = str::from_utf8(&output[0..bytes_written]).unwrap();
}
}
#[test]
fn encode_with_padding_random_valid_utf8() {
let mut input = Vec::new();
let mut output = Vec::new();
let input_len_range = Uniform::new(0, 1000);
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..10_000 {
input.clear();
output.clear();
let input_len = input_len_range.sample(&mut rng);
for _ in 0..input_len {
input.push(rng.gen());
}
let engine = random_engine(&mut rng);
// fill up the output buffer with garbage
let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap();
for _ in 0..encoded_size + 1000 {
output.push(rng.gen());
}
let orig_output_buf = output.clone();
encode_with_padding(&input, &mut output[0..encoded_size], &engine, encoded_size);
// make sure the part beyond b64 is the same garbage it was before
assert_eq!(orig_output_buf[encoded_size..], output[encoded_size..]);
// make sure the encoded bytes are UTF-8
let _ = str::from_utf8(&output[0..encoded_size]).unwrap();
}
}
#[test]
fn add_padding_random_valid_utf8() {
let mut output = Vec::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
// cover our bases for length % 4
for unpadded_output_len in 0..20 {
output.clear();
// fill output with random
for _ in 0..100 {
output.push(rng.gen());
}
let orig_output_buf = output.clone();
let bytes_written = add_padding(unpadded_output_len, &mut output);
// make sure the part beyond bytes_written is the same garbage it was before
assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);
// make sure the encoded bytes are UTF-8
let _ = str::from_utf8(&output[0..bytes_written]).unwrap();
}
}
fn assert_encoded_length<E: Engine>(
input_len: usize,
enc_len: usize,
engine: &E,
padded: bool,
) {
assert_eq!(enc_len, encoded_len(input_len, padded).unwrap());
let mut bytes: Vec<u8> = Vec::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..input_len {
bytes.push(rng.gen());
}
let encoded = engine.encode(&bytes);
assert_encode_sanity(&encoded, padded, input_len);
assert_eq!(enc_len, encoded.len());
}
#[test]
fn encode_imap() {
assert_eq!(
&GeneralPurpose::new(&alphabet::IMAP_MUTF7, NO_PAD).encode(b"\xFB\xFF"),
&GeneralPurpose::new(&alphabet::STANDARD, NO_PAD)
.encode(b"\xFB\xFF")
.replace('/', ",")
);
}
}

View file

@ -0,0 +1,357 @@
use crate::{
engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodeMetadata, DecodePaddingMode},
DecodeError, DecodeSliceError, PAD_BYTE,
};
#[doc(hidden)]
pub struct GeneralPurposeEstimate {
/// input len % 4
rem: usize,
conservative_decoded_len: usize,
}
impl GeneralPurposeEstimate {
pub(crate) fn new(encoded_len: usize) -> Self {
let rem = encoded_len % 4;
Self {
rem,
conservative_decoded_len: (encoded_len / 4 + (rem > 0) as usize) * 3,
}
}
}
impl DecodeEstimate for GeneralPurposeEstimate {
fn decoded_len_estimate(&self) -> usize {
self.conservative_decoded_len
}
}
/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
/// Returns the decode metadata, or an error.
// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment,
// but this is fragile and the best setting changes with only minor code modifications.
#[inline]
pub(crate) fn decode_helper(
input: &[u8],
estimate: GeneralPurposeEstimate,
output: &mut [u8],
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
) -> Result<DecodeMetadata, DecodeSliceError> {
let input_complete_nonterminal_quads_len =
complete_quads_len(input, estimate.rem, output.len(), decode_table)?;
const UNROLLED_INPUT_CHUNK_SIZE: usize = 32;
const UNROLLED_OUTPUT_CHUNK_SIZE: usize = UNROLLED_INPUT_CHUNK_SIZE / 4 * 3;
let input_complete_quads_after_unrolled_chunks_len =
input_complete_nonterminal_quads_len % UNROLLED_INPUT_CHUNK_SIZE;
let input_unrolled_loop_len =
input_complete_nonterminal_quads_len - input_complete_quads_after_unrolled_chunks_len;
// chunks of 32 bytes
for (chunk_index, chunk) in input[..input_unrolled_loop_len]
.chunks_exact(UNROLLED_INPUT_CHUNK_SIZE)
.enumerate()
{
let input_index = chunk_index * UNROLLED_INPUT_CHUNK_SIZE;
let chunk_output = &mut output[chunk_index * UNROLLED_OUTPUT_CHUNK_SIZE
..(chunk_index + 1) * UNROLLED_OUTPUT_CHUNK_SIZE];
decode_chunk_8(
&chunk[0..8],
input_index,
decode_table,
&mut chunk_output[0..6],
)?;
decode_chunk_8(
&chunk[8..16],
input_index + 8,
decode_table,
&mut chunk_output[6..12],
)?;
decode_chunk_8(
&chunk[16..24],
input_index + 16,
decode_table,
&mut chunk_output[12..18],
)?;
decode_chunk_8(
&chunk[24..32],
input_index + 24,
decode_table,
&mut chunk_output[18..24],
)?;
}
// remaining quads, except for the last possibly partial one, as it may have padding
let output_unrolled_loop_len = input_unrolled_loop_len / 4 * 3;
let output_complete_quad_len = input_complete_nonterminal_quads_len / 4 * 3;
{
let output_after_unroll = &mut output[output_unrolled_loop_len..output_complete_quad_len];
for (chunk_index, chunk) in input
[input_unrolled_loop_len..input_complete_nonterminal_quads_len]
.chunks_exact(4)
.enumerate()
{
let chunk_output = &mut output_after_unroll[chunk_index * 3..chunk_index * 3 + 3];
decode_chunk_4(
chunk,
input_unrolled_loop_len + chunk_index * 4,
decode_table,
chunk_output,
)?;
}
}
super::decode_suffix::decode_suffix(
input,
input_complete_nonterminal_quads_len,
output,
output_complete_quad_len,
decode_table,
decode_allow_trailing_bits,
padding_mode,
)
}
/// Returns the length of complete quads, except for the last one, even if it is complete.
///
/// Returns an error if the output len is not big enough for decoding those complete quads, or if
/// the input % 4 == 1, and that last byte is an invalid value other than a pad byte.
///
/// - `input` is the base64 input
/// - `input_len_rem` is input len % 4
/// - `output_len` is the length of the output slice
pub(crate) fn complete_quads_len(
input: &[u8],
input_len_rem: usize,
output_len: usize,
decode_table: &[u8; 256],
) -> Result<usize, DecodeSliceError> {
debug_assert!(input.len() % 4 == input_len_rem);
// detect a trailing invalid byte, like a newline, as a user convenience
if input_len_rem == 1 {
let last_byte = input[input.len() - 1];
// exclude pad bytes; might be part of padding that extends from earlier in the input
if last_byte != PAD_BYTE && decode_table[usize::from(last_byte)] == INVALID_VALUE {
return Err(DecodeError::InvalidByte(input.len() - 1, last_byte).into());
}
};
// skip last quad, even if it's complete, as it may have padding
let input_complete_nonterminal_quads_len = input
.len()
.saturating_sub(input_len_rem)
// if rem was 0, subtract 4 to avoid padding
.saturating_sub((input_len_rem == 0) as usize * 4);
debug_assert!(
input.is_empty() || (1..=4).contains(&(input.len() - input_complete_nonterminal_quads_len))
);
// check that everything except the last quad handled by decode_suffix will fit
if output_len < input_complete_nonterminal_quads_len / 4 * 3 {
return Err(DecodeSliceError::OutputSliceTooSmall);
};
Ok(input_complete_nonterminal_quads_len)
}
/// Decode 8 bytes of input into 6 bytes of output.
///
/// `input` is the 8 bytes to decode.
/// `index_at_start_of_input` is the offset in the overall input (used for reporting errors
/// accurately)
/// `decode_table` is the lookup table for the particular base64 alphabet.
/// `output` will have its first 6 bytes overwritten
// yes, really inline (worth 30-50% speedup)
#[inline(always)]
fn decode_chunk_8(
input: &[u8],
index_at_start_of_input: usize,
decode_table: &[u8; 256],
output: &mut [u8],
) -> Result<(), DecodeError> {
let morsel = decode_table[usize::from(input[0])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(index_at_start_of_input, input[0]));
}
let mut accum = u64::from(morsel) << 58;
let morsel = decode_table[usize::from(input[1])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 1,
input[1],
));
}
accum |= u64::from(morsel) << 52;
let morsel = decode_table[usize::from(input[2])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 2,
input[2],
));
}
accum |= u64::from(morsel) << 46;
let morsel = decode_table[usize::from(input[3])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 3,
input[3],
));
}
accum |= u64::from(morsel) << 40;
let morsel = decode_table[usize::from(input[4])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 4,
input[4],
));
}
accum |= u64::from(morsel) << 34;
let morsel = decode_table[usize::from(input[5])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 5,
input[5],
));
}
accum |= u64::from(morsel) << 28;
let morsel = decode_table[usize::from(input[6])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 6,
input[6],
));
}
accum |= u64::from(morsel) << 22;
let morsel = decode_table[usize::from(input[7])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 7,
input[7],
));
}
accum |= u64::from(morsel) << 16;
output[..6].copy_from_slice(&accum.to_be_bytes()[..6]);
Ok(())
}
/// Like [decode_chunk_8] but for 4 bytes of input and 3 bytes of output.
#[inline(always)]
fn decode_chunk_4(
input: &[u8],
index_at_start_of_input: usize,
decode_table: &[u8; 256],
output: &mut [u8],
) -> Result<(), DecodeError> {
let morsel = decode_table[usize::from(input[0])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(index_at_start_of_input, input[0]));
}
let mut accum = u32::from(morsel) << 26;
let morsel = decode_table[usize::from(input[1])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 1,
input[1],
));
}
accum |= u32::from(morsel) << 20;
let morsel = decode_table[usize::from(input[2])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 2,
input[2],
));
}
accum |= u32::from(morsel) << 14;
let morsel = decode_table[usize::from(input[3])];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(
index_at_start_of_input + 3,
input[3],
));
}
accum |= u32::from(morsel) << 8;
output[..3].copy_from_slice(&accum.to_be_bytes()[..3]);
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::engine::general_purpose::STANDARD;
#[test]
fn decode_chunk_8_writes_only_6_bytes() {
let input = b"Zm9vYmFy"; // "foobar"
let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7];
decode_chunk_8(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 6, 7], &output);
}
#[test]
fn decode_chunk_4_writes_only_3_bytes() {
let input = b"Zm9v"; // "foobar"
let mut output = [0_u8, 1, 2, 3];
decode_chunk_4(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
assert_eq!(&vec![b'f', b'o', b'o', 3], &output);
}
#[test]
fn estimate_short_lengths() {
for (range, decoded_len_estimate) in [
(0..=0, 0),
(1..=4, 3),
(5..=8, 6),
(9..=12, 9),
(13..=16, 12),
(17..=20, 15),
] {
for encoded_len in range {
let estimate = GeneralPurposeEstimate::new(encoded_len);
assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate());
}
}
}
#[test]
fn estimate_via_u128_inflation() {
// cover both ends of usize
(0..1000)
.chain(usize::MAX - 1000..=usize::MAX)
.for_each(|encoded_len| {
// inflate to 128 bit type to be able to safely use the easy formulas
let len_128 = encoded_len as u128;
let estimate = GeneralPurposeEstimate::new(encoded_len);
assert_eq!(
(len_128 + 3) / 4 * 3,
estimate.conservative_decoded_len as u128
);
})
}
}

View file

@ -0,0 +1,162 @@
use crate::{
engine::{general_purpose::INVALID_VALUE, DecodeMetadata, DecodePaddingMode},
DecodeError, DecodeSliceError, PAD_BYTE,
};
/// Decode the last 0-4 bytes, checking for trailing set bits and padding per the provided
/// parameters.
///
/// Returns the decode metadata representing the total number of bytes decoded, including the ones
/// indicated as already written by `output_index`.
pub(crate) fn decode_suffix(
input: &[u8],
input_index: usize,
output: &mut [u8],
mut output_index: usize,
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
) -> Result<DecodeMetadata, DecodeSliceError> {
debug_assert!((input.len() - input_index) <= 4);
// Decode any leftovers that might not be a complete input chunk of 4 bytes.
// Use a u32 as a stack-resident 4 byte buffer.
let mut morsels_in_leftover = 0;
let mut padding_bytes_count = 0;
// offset from input_index
let mut first_padding_offset: usize = 0;
let mut last_symbol = 0_u8;
let mut morsels = [0_u8; 4];
for (leftover_index, &b) in input[input_index..].iter().enumerate() {
// '=' padding
if b == PAD_BYTE {
// There can be bad padding bytes in a few ways:
// 1 - Padding with non-padding characters after it
// 2 - Padding after zero or one characters in the current quad (should only
// be after 2 or 3 chars)
// 3 - More than two characters of padding. If 3 or 4 padding chars
// are in the same quad, that implies it will be caught by #2.
// If it spreads from one quad to another, it will be an invalid byte
// in the first quad.
// 4 - Non-canonical padding -- 1 byte when it should be 2, etc.
// Per config, non-canonical but still functional non- or partially-padded base64
// may be treated as an error condition.
if leftover_index < 2 {
// Check for error #2.
// Either the previous byte was padding, in which case we would have already hit
// this case, or it wasn't, in which case this is the first such error.
debug_assert!(
leftover_index == 0 || (leftover_index == 1 && padding_bytes_count == 0)
);
let bad_padding_index = input_index + leftover_index;
return Err(DecodeError::InvalidByte(bad_padding_index, b).into());
}
if padding_bytes_count == 0 {
first_padding_offset = leftover_index;
}
padding_bytes_count += 1;
continue;
}
// Check for case #1.
// To make '=' handling consistent with the main loop, don't allow
// non-suffix '=' in trailing chunk either. Report error as first
// erroneous padding.
if padding_bytes_count > 0 {
return Err(
DecodeError::InvalidByte(input_index + first_padding_offset, PAD_BYTE).into(),
);
}
last_symbol = b;
// can use up to 8 * 6 = 48 bits of the u64, if last chunk has no padding.
// Pack the leftovers from left to right.
let morsel = decode_table[b as usize];
if morsel == INVALID_VALUE {
return Err(DecodeError::InvalidByte(input_index + leftover_index, b).into());
}
morsels[morsels_in_leftover] = morsel;
morsels_in_leftover += 1;
}
// If there was 1 trailing byte, and it was valid, and we got to this point without hitting
// an invalid byte, now we can report invalid length
if !input.is_empty() && morsels_in_leftover < 2 {
return Err(DecodeError::InvalidLength(input_index + morsels_in_leftover).into());
}
match padding_mode {
DecodePaddingMode::Indifferent => { /* everything we care about was already checked */ }
DecodePaddingMode::RequireCanonical => {
// allow empty input
if (padding_bytes_count + morsels_in_leftover) % 4 != 0 {
return Err(DecodeError::InvalidPadding.into());
}
}
DecodePaddingMode::RequireNone => {
if padding_bytes_count > 0 {
// check at the end to make sure we let the cases of padding that should be InvalidByte
// get hit
return Err(DecodeError::InvalidPadding.into());
}
}
}
// When encoding 1 trailing byte (e.g. 0xFF), 2 base64 bytes ("/w") are needed.
// / is the symbol for 63 (0x3F, bottom 6 bits all set) and w is 48 (0x30, top 2 bits
// of bottom 6 bits set).
// When decoding two symbols back to one trailing byte, any final symbol higher than
// w would still decode to the original byte because we only care about the top two
// bits in the bottom 6, but would be a non-canonical encoding. So, we calculate a
// mask based on how many bits are used for just the canonical encoding, and optionally
// error if any other bits are set. In the example of one encoded byte -> 2 symbols,
// 2 symbols can technically encode 12 bits, but the last 4 are non-canonical, and
// useless since there are no more symbols to provide the necessary 4 additional bits
// to finish the second original byte.
let leftover_bytes_to_append = morsels_in_leftover * 6 / 8;
// Put the up to 6 complete bytes as the high bytes.
// Gain a couple percent speedup from nudging these ORs to use more ILP with a two-way split.
let mut leftover_num = (u32::from(morsels[0]) << 26)
| (u32::from(morsels[1]) << 20)
| (u32::from(morsels[2]) << 14)
| (u32::from(morsels[3]) << 8);
// if there are bits set outside the bits we care about, last symbol encodes trailing bits that
// will not be included in the output
let mask = !0_u32 >> (leftover_bytes_to_append * 8);
if !decode_allow_trailing_bits && (leftover_num & mask) != 0 {
// last morsel is at `morsels_in_leftover` - 1
return Err(DecodeError::InvalidLastSymbol(
input_index + morsels_in_leftover - 1,
last_symbol,
)
.into());
}
// Strangely, this approach benchmarks better than writing bytes one at a time,
// or copy_from_slice into output.
for _ in 0..leftover_bytes_to_append {
let hi_byte = (leftover_num >> 24) as u8;
leftover_num <<= 8;
*output
.get_mut(output_index)
.ok_or(DecodeSliceError::OutputSliceTooSmall)? = hi_byte;
output_index += 1;
}
Ok(DecodeMetadata::new(
output_index,
if padding_bytes_count > 0 {
Some(input_index + first_padding_offset)
} else {
None
},
))
}

View file

@ -0,0 +1,352 @@
//! Provides the [GeneralPurpose] engine and associated config types.
use crate::{
alphabet,
alphabet::Alphabet,
engine::{Config, DecodeMetadata, DecodePaddingMode},
DecodeSliceError,
};
use core::convert::TryInto;
pub(crate) mod decode;
pub(crate) mod decode_suffix;
pub use decode::GeneralPurposeEstimate;
pub(crate) const INVALID_VALUE: u8 = 255;
/// A general-purpose base64 engine.
///
/// - It uses no vector CPU instructions, so it will work on any system.
/// - It is reasonably fast (~2-3GiB/s).
/// - It is not constant-time, though, so it is vulnerable to timing side-channel attacks. For loading cryptographic keys, etc, it is suggested to use the forthcoming constant-time implementation.
#[derive(Debug, Clone)]
pub struct GeneralPurpose {
encode_table: [u8; 64],
decode_table: [u8; 256],
config: GeneralPurposeConfig,
}
impl GeneralPurpose {
/// Create a `GeneralPurpose` engine from an [Alphabet].
///
/// While not very expensive to initialize, ideally these should be cached
/// if the engine will be used repeatedly.
pub const fn new(alphabet: &Alphabet, config: GeneralPurposeConfig) -> Self {
Self {
encode_table: encode_table(alphabet),
decode_table: decode_table(alphabet),
config,
}
}
}
impl super::Engine for GeneralPurpose {
type Config = GeneralPurposeConfig;
type DecodeEstimate = GeneralPurposeEstimate;
fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
let mut input_index: usize = 0;
const BLOCKS_PER_FAST_LOOP: usize = 4;
const LOW_SIX_BITS: u64 = 0x3F;
// we read 8 bytes at a time (u64) but only actually consume 6 of those bytes. Thus, we need
// 2 trailing bytes to be available to read..
let last_fast_index = input.len().saturating_sub(BLOCKS_PER_FAST_LOOP * 6 + 2);
let mut output_index = 0;
if last_fast_index > 0 {
while input_index <= last_fast_index {
// Major performance wins from letting the optimizer do the bounds check once, mostly
// on the output side
let input_chunk =
&input[input_index..(input_index + (BLOCKS_PER_FAST_LOOP * 6 + 2))];
let output_chunk =
&mut output[output_index..(output_index + BLOCKS_PER_FAST_LOOP * 8)];
// Hand-unrolling for 32 vs 16 or 8 bytes produces yields performance about equivalent
// to unsafe pointer code on a Xeon E5-1650v3. 64 byte unrolling was slightly better for
// large inputs but significantly worse for 50-byte input, unsurprisingly. I suspect
// that it's a not uncommon use case to encode smallish chunks of data (e.g. a 64-byte
// SHA-512 digest), so it would be nice if that fit in the unrolled loop at least once.
// Plus, single-digit percentage performance differences might well be quite different
// on different hardware.
let input_u64 = read_u64(&input_chunk[0..]);
output_chunk[0] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
output_chunk[1] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
output_chunk[2] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
output_chunk[3] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
output_chunk[4] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
output_chunk[5] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
output_chunk[6] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
output_chunk[7] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
let input_u64 = read_u64(&input_chunk[6..]);
output_chunk[8] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
output_chunk[9] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
output_chunk[10] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
output_chunk[11] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
output_chunk[12] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
output_chunk[13] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
output_chunk[14] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
output_chunk[15] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
let input_u64 = read_u64(&input_chunk[12..]);
output_chunk[16] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
output_chunk[17] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
output_chunk[18] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
output_chunk[19] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
output_chunk[20] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
output_chunk[21] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
output_chunk[22] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
output_chunk[23] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
let input_u64 = read_u64(&input_chunk[18..]);
output_chunk[24] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
output_chunk[25] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
output_chunk[26] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
output_chunk[27] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
output_chunk[28] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
output_chunk[29] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
output_chunk[30] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
output_chunk[31] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
output_index += BLOCKS_PER_FAST_LOOP * 8;
input_index += BLOCKS_PER_FAST_LOOP * 6;
}
}
// Encode what's left after the fast loop.
const LOW_SIX_BITS_U8: u8 = 0x3F;
let rem = input.len() % 3;
let start_of_rem = input.len() - rem;
// start at the first index not handled by fast loop, which may be 0.
while input_index < start_of_rem {
let input_chunk = &input[input_index..(input_index + 3)];
let output_chunk = &mut output[output_index..(output_index + 4)];
output_chunk[0] = self.encode_table[(input_chunk[0] >> 2) as usize];
output_chunk[1] = self.encode_table
[((input_chunk[0] << 4 | input_chunk[1] >> 4) & LOW_SIX_BITS_U8) as usize];
output_chunk[2] = self.encode_table
[((input_chunk[1] << 2 | input_chunk[2] >> 6) & LOW_SIX_BITS_U8) as usize];
output_chunk[3] = self.encode_table[(input_chunk[2] & LOW_SIX_BITS_U8) as usize];
input_index += 3;
output_index += 4;
}
if rem == 2 {
output[output_index] = self.encode_table[(input[start_of_rem] >> 2) as usize];
output[output_index + 1] =
self.encode_table[((input[start_of_rem] << 4 | input[start_of_rem + 1] >> 4)
& LOW_SIX_BITS_U8) as usize];
output[output_index + 2] =
self.encode_table[((input[start_of_rem + 1] << 2) & LOW_SIX_BITS_U8) as usize];
output_index += 3;
} else if rem == 1 {
output[output_index] = self.encode_table[(input[start_of_rem] >> 2) as usize];
output[output_index + 1] =
self.encode_table[((input[start_of_rem] << 4) & LOW_SIX_BITS_U8) as usize];
output_index += 2;
}
output_index
}
fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
GeneralPurposeEstimate::new(input_len)
}
fn internal_decode(
&self,
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
) -> Result<DecodeMetadata, DecodeSliceError> {
decode::decode_helper(
input,
estimate,
output,
&self.decode_table,
self.config.decode_allow_trailing_bits,
self.config.decode_padding_mode,
)
}
fn config(&self) -> &Self::Config {
&self.config
}
}
/// Returns a table mapping a 6-bit index to the ASCII byte encoding of the index
pub(crate) const fn encode_table(alphabet: &Alphabet) -> [u8; 64] {
// the encode table is just the alphabet:
// 6-bit index lookup -> printable byte
let mut encode_table = [0_u8; 64];
{
let mut index = 0;
while index < 64 {
encode_table[index] = alphabet.symbols[index];
index += 1;
}
}
encode_table
}
/// Returns a table mapping base64 bytes as the lookup index to either:
/// - [INVALID_VALUE] for bytes that aren't members of the alphabet
/// - a byte whose lower 6 bits are the value that was encoded into the index byte
pub(crate) const fn decode_table(alphabet: &Alphabet) -> [u8; 256] {
let mut decode_table = [INVALID_VALUE; 256];
// Since the table is full of `INVALID_VALUE` already, we only need to overwrite
// the parts that are valid.
let mut index = 0;
while index < 64 {
// The index in the alphabet is the 6-bit value we care about.
// Since the index is in 0-63, it is safe to cast to u8.
decode_table[alphabet.symbols[index] as usize] = index as u8;
index += 1;
}
decode_table
}
#[inline]
fn read_u64(s: &[u8]) -> u64 {
u64::from_be_bytes(s[..8].try_into().unwrap())
}
/// Contains configuration parameters for base64 encoding and decoding.
///
/// ```
/// # use base64::engine::GeneralPurposeConfig;
/// let config = GeneralPurposeConfig::new()
/// .with_encode_padding(false);
/// // further customize using `.with_*` methods as needed
/// ```
///
/// The constants [PAD] and [NO_PAD] cover most use cases.
///
/// To specify the characters used, see [Alphabet].
#[derive(Clone, Copy, Debug)]
pub struct GeneralPurposeConfig {
encode_padding: bool,
decode_allow_trailing_bits: bool,
decode_padding_mode: DecodePaddingMode,
}
impl GeneralPurposeConfig {
/// Create a new config with `padding` = `true`, `decode_allow_trailing_bits` = `false`, and
/// `decode_padding_mode = DecodePaddingMode::RequireCanonicalPadding`.
///
/// This probably matches most people's expectations, but consider disabling padding to save
/// a few bytes unless you specifically need it for compatibility with some legacy system.
pub const fn new() -> Self {
Self {
// RFC states that padding must be applied by default
encode_padding: true,
decode_allow_trailing_bits: false,
decode_padding_mode: DecodePaddingMode::RequireCanonical,
}
}
/// Create a new config based on `self` with an updated `padding` setting.
///
/// If `padding` is `true`, encoding will append either 1 or 2 `=` padding characters as needed
/// to produce an output whose length is a multiple of 4.
///
/// Padding is not needed for correct decoding and only serves to waste bytes, but it's in the
/// [spec](https://datatracker.ietf.org/doc/html/rfc4648#section-3.2).
///
/// For new applications, consider not using padding if the decoders you're using don't require
/// padding to be present.
pub const fn with_encode_padding(self, padding: bool) -> Self {
Self {
encode_padding: padding,
..self
}
}
/// Create a new config based on `self` with an updated `decode_allow_trailing_bits` setting.
///
/// Most users will not need to configure this. It's useful if you need to decode base64
/// produced by a buggy encoder that has bits set in the unused space on the last base64
/// character as per [forgiving-base64 decode](https://infra.spec.whatwg.org/#forgiving-base64-decode).
/// If invalid trailing bits are present and this is `true`, those bits will
/// be silently ignored, else `DecodeError::InvalidLastSymbol` will be emitted.
pub const fn with_decode_allow_trailing_bits(self, allow: bool) -> Self {
Self {
decode_allow_trailing_bits: allow,
..self
}
}
/// Create a new config based on `self` with an updated `decode_padding_mode` setting.
///
/// Padding is not useful in terms of representing encoded data -- it makes no difference to
/// the decoder if padding is present or not, so if you have some un-padded input to decode, it
/// is perfectly fine to use `DecodePaddingMode::Indifferent` to prevent errors from being
/// emitted.
///
/// However, since in practice
/// [people who learned nothing from BER vs DER seem to expect base64 to have one canonical encoding](https://eprint.iacr.org/2022/361),
/// the default setting is the stricter `DecodePaddingMode::RequireCanonicalPadding`.
///
/// Or, if "canonical" in your circumstance means _no_ padding rather than padding to the
/// next multiple of four, there's `DecodePaddingMode::RequireNoPadding`.
pub const fn with_decode_padding_mode(self, mode: DecodePaddingMode) -> Self {
Self {
decode_padding_mode: mode,
..self
}
}
}
impl Default for GeneralPurposeConfig {
/// Delegates to [GeneralPurposeConfig::new].
fn default() -> Self {
Self::new()
}
}
impl Config for GeneralPurposeConfig {
fn encode_padding(&self) -> bool {
self.encode_padding
}
}
/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [PAD] config.
pub const STANDARD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, PAD);
/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [NO_PAD] config.
pub const STANDARD_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD);
/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [PAD] config.
pub const URL_SAFE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, PAD);
/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [NO_PAD] config.
pub const URL_SAFE_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);
/// Include padding bytes when encoding, and require that they be present when decoding.
///
/// This is the standard per the base64 RFC, but consider using [NO_PAD] instead as padding serves
/// little purpose in practice.
pub const PAD: GeneralPurposeConfig = GeneralPurposeConfig::new();
/// Don't add padding when encoding, and require no padding when decoding.
pub const NO_PAD: GeneralPurposeConfig = GeneralPurposeConfig::new()
.with_encode_padding(false)
.with_decode_padding_mode(DecodePaddingMode::RequireNone);

478
vendor/base64/src/engine/mod.rs vendored Normal file
View file

@ -0,0 +1,478 @@
//! Provides the [Engine] abstraction and out of the box implementations.
#[cfg(any(feature = "alloc", test))]
use crate::chunked_encoder;
use crate::{
encode::{encode_with_padding, EncodeSliceError},
encoded_len, DecodeError, DecodeSliceError,
};
#[cfg(any(feature = "alloc", test))]
use alloc::vec::Vec;
#[cfg(any(feature = "alloc", test))]
use alloc::{string::String, vec};
pub mod general_purpose;
#[cfg(test)]
mod naive;
#[cfg(test)]
mod tests;
pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
///
/// Different implementations offer different characteristics. The library currently ships with
/// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
/// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
///
/// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's
/// recommended to store the engine in a `const` so that references to it won't pose any lifetime
/// issues, and to avoid repeating the cost of engine setup.
///
/// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
// When adding an implementation of Engine, include them in the engine test suite:
// - add an implementation of [engine::tests::EngineWrapper]
// - add the implementation to the `all_engines` macro
// All tests run on all engines listed in the macro.
pub trait Engine: Send + Sync {
/// The config type used by this engine
type Config: Config;
/// The decode estimate used by this engine
type DecodeEstimate: DecodeEstimate;
/// This is not meant to be called directly; it is only for `Engine` implementors.
/// See the other `encode*` functions on this trait.
///
/// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`.
///
/// `output` will be long enough to hold the encoded data.
///
/// Returns the number of bytes written.
///
/// No padding should be written; that is handled separately.
///
/// Must not write any bytes into the output slice other than the encoded data.
#[doc(hidden)]
fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
/// This is not meant to be called directly; it is only for `Engine` implementors.
///
/// As an optimization to prevent the decoded length from being calculated twice, it is
/// sometimes helpful to have a conservative estimate of the decoded size before doing the
/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
#[doc(hidden)]
fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
/// This is not meant to be called directly; it is only for `Engine` implementors.
/// See the other `decode*` functions on this trait.
///
/// Decode `input` base64 bytes into the `output` buffer.
///
/// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
/// calculating it again (expensive on short inputs).`
///
/// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
/// function must also handle the final possibly partial chunk.
/// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
/// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the
/// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5).
///
/// Decoding must not write any bytes into the output slice other than the decoded data.
///
/// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
/// errors unless the engine is configured otherwise.
#[doc(hidden)]
fn internal_decode(
&self,
input: &[u8],
output: &mut [u8],
decode_estimate: Self::DecodeEstimate,
) -> Result<DecodeMetadata, DecodeSliceError>;
/// Returns the config for this engine.
fn config(&self) -> &Self::Config;
/// Encode arbitrary octets as base64 using the provided `Engine`.
/// Returns a `String`.
///
/// # Example
///
/// ```rust
/// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
///
/// let b64 = general_purpose::STANDARD.encode(b"hello world~");
/// println!("{}", b64);
///
/// const CUSTOM_ENGINE: engine::GeneralPurpose =
/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
///
/// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
/// ```
#[cfg(any(feature = "alloc", test))]
#[inline]
fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
fn inner<E>(engine: &E, input_bytes: &[u8]) -> String
where
E: Engine + ?Sized,
{
let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
.expect("integer overflow when calculating buffer size");
let mut buf = vec![0; encoded_size];
encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size);
String::from_utf8(buf).expect("Invalid UTF8")
}
inner(self, input.as_ref())
}
/// Encode arbitrary octets as base64 into a supplied `String`.
/// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
///
/// # Example
///
/// ```rust
/// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
/// const CUSTOM_ENGINE: engine::GeneralPurpose =
/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
///
/// fn main() {
/// let mut buf = String::new();
/// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
/// println!("{}", buf);
///
/// buf.clear();
/// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
/// println!("{}", buf);
/// }
/// ```
#[cfg(any(feature = "alloc", test))]
#[inline]
fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String)
where
E: Engine + ?Sized,
{
let mut sink = chunked_encoder::StringSink::new(output_buf);
chunked_encoder::ChunkedEncoder::new(engine)
.encode(input_bytes, &mut sink)
.expect("Writing to a String shouldn't fail");
}
inner(self, input.as_ref(), output_buf)
}
/// Encode arbitrary octets as base64 into a supplied slice.
/// Writes into the supplied output buffer.
///
/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
/// or statically-allocated buffer).
///
/// # Example
///
#[cfg_attr(feature = "alloc", doc = "```")]
#[cfg_attr(not(feature = "alloc"), doc = "```ignore")]
/// use base64::{Engine as _, engine::general_purpose};
/// let s = b"hello internet!";
/// let mut buf = Vec::new();
/// // make sure we'll have a slice big enough for base64 + padding
/// buf.resize(s.len() * 4 / 3 + 4, 0);
///
/// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
///
/// // shorten our vec down to just what was written
/// buf.truncate(bytes_written);
///
/// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
/// ```
#[inline]
fn encode_slice<T: AsRef<[u8]>>(
&self,
input: T,
output_buf: &mut [u8],
) -> Result<usize, EncodeSliceError> {
fn inner<E>(
engine: &E,
input_bytes: &[u8],
output_buf: &mut [u8],
) -> Result<usize, EncodeSliceError>
where
E: Engine + ?Sized,
{
let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
.expect("usize overflow when calculating buffer size");
if output_buf.len() < encoded_size {
return Err(EncodeSliceError::OutputSliceTooSmall);
}
let b64_output = &mut output_buf[0..encoded_size];
encode_with_padding(input_bytes, b64_output, engine, encoded_size);
Ok(encoded_size)
}
inner(self, input.as_ref(), output_buf)
}
/// Decode the input into a new `Vec`.
///
/// # Example
///
/// ```rust
/// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
///
/// let bytes = general_purpose::STANDARD
/// .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
/// println!("{:?}", bytes);
///
/// // custom engine setup
/// let bytes_url = engine::GeneralPurpose::new(
/// &alphabet::URL_SAFE,
/// general_purpose::NO_PAD)
/// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
/// println!("{:?}", bytes_url);
/// ```
#[cfg(any(feature = "alloc", test))]
#[inline]
fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError>
where
E: Engine + ?Sized,
{
let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
let mut buffer = vec![0; estimate.decoded_len_estimate()];
let bytes_written = engine
.internal_decode(input_bytes, &mut buffer, estimate)
.map_err(|e| match e {
DecodeSliceError::DecodeError(e) => e,
DecodeSliceError::OutputSliceTooSmall => {
unreachable!("Vec is sized conservatively")
}
})?
.decoded_len;
buffer.truncate(bytes_written);
Ok(buffer)
}
inner(self, input.as_ref())
}
/// Decode the `input` into the supplied `buffer`.
///
/// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
/// Returns a `Result` containing an empty tuple, aka `()`.
///
/// # Example
///
/// ```rust
/// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
/// const CUSTOM_ENGINE: engine::GeneralPurpose =
/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
///
/// fn main() {
/// use base64::Engine;
/// let mut buffer = Vec::<u8>::new();
/// // with the default engine
/// general_purpose::STANDARD
/// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap();
/// println!("{:?}", buffer);
///
/// buffer.clear();
///
/// // with a custom engine
/// CUSTOM_ENGINE.decode_vec(
/// "aGVsbG8gaW50ZXJuZXR-Cg==",
/// &mut buffer,
/// ).unwrap();
/// println!("{:?}", buffer);
/// }
/// ```
#[cfg(any(feature = "alloc", test))]
#[inline]
fn decode_vec<T: AsRef<[u8]>>(
&self,
input: T,
buffer: &mut Vec<u8>,
) -> Result<(), DecodeError> {
fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError>
where
E: Engine + ?Sized,
{
let starting_output_len = buffer.len();
let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
let total_len_estimate = estimate
.decoded_len_estimate()
.checked_add(starting_output_len)
.expect("Overflow when calculating output buffer length");
buffer.resize(total_len_estimate, 0);
let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
let bytes_written = engine
.internal_decode(input_bytes, buffer_slice, estimate)
.map_err(|e| match e {
DecodeSliceError::DecodeError(e) => e,
DecodeSliceError::OutputSliceTooSmall => {
unreachable!("Vec is sized conservatively")
}
})?
.decoded_len;
buffer.truncate(starting_output_len + bytes_written);
Ok(())
}
inner(self, input.as_ref(), buffer)
}
/// Decode the input into the provided output slice.
///
/// Returns the number of bytes written to the slice, or an error if `output` is smaller than
/// the estimated decoded length.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
/// See [crate::decoded_len_estimate] for calculating buffer sizes.
///
/// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
/// if the output buffer is too small.
#[inline]
fn decode_slice<T: AsRef<[u8]>>(
&self,
input: T,
output: &mut [u8],
) -> Result<usize, DecodeSliceError> {
fn inner<E>(
engine: &E,
input_bytes: &[u8],
output: &mut [u8],
) -> Result<usize, DecodeSliceError>
where
E: Engine + ?Sized,
{
engine
.internal_decode(
input_bytes,
output,
engine.internal_decoded_len_estimate(input_bytes.len()),
)
.map(|dm| dm.decoded_len)
}
inner(self, input.as_ref(), output)
}
/// Decode the input into the provided output slice.
///
/// Returns the number of bytes written to the slice.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
/// See [crate::decoded_len_estimate] for calculating buffer sizes.
///
/// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
/// buffer is too small.
///
/// # Panics
///
/// Panics if the provided output buffer is too small for the decoded data.
#[inline]
fn decode_slice_unchecked<T: AsRef<[u8]>>(
&self,
input: T,
output: &mut [u8],
) -> Result<usize, DecodeError> {
fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError>
where
E: Engine + ?Sized,
{
engine
.internal_decode(
input_bytes,
output,
engine.internal_decoded_len_estimate(input_bytes.len()),
)
.map(|dm| dm.decoded_len)
.map_err(|e| match e {
DecodeSliceError::DecodeError(e) => e,
DecodeSliceError::OutputSliceTooSmall => {
panic!("Output slice is too small")
}
})
}
inner(self, input.as_ref(), output)
}
}
/// The minimal level of configuration that engines must support.
pub trait Config {
/// Returns `true` if padding should be added after the encoded output.
///
/// Padding is added outside the engine's encode() since the engine may be used
/// to encode only a chunk of the overall output, so it can't always know when
/// the output is "done" and would therefore need padding (if configured).
// It could be provided as a separate parameter when encoding, but that feels like
// leaking an implementation detail to the user, and it's hopefully more convenient
// to have to only pass one thing (the engine) to any part of the API.
fn encode_padding(&self) -> bool;
}
/// The decode estimate used by an engine implementation. Users do not need to interact with this;
/// it is only for engine implementors.
///
/// Implementors may store relevant data here when constructing this to avoid having to calculate
/// them again during actual decoding.
pub trait DecodeEstimate {
/// Returns a conservative (err on the side of too big) estimate of the decoded length to use
/// for pre-allocating buffers, etc.
///
/// The estimate must be no larger than the next largest complete triple of decoded bytes.
/// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
fn decoded_len_estimate(&self) -> usize;
}
/// Controls how pad bytes are handled when decoding.
///
/// Each [Engine] must support at least the behavior indicated by
/// [DecodePaddingMode::RequireCanonical], and may support other modes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DecodePaddingMode {
/// Canonical padding is allowed, but any fewer padding bytes than that is also allowed.
Indifferent,
/// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
RequireCanonical,
/// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
RequireNone,
}
/// Metadata about the result of a decode operation
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
/// Number of decoded bytes output
pub(crate) decoded_len: usize,
/// Offset of the first padding byte in the input, if any
pub(crate) padding_offset: Option<usize>,
}
impl DecodeMetadata {
pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
Self {
decoded_len: decoded_bytes,
padding_offset: padding_index,
}
}
}

195
vendor/base64/src/engine/naive.rs vendored Normal file
View file

@ -0,0 +1,195 @@
use crate::{
alphabet::Alphabet,
engine::{
general_purpose::{self, decode_table, encode_table},
Config, DecodeEstimate, DecodeMetadata, DecodePaddingMode, Engine,
},
DecodeError, DecodeSliceError,
};
use std::ops::{BitAnd, BitOr, Shl, Shr};
/// Comparatively simple implementation that can be used as something to compare against in tests
pub struct Naive {
encode_table: [u8; 64],
decode_table: [u8; 256],
config: NaiveConfig,
}
impl Naive {
const ENCODE_INPUT_CHUNK_SIZE: usize = 3;
const DECODE_INPUT_CHUNK_SIZE: usize = 4;
pub const fn new(alphabet: &Alphabet, config: NaiveConfig) -> Self {
Self {
encode_table: encode_table(alphabet),
decode_table: decode_table(alphabet),
config,
}
}
fn decode_byte_into_u32(&self, offset: usize, byte: u8) -> Result<u32, DecodeError> {
let decoded = self.decode_table[byte as usize];
if decoded == general_purpose::INVALID_VALUE {
return Err(DecodeError::InvalidByte(offset, byte));
}
Ok(decoded as u32)
}
}
impl Engine for Naive {
type Config = NaiveConfig;
type DecodeEstimate = NaiveEstimate;
fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
// complete chunks first
const LOW_SIX_BITS: u32 = 0x3F;
let rem = input.len() % Self::ENCODE_INPUT_CHUNK_SIZE;
// will never underflow
let complete_chunk_len = input.len() - rem;
let mut input_index = 0_usize;
let mut output_index = 0_usize;
if let Some(last_complete_chunk_index) =
complete_chunk_len.checked_sub(Self::ENCODE_INPUT_CHUNK_SIZE)
{
while input_index <= last_complete_chunk_index {
let chunk = &input[input_index..input_index + Self::ENCODE_INPUT_CHUNK_SIZE];
// populate low 24 bits from 3 bytes
let chunk_int: u32 =
(chunk[0] as u32).shl(16) | (chunk[1] as u32).shl(8) | (chunk[2] as u32);
// encode 4x 6-bit output bytes
output[output_index] = self.encode_table[chunk_int.shr(18) as usize];
output[output_index + 1] =
self.encode_table[chunk_int.shr(12_u8).bitand(LOW_SIX_BITS) as usize];
output[output_index + 2] =
self.encode_table[chunk_int.shr(6_u8).bitand(LOW_SIX_BITS) as usize];
output[output_index + 3] =
self.encode_table[chunk_int.bitand(LOW_SIX_BITS) as usize];
input_index += Self::ENCODE_INPUT_CHUNK_SIZE;
output_index += 4;
}
}
// then leftovers
if rem == 2 {
let chunk = &input[input_index..input_index + 2];
// high six bits of chunk[0]
output[output_index] = self.encode_table[chunk[0].shr(2) as usize];
// bottom 2 bits of [0], high 4 bits of [1]
output[output_index + 1] =
self.encode_table[(chunk[0].shl(4_u8).bitor(chunk[1].shr(4_u8)) as u32)
.bitand(LOW_SIX_BITS) as usize];
// bottom 4 bits of [1], with the 2 bottom bits as zero
output[output_index + 2] =
self.encode_table[(chunk[1].shl(2_u8) as u32).bitand(LOW_SIX_BITS) as usize];
output_index += 3;
} else if rem == 1 {
let byte = input[input_index];
output[output_index] = self.encode_table[byte.shr(2) as usize];
output[output_index + 1] =
self.encode_table[(byte.shl(4_u8) as u32).bitand(LOW_SIX_BITS) as usize];
output_index += 2;
}
output_index
}
fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
NaiveEstimate::new(input_len)
}
fn internal_decode(
&self,
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
) -> Result<DecodeMetadata, DecodeSliceError> {
let complete_nonterminal_quads_len = general_purpose::decode::complete_quads_len(
input,
estimate.rem,
output.len(),
&self.decode_table,
)?;
const BOTTOM_BYTE: u32 = 0xFF;
for (chunk_index, chunk) in input[..complete_nonterminal_quads_len]
.chunks_exact(4)
.enumerate()
{
let input_index = chunk_index * 4;
let output_index = chunk_index * 3;
let decoded_int: u32 = self.decode_byte_into_u32(input_index, chunk[0])?.shl(18)
| self
.decode_byte_into_u32(input_index + 1, chunk[1])?
.shl(12)
| self.decode_byte_into_u32(input_index + 2, chunk[2])?.shl(6)
| self.decode_byte_into_u32(input_index + 3, chunk[3])?;
output[output_index] = decoded_int.shr(16_u8).bitand(BOTTOM_BYTE) as u8;
output[output_index + 1] = decoded_int.shr(8_u8).bitand(BOTTOM_BYTE) as u8;
output[output_index + 2] = decoded_int.bitand(BOTTOM_BYTE) as u8;
}
general_purpose::decode_suffix::decode_suffix(
input,
complete_nonterminal_quads_len,
output,
complete_nonterminal_quads_len / 4 * 3,
&self.decode_table,
self.config.decode_allow_trailing_bits,
self.config.decode_padding_mode,
)
}
fn config(&self) -> &Self::Config {
&self.config
}
}
pub struct NaiveEstimate {
/// remainder from dividing input by `Naive::DECODE_CHUNK_SIZE`
rem: usize,
/// Length of input that is in complete `Naive::DECODE_CHUNK_SIZE`-length chunks
complete_chunk_len: usize,
}
impl NaiveEstimate {
fn new(input_len: usize) -> Self {
let rem = input_len % Naive::DECODE_INPUT_CHUNK_SIZE;
let complete_chunk_len = input_len - rem;
Self {
rem,
complete_chunk_len,
}
}
}
impl DecodeEstimate for NaiveEstimate {
fn decoded_len_estimate(&self) -> usize {
((self.complete_chunk_len / 4) + ((self.rem > 0) as usize)) * 3
}
}
#[derive(Clone, Copy, Debug)]
pub struct NaiveConfig {
pub encode_padding: bool,
pub decode_allow_trailing_bits: bool,
pub decode_padding_mode: DecodePaddingMode,
}
impl Config for NaiveConfig {
fn encode_padding(&self) -> bool {
self.encode_padding
}
}

1579
vendor/base64/src/engine/tests.rs vendored Normal file

File diff suppressed because it is too large Load diff

277
vendor/base64/src/lib.rs vendored Normal file
View file

@ -0,0 +1,277 @@
//! Correct, fast, and configurable [base64][] decoding and encoding. Base64
//! transports binary data efficiently in contexts where only plain text is
//! allowed.
//!
//! [base64]: https://developer.mozilla.org/en-US/docs/Glossary/Base64
//!
//! # Usage
//!
//! Use an [`Engine`] to decode or encode base64, configured with the base64
//! alphabet and padding behavior best suited to your application.
//!
//! ## Engine setup
//!
//! There is more than one way to encode a stream of bytes as “base64”.
//! Different applications use different encoding
//! [alphabets][alphabet::Alphabet] and
//! [padding behaviors][engine::general_purpose::GeneralPurposeConfig].
//!
//! ### Encoding alphabet
//!
//! Almost all base64 [alphabets][alphabet::Alphabet] use `A-Z`, `a-z`, and
//! `0-9`, which gives nearly 64 characters (26 + 26 + 10 = 62), but they differ
//! in their choice of their final 2.
//!
//! Most applications use the [standard][alphabet::STANDARD] alphabet specified
//! in [RFC 4648][rfc-alphabet]. If thats all you need, you can get started
//! quickly by using the pre-configured
//! [`STANDARD`][engine::general_purpose::STANDARD] engine, which is also available
//! in the [`prelude`] module as shown here, if you prefer a minimal `use`
//! footprint.
//!
#![cfg_attr(feature = "alloc", doc = "```")]
#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! use base64::prelude::*;
//!
//! # fn main() -> Result<(), base64::DecodeError> {
//! assert_eq!(BASE64_STANDARD.decode(b"+uwgVQA=")?, b"\xFA\xEC\x20\x55\0");
//! assert_eq!(BASE64_STANDARD.encode(b"\xFF\xEC\x20\x55\0"), "/+wgVQA=");
//! # Ok(())
//! # }
//! ```
//!
//! [rfc-alphabet]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
//!
//! Other common alphabets are available in the [`alphabet`] module.
//!
//! #### URL-safe alphabet
//!
//! The standard alphabet uses `+` and `/` as its two non-alphanumeric tokens,
//! which cannot be safely used in URLs without encoding them as `%2B` and
//! `%2F`.
//!
//! To avoid that, some applications use a [“URL-safe” alphabet][alphabet::URL_SAFE],
//! which uses `-` and `_` instead. To use that alternative alphabet, use the
//! [`URL_SAFE`][engine::general_purpose::URL_SAFE] engine. This example doesn't
//! use [`prelude`] to show what a more explicit `use` would look like.
//!
#![cfg_attr(feature = "alloc", doc = "```")]
#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! use base64::{engine::general_purpose::URL_SAFE, Engine as _};
//!
//! # fn main() -> Result<(), base64::DecodeError> {
//! assert_eq!(URL_SAFE.decode(b"-uwgVQA=")?, b"\xFA\xEC\x20\x55\0");
//! assert_eq!(URL_SAFE.encode(b"\xFF\xEC\x20\x55\0"), "_-wgVQA=");
//! # Ok(())
//! # }
//! ```
//!
//! ### Padding characters
//!
//! Each base64 character represents 6 bits (2⁶ = 64) of the original binary
//! data, and every 3 bytes of input binary data will encode to 4 base64
//! characters (8 bits × 3 = 6 bits × 4 = 24 bits).
//!
//! When the input is not an even multiple of 3 bytes in length, [canonical][]
//! base64 encoders insert padding characters at the end, so that the output
//! length is always a multiple of 4:
//!
//! [canonical]: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5
//!
#![cfg_attr(feature = "alloc", doc = "```")]
#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! use base64::{engine::general_purpose::STANDARD, Engine as _};
//!
//! assert_eq!(STANDARD.encode(b""), "");
//! assert_eq!(STANDARD.encode(b"f"), "Zg==");
//! assert_eq!(STANDARD.encode(b"fo"), "Zm8=");
//! assert_eq!(STANDARD.encode(b"foo"), "Zm9v");
//! ```
//!
//! Canonical encoding ensures that base64 encodings will be exactly the same,
//! byte-for-byte, regardless of input length. But the `=` padding characters
//! arent necessary for decoding, and they may be omitted by using a
//! [`NO_PAD`][engine::general_purpose::NO_PAD] configuration:
//!
#![cfg_attr(feature = "alloc", doc = "```")]
#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _};
//!
//! assert_eq!(STANDARD_NO_PAD.encode(b""), "");
//! assert_eq!(STANDARD_NO_PAD.encode(b"f"), "Zg");
//! assert_eq!(STANDARD_NO_PAD.encode(b"fo"), "Zm8");
//! assert_eq!(STANDARD_NO_PAD.encode(b"foo"), "Zm9v");
//! ```
//!
//! The pre-configured `NO_PAD` engines will reject inputs containing padding
//! `=` characters. To encode without padding and still accept padding while
//! decoding, create an [engine][engine::general_purpose::GeneralPurpose] with
//! that [padding mode][engine::DecodePaddingMode].
//!
#![cfg_attr(feature = "alloc", doc = "```")]
#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! # use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _};
//! assert_eq!(STANDARD_NO_PAD.decode(b"Zm8="), Err(base64::DecodeError::InvalidPadding));
//! ```
//!
//! ### Further customization
//!
//! Decoding and encoding behavior can be customized by creating an
//! [engine][engine::GeneralPurpose] with an [alphabet][alphabet::Alphabet] and
//! [padding configuration][engine::GeneralPurposeConfig]:
//!
#![cfg_attr(feature = "alloc", doc = "```")]
#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! use base64::{engine, alphabet, Engine as _};
//!
//! // bizarro-world base64: +/ as the first symbols instead of the last
//! let alphabet =
//! alphabet::Alphabet::new("+/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
//! .unwrap();
//!
//! // a very weird config that encodes with padding but requires no padding when decoding...?
//! let crazy_config = engine::GeneralPurposeConfig::new()
//! .with_decode_allow_trailing_bits(true)
//! .with_encode_padding(true)
//! .with_decode_padding_mode(engine::DecodePaddingMode::RequireNone);
//!
//! let crazy_engine = engine::GeneralPurpose::new(&alphabet, crazy_config);
//!
//! let encoded = crazy_engine.encode(b"abc 123");
//!
//! ```
//!
//! ## Memory allocation
//!
//! The [decode][Engine::decode()] and [encode][Engine::encode()] engine methods
//! allocate memory for their results `decode` returns a `Vec<u8>` and
//! `encode` returns a `String`. To instead decode or encode into a buffer that
//! you allocated, use one of the alternative methods:
//!
//! #### Decoding
//!
//! | Method | Output | Allocates memory |
//! | -------------------------- | ----------------------------- | ----------------------------- |
//! | [`Engine::decode`] | returns a new `Vec<u8>` | always |
//! | [`Engine::decode_vec`] | appends to provided `Vec<u8>` | if `Vec` lacks capacity |
//! | [`Engine::decode_slice`] | writes to provided `&[u8]` | never
//!
//! #### Encoding
//!
//! | Method | Output | Allocates memory |
//! | -------------------------- | ---------------------------- | ------------------------------ |
//! | [`Engine::encode`] | returns a new `String` | always |
//! | [`Engine::encode_string`] | appends to provided `String` | if `String` lacks capacity |
//! | [`Engine::encode_slice`] | writes to provided `&[u8]` | never |
//!
//! ## Input and output
//!
//! The `base64` crate can [decode][Engine::decode()] and
//! [encode][Engine::encode()] values in memory, or
//! [`DecoderReader`][read::DecoderReader] and
//! [`EncoderWriter`][write::EncoderWriter] provide streaming decoding and
//! encoding for any [readable][std::io::Read] or [writable][std::io::Write]
//! byte stream.
//!
//! #### Decoding
//!
#![cfg_attr(feature = "std", doc = "```")]
#![cfg_attr(not(feature = "std"), doc = "```ignore")]
//! # use std::io;
//! use base64::{engine::general_purpose::STANDARD, read::DecoderReader};
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut input = io::stdin();
//! let mut decoder = DecoderReader::new(&mut input, &STANDARD);
//! io::copy(&mut decoder, &mut io::stdout())?;
//! # Ok(())
//! # }
//! ```
//!
//! #### Encoding
//!
#![cfg_attr(feature = "std", doc = "```")]
#![cfg_attr(not(feature = "std"), doc = "```ignore")]
//! # use std::io;
//! use base64::{engine::general_purpose::STANDARD, write::EncoderWriter};
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut output = io::stdout();
//! let mut encoder = EncoderWriter::new(&mut output, &STANDARD);
//! io::copy(&mut io::stdin(), &mut encoder)?;
//! # Ok(())
//! # }
//! ```
//!
//! #### Display
//!
//! If you only need a base64 representation for implementing the
//! [`Display`][std::fmt::Display] trait, use
//! [`Base64Display`][display::Base64Display]:
//!
//! ```
//! use base64::{display::Base64Display, engine::general_purpose::STANDARD};
//!
//! let value = Base64Display::new(b"\0\x01\x02\x03", &STANDARD);
//! assert_eq!("base64: AAECAw==", format!("base64: {}", value));
//! ```
//!
//! # Panics
//!
//! If length calculations result in overflowing `usize`, a panic will result.
#![cfg_attr(feature = "cargo-clippy", allow(clippy::cast_lossless))]
#![deny(
missing_docs,
trivial_casts,
trivial_numeric_casts,
unused_extern_crates,
unused_import_braces,
unused_results,
variant_size_differences
)]
#![forbid(unsafe_code)]
// Allow globally until https://github.com/rust-lang/rust-clippy/issues/8768 is resolved.
// The desired state is to allow it only for the rstest_reuse import.
#![allow(clippy::single_component_path_imports)]
#![cfg_attr(not(any(feature = "std", test)), no_std)]
#[cfg(any(feature = "alloc", test))]
extern crate alloc;
// has to be included at top level because of the way rstest_reuse defines its macros
#[cfg(test)]
use rstest_reuse;
mod chunked_encoder;
pub mod display;
#[cfg(any(feature = "std", test))]
pub mod read;
#[cfg(any(feature = "std", test))]
pub mod write;
pub mod engine;
pub use engine::Engine;
pub mod alphabet;
mod encode;
#[allow(deprecated)]
#[cfg(any(feature = "alloc", test))]
pub use crate::encode::{encode, encode_engine, encode_engine_string};
#[allow(deprecated)]
pub use crate::encode::{encode_engine_slice, encoded_len, EncodeSliceError};
mod decode;
#[allow(deprecated)]
#[cfg(any(feature = "alloc", test))]
pub use crate::decode::{decode, decode_engine, decode_engine_vec};
#[allow(deprecated)]
pub use crate::decode::{decode_engine_slice, decoded_len_estimate, DecodeError, DecodeSliceError};
pub mod prelude;
#[cfg(test)]
mod tests;
const PAD_BYTE: u8 = b'=';

20
vendor/base64/src/prelude.rs vendored Normal file
View file

@ -0,0 +1,20 @@
//! Preconfigured engines for common use cases.
//!
//! These are re-exports of `const` engines in [crate::engine::general_purpose], renamed with a `BASE64_`
//! prefix for those who prefer to `use` the entire path to a name.
//!
//! # Examples
//!
#![cfg_attr(feature = "alloc", doc = "```")]
#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! use base64::prelude::{Engine as _, BASE64_STANDARD_NO_PAD};
//!
//! assert_eq!("c29tZSBieXRlcw", &BASE64_STANDARD_NO_PAD.encode(b"some bytes"));
//! ```
pub use crate::engine::Engine;
pub use crate::engine::general_purpose::STANDARD as BASE64_STANDARD;
pub use crate::engine::general_purpose::STANDARD_NO_PAD as BASE64_STANDARD_NO_PAD;
pub use crate::engine::general_purpose::URL_SAFE as BASE64_URL_SAFE;
pub use crate::engine::general_purpose::URL_SAFE_NO_PAD as BASE64_URL_SAFE_NO_PAD;

335
vendor/base64/src/read/decoder.rs vendored Normal file
View file

@ -0,0 +1,335 @@
use crate::{engine::Engine, DecodeError, DecodeSliceError, PAD_BYTE};
use std::{cmp, fmt, io};
// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;
// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;
/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
/// &mut wrapped_reader,
/// &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
///
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
engine: &'e E,
/// Where b64 data is read from
inner: R,
/// Holds b64 data read from the delegate reader.
b64_buffer: [u8; BUF_SIZE],
/// The start of the pending buffered data in `b64_buffer`.
b64_offset: usize,
/// The amount of buffered b64 data after `b64_offset` in `b64_len`.
b64_len: usize,
/// Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
/// decoded chunk in to, we have to be able to hang on to a few decoded bytes.
/// Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
/// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
/// into here, which seems like a lot of complexity for 1 extra byte of storage.
decoded_chunk_buffer: [u8; DECODED_CHUNK_SIZE],
/// Index of start of decoded data in `decoded_chunk_buffer`
decoded_offset: usize,
/// Length of decoded data after `decoded_offset` in `decoded_chunk_buffer`
decoded_len: usize,
/// Input length consumed so far.
/// Used to provide accurate offsets in errors
input_consumed_len: usize,
/// offset of previously seen padding, if any
padding_offset: Option<usize>,
}
// exclude b64_buffer as it's uselessly large
impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("DecoderReader")
.field("b64_offset", &self.b64_offset)
.field("b64_len", &self.b64_len)
.field("decoded_chunk_buffer", &self.decoded_chunk_buffer)
.field("decoded_offset", &self.decoded_offset)
.field("decoded_len", &self.decoded_len)
.field("input_consumed_len", &self.input_consumed_len)
.field("padding_offset", &self.padding_offset)
.finish()
}
}
impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
/// Create a new decoder that will read from the provided reader `r`.
pub fn new(reader: R, engine: &'e E) -> Self {
DecoderReader {
engine,
inner: reader,
b64_buffer: [0; BUF_SIZE],
b64_offset: 0,
b64_len: 0,
decoded_chunk_buffer: [0; DECODED_CHUNK_SIZE],
decoded_offset: 0,
decoded_len: 0,
input_consumed_len: 0,
padding_offset: None,
}
}
/// Write as much as possible of the decoded buffer into the target buffer.
/// Must only be called when there is something to write and space to write into.
/// Returns a Result with the number of (decoded) bytes copied.
fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
debug_assert!(self.decoded_len > 0);
debug_assert!(!buf.is_empty());
let copy_len = cmp::min(self.decoded_len, buf.len());
debug_assert!(copy_len > 0);
debug_assert!(copy_len <= self.decoded_len);
buf[..copy_len].copy_from_slice(
&self.decoded_chunk_buffer[self.decoded_offset..self.decoded_offset + copy_len],
);
self.decoded_offset += copy_len;
self.decoded_len -= copy_len;
debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
Ok(copy_len)
}
/// Read into the remaining space in the buffer after the current contents.
/// Must only be called when there is space to read into in the buffer.
/// Returns the number of bytes read.
fn read_from_delegate(&mut self) -> io::Result<usize> {
debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);
let read = self
.inner
.read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
self.b64_len += read;
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
Ok(read)
}
/// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
/// caller's responsibility to choose the number of b64 bytes to decode correctly.
///
/// Returns a Result with the number of decoded bytes written to `buf`.
///
/// # Panics
///
/// panics if `buf` is too small
fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
debug_assert!(self.b64_len >= b64_len_to_decode);
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
debug_assert!(!buf.is_empty());
let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
let decode_metadata = self
.engine
.internal_decode(
b64_to_decode,
buf,
self.engine.internal_decoded_len_estimate(b64_len_to_decode),
)
.map_err(|dse| match dse {
DecodeSliceError::DecodeError(de) => {
match de {
DecodeError::InvalidByte(offset, byte) => {
match (byte, self.padding_offset) {
// if there was padding in a previous block of decoding that happened to
// be correct, and we now find more padding that happens to be incorrect,
// to be consistent with non-reader decodes, record the error at the first
// padding
(PAD_BYTE, Some(first_pad_offset)) => {
DecodeError::InvalidByte(first_pad_offset, PAD_BYTE)
}
_ => {
DecodeError::InvalidByte(self.input_consumed_len + offset, byte)
}
}
}
DecodeError::InvalidLength(len) => {
DecodeError::InvalidLength(self.input_consumed_len + len)
}
DecodeError::InvalidLastSymbol(offset, byte) => {
DecodeError::InvalidLastSymbol(self.input_consumed_len + offset, byte)
}
DecodeError::InvalidPadding => DecodeError::InvalidPadding,
}
}
DecodeSliceError::OutputSliceTooSmall => {
unreachable!("buf is sized correctly in calling code")
}
})
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
if let Some(offset) = self.padding_offset {
// we've already seen padding
if decode_metadata.decoded_len > 0 {
// we read more after already finding padding; report error at first padding byte
return Err(io::Error::new(
io::ErrorKind::InvalidData,
DecodeError::InvalidByte(offset, PAD_BYTE),
));
}
}
self.padding_offset = self.padding_offset.or(decode_metadata
.padding_offset
.map(|offset| self.input_consumed_len + offset));
self.input_consumed_len += b64_len_to_decode;
self.b64_offset += b64_len_to_decode;
self.b64_len -= b64_len_to_decode;
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
Ok(decode_metadata.decoded_len)
}
/// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
/// input from.
///
/// Because `DecoderReader` performs internal buffering, the state of the inner reader is
/// unspecified. This function is mainly provided because the inner reader type may provide
/// additional functionality beyond the `Read` implementation which may still be useful.
pub fn into_inner(self) -> R {
self.inner
}
}
impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
/// Decode input from the wrapped reader.
///
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
/// written in `buf`.
///
/// Where possible, this function buffers base64 to minimize the number of read() calls to the
/// delegate reader.
///
/// # Errors
///
/// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
/// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
if buf.is_empty() {
return Ok(0);
}
// offset == BUF_SIZE when we copied it all last time
debug_assert!(self.b64_offset <= BUF_SIZE);
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
debug_assert!(if self.b64_offset == BUF_SIZE {
self.b64_len == 0
} else {
self.b64_len <= BUF_SIZE
});
debug_assert!(if self.decoded_len == 0 {
// can be = when we were able to copy the complete chunk
self.decoded_offset <= DECODED_CHUNK_SIZE
} else {
self.decoded_offset < DECODED_CHUNK_SIZE
});
// We shouldn't ever decode into decoded_buffer when we can't immediately write at least one
// byte into the provided buf, so the effective length should only be 3 momentarily between
// when we decode and when we copy into the target buffer.
debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);
if self.decoded_len > 0 {
// we have a few leftover decoded bytes; flush that rather than pull in more b64
self.flush_decoded_buf(buf)
} else {
let mut at_eof = false;
while self.b64_len < BASE64_CHUNK_SIZE {
// Copy any bytes we have to the start of the buffer.
self.b64_buffer
.copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
self.b64_offset = 0;
// then fill in more data
let read = self.read_from_delegate()?;
if read == 0 {
// we never read into an empty buf, so 0 => we've hit EOF
at_eof = true;
break;
}
}
if self.b64_len == 0 {
debug_assert!(at_eof);
// we must be at EOF, and we have no data left to decode
return Ok(0);
};
debug_assert!(if at_eof {
// if we are at eof, we may not have a complete chunk
self.b64_len > 0
} else {
// otherwise, we must have at least one chunk
self.b64_len >= BASE64_CHUNK_SIZE
});
debug_assert_eq!(0, self.decoded_len);
if buf.len() < DECODED_CHUNK_SIZE {
// caller requested an annoyingly short read
// have to write to a tmp buf first to avoid double mutable borrow
let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
// if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
// to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
// tokens, not 1, since 1 token can't decode to 1 byte).
let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);
let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
self.decoded_chunk_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);
self.decoded_offset = 0;
self.decoded_len = decoded;
// can be less than 3 on last block due to padding
debug_assert!(decoded <= 3);
self.flush_decoded_buf(buf)
} else {
let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
.checked_mul(BASE64_CHUNK_SIZE)
.expect("too many chunks");
debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);
let b64_bytes_available_to_decode = if at_eof {
self.b64_len
} else {
// only use complete chunks
self.b64_len - self.b64_len % 4
};
let actual_decode_len = cmp::min(
b64_bytes_that_can_decode_into_buf,
b64_bytes_available_to_decode,
);
self.decode_to_buf(actual_decode_len, buf)
}
}
}
}

487
vendor/base64/src/read/decoder_tests.rs vendored Normal file
View file

@ -0,0 +1,487 @@
use std::{
cmp,
io::{self, Read as _},
iter,
};
use rand::{Rng as _, RngCore as _};
use super::decoder::{DecoderReader, BUF_SIZE};
use crate::{
alphabet,
engine::{general_purpose::STANDARD, Engine, GeneralPurpose},
tests::{random_alphabet, random_config, random_engine},
DecodeError, PAD_BYTE,
};
#[test]
fn simple() {
let tests: &[(&[u8], &[u8])] = &[
(&b"0"[..], &b"MA=="[..]),
(b"01", b"MDE="),
(b"012", b"MDEy"),
(b"0123", b"MDEyMw=="),
(b"01234", b"MDEyMzQ="),
(b"012345", b"MDEyMzQ1"),
(b"0123456", b"MDEyMzQ1Ng=="),
(b"01234567", b"MDEyMzQ1Njc="),
(b"012345678", b"MDEyMzQ1Njc4"),
(b"0123456789", b"MDEyMzQ1Njc4OQ=="),
][..];
for (text_expected, base64data) in tests.iter() {
// Read n bytes at a time.
for n in 1..base64data.len() + 1 {
let mut wrapped_reader = io::Cursor::new(base64data);
let mut decoder = DecoderReader::new(&mut wrapped_reader, &STANDARD);
// handle errors as you normally would
let mut text_got = Vec::new();
let mut buffer = vec![0u8; n];
while let Ok(read) = decoder.read(&mut buffer[..]) {
if read == 0 {
break;
}
text_got.extend_from_slice(&buffer[..read]);
}
assert_eq!(
text_got,
*text_expected,
"\nGot: {}\nExpected: {}",
String::from_utf8_lossy(&text_got[..]),
String::from_utf8_lossy(text_expected)
);
}
}
}
// Make sure we error out on trailing junk.
#[test]
fn trailing_junk() {
let tests: &[&[u8]] = &[&b"MDEyMzQ1Njc4*!@#$%^&"[..], b"MDEyMzQ1Njc4OQ== "][..];
for base64data in tests.iter() {
// Read n bytes at a time.
for n in 1..base64data.len() + 1 {
let mut wrapped_reader = io::Cursor::new(base64data);
let mut decoder = DecoderReader::new(&mut wrapped_reader, &STANDARD);
// handle errors as you normally would
let mut buffer = vec![0u8; n];
let mut saw_error = false;
loop {
match decoder.read(&mut buffer[..]) {
Err(_) => {
saw_error = true;
break;
}
Ok(0) => break,
Ok(_len) => (),
}
}
assert!(saw_error);
}
}
}
#[test]
fn handles_short_read_from_delegate() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
decoded.clear();
let size = rng.gen_range(0..(10 * BUF_SIZE));
bytes.extend(iter::repeat(0).take(size));
bytes.truncate(size);
rng.fill_bytes(&mut bytes[..size]);
assert_eq!(size, bytes.len());
let engine = random_engine(&mut rng);
engine.encode_string(&bytes[..], &mut b64);
let mut wrapped_reader = io::Cursor::new(b64.as_bytes());
let mut short_reader = RandomShortRead {
delegate: &mut wrapped_reader,
rng: &mut rng,
};
let mut decoder = DecoderReader::new(&mut short_reader, &engine);
let decoded_len = decoder.read_to_end(&mut decoded).unwrap();
assert_eq!(size, decoded_len);
assert_eq!(&bytes[..], &decoded[..]);
}
}
#[test]
fn read_in_short_increments() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
decoded.clear();
let size = rng.gen_range(0..(10 * BUF_SIZE));
bytes.extend(iter::repeat(0).take(size));
// leave room to play around with larger buffers
decoded.extend(iter::repeat(0).take(size * 3));
rng.fill_bytes(&mut bytes[..]);
assert_eq!(size, bytes.len());
let engine = random_engine(&mut rng);
engine.encode_string(&bytes[..], &mut b64);
let mut wrapped_reader = io::Cursor::new(&b64[..]);
let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
consume_with_short_reads_and_validate(&mut rng, &bytes[..], &mut decoded, &mut decoder);
}
}
#[test]
fn read_in_short_increments_with_short_delegate_reads() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
decoded.clear();
let size = rng.gen_range(0..(10 * BUF_SIZE));
bytes.extend(iter::repeat(0).take(size));
// leave room to play around with larger buffers
decoded.extend(iter::repeat(0).take(size * 3));
rng.fill_bytes(&mut bytes[..]);
assert_eq!(size, bytes.len());
let engine = random_engine(&mut rng);
engine.encode_string(&bytes[..], &mut b64);
let mut base_reader = io::Cursor::new(&b64[..]);
let mut decoder = DecoderReader::new(&mut base_reader, &engine);
let mut short_reader = RandomShortRead {
delegate: &mut decoder,
rng: &mut rand::thread_rng(),
};
consume_with_short_reads_and_validate(
&mut rng,
&bytes[..],
&mut decoded,
&mut short_reader,
);
}
}
#[test]
fn reports_invalid_last_symbol_correctly() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut b64_bytes = Vec::new();
let mut decoded = Vec::new();
let mut bulk_decoded = Vec::new();
for _ in 0..1_000 {
bytes.clear();
b64.clear();
b64_bytes.clear();
let size = rng.gen_range(1..(10 * BUF_SIZE));
bytes.extend(iter::repeat(0).take(size));
decoded.extend(iter::repeat(0).take(size));
rng.fill_bytes(&mut bytes[..]);
assert_eq!(size, bytes.len());
let config = random_config(&mut rng);
let alphabet = random_alphabet(&mut rng);
// changing padding will cause invalid padding errors when we twiddle the last byte
let engine = GeneralPurpose::new(alphabet, config.with_encode_padding(false));
engine.encode_string(&bytes[..], &mut b64);
b64_bytes.extend(b64.bytes());
assert_eq!(b64_bytes.len(), b64.len());
// change the last character to every possible symbol. Should behave the same as bulk
// decoding whether invalid or valid.
for &s1 in alphabet.symbols.iter() {
decoded.clear();
bulk_decoded.clear();
// replace the last
*b64_bytes.last_mut().unwrap() = s1;
let bulk_res = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded);
let mut wrapped_reader = io::Cursor::new(&b64_bytes[..]);
let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
let stream_res = decoder.read_to_end(&mut decoded).map(|_| ()).map_err(|e| {
e.into_inner()
.and_then(|e| e.downcast::<DecodeError>().ok())
});
assert_eq!(bulk_res.map_err(|e| Some(Box::new(e))), stream_res);
}
}
}
#[test]
fn reports_invalid_byte_correctly() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut stream_decoded = Vec::new();
let mut bulk_decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
stream_decoded.clear();
bulk_decoded.clear();
let size = rng.gen_range(1..(10 * BUF_SIZE));
bytes.extend(iter::repeat(0).take(size));
rng.fill_bytes(&mut bytes[..size]);
assert_eq!(size, bytes.len());
let engine = GeneralPurpose::new(&alphabet::STANDARD, random_config(&mut rng));
engine.encode_string(&bytes[..], &mut b64);
// replace one byte, somewhere, with '*', which is invalid
let bad_byte_pos = rng.gen_range(0..b64.len());
let mut b64_bytes = b64.bytes().collect::<Vec<u8>>();
b64_bytes[bad_byte_pos] = b'*';
let mut wrapped_reader = io::Cursor::new(b64_bytes.clone());
let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
let read_decode_err = decoder
.read_to_end(&mut stream_decoded)
.map_err(|e| {
let kind = e.kind();
let inner = e
.into_inner()
.and_then(|e| e.downcast::<DecodeError>().ok());
inner.map(|i| (*i, kind))
})
.err()
.and_then(|o| o);
let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded).err();
// it's tricky to predict where the invalid data's offset will be since if it's in the last
// chunk it will be reported at the first padding location because it's treated as invalid
// padding. So, we just check that it's the same as it is for decoding all at once.
assert_eq!(
bulk_decode_err.map(|e| (e, io::ErrorKind::InvalidData)),
read_decode_err
);
}
}
#[test]
fn internal_padding_error_with_short_read_concatenated_texts_invalid_byte_error() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut reader_decoded = Vec::new();
let mut bulk_decoded = Vec::new();
// encodes with padding, requires that padding be present so we don't get InvalidPadding
// just because padding is there at all
let engine = STANDARD;
for _ in 0..10_000 {
bytes.clear();
b64.clear();
reader_decoded.clear();
bulk_decoded.clear();
// at least 2 bytes so there can be a split point between bytes
let size = rng.gen_range(2..(10 * BUF_SIZE));
bytes.resize(size, 0);
rng.fill_bytes(&mut bytes[..size]);
// Concatenate two valid b64s, yielding padding in the middle.
// This avoids scenarios that are challenging to assert on, like random padding location
// that might be InvalidLastSymbol when decoded at certain buffer sizes but InvalidByte
// when done all at once.
let split = loop {
// find a split point that will produce padding on the first part
let s = rng.gen_range(1..size);
if s % 3 != 0 {
// short enough to need padding
break s;
};
};
engine.encode_string(&bytes[..split], &mut b64);
assert!(b64.contains('='), "split: {}, b64: {}", split, b64);
let bad_byte_pos = b64.find('=').unwrap();
engine.encode_string(&bytes[split..], &mut b64);
let b64_bytes = b64.as_bytes();
// short read to make it plausible for padding to happen on a read boundary
let read_len = rng.gen_range(1..10);
let mut wrapped_reader = ShortRead {
max_read_len: read_len,
delegate: io::Cursor::new(&b64_bytes),
};
let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
let read_decode_err = decoder
.read_to_end(&mut reader_decoded)
.map_err(|e| {
*e.into_inner()
.and_then(|e| e.downcast::<DecodeError>().ok())
.unwrap()
})
.unwrap_err();
let bulk_decode_err = engine.decode_vec(b64_bytes, &mut bulk_decoded).unwrap_err();
assert_eq!(
bulk_decode_err,
read_decode_err,
"read len: {}, bad byte pos: {}, b64: {}",
read_len,
bad_byte_pos,
std::str::from_utf8(b64_bytes).unwrap()
);
assert_eq!(
DecodeError::InvalidByte(
split / 3 * 4
+ match split % 3 {
1 => 2,
2 => 3,
_ => unreachable!(),
},
PAD_BYTE
),
read_decode_err
);
}
}
#[test]
fn internal_padding_anywhere_error() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
let mut reader_decoded = Vec::new();
// encodes with padding, requires that padding be present so we don't get InvalidPadding
// just because padding is there at all
let engine = STANDARD;
for _ in 0..10_000 {
bytes.clear();
b64.clear();
reader_decoded.clear();
bytes.resize(10 * BUF_SIZE, 0);
rng.fill_bytes(&mut bytes[..]);
// Just shove a padding byte in there somewhere.
// The specific error to expect is challenging to predict precisely because it
// will vary based on the position of the padding in the quad and the read buffer
// length, but SOMETHING should go wrong.
engine.encode_string(&bytes[..], &mut b64);
let mut b64_bytes = b64.as_bytes().to_vec();
// put padding somewhere other than the last quad
b64_bytes[rng.gen_range(0..bytes.len() - 4)] = PAD_BYTE;
// short read to make it plausible for padding to happen on a read boundary
let read_len = rng.gen_range(1..10);
let mut wrapped_reader = ShortRead {
max_read_len: read_len,
delegate: io::Cursor::new(&b64_bytes),
};
let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
let result = decoder.read_to_end(&mut reader_decoded);
assert!(result.is_err());
}
}
fn consume_with_short_reads_and_validate<R: io::Read>(
rng: &mut rand::rngs::ThreadRng,
expected_bytes: &[u8],
decoded: &mut [u8],
short_reader: &mut R,
) {
let mut total_read = 0_usize;
loop {
assert!(
total_read <= expected_bytes.len(),
"tr {} size {}",
total_read,
expected_bytes.len()
);
if total_read == expected_bytes.len() {
assert_eq!(expected_bytes, &decoded[..total_read]);
// should be done
assert_eq!(0, short_reader.read(&mut *decoded).unwrap());
// didn't write anything
assert_eq!(expected_bytes, &decoded[..total_read]);
break;
}
let decode_len = rng.gen_range(1..cmp::max(2, expected_bytes.len() * 2));
let read = short_reader
.read(&mut decoded[total_read..total_read + decode_len])
.unwrap();
total_read += read;
}
}
/// Limits how many bytes a reader will provide in each read call.
/// Useful for shaking out code that may work fine only with typical input sources that always fill
/// the buffer.
struct RandomShortRead<'a, 'b, R: io::Read, N: rand::Rng> {
delegate: &'b mut R,
rng: &'a mut N,
}
impl<'a, 'b, R: io::Read, N: rand::Rng> io::Read for RandomShortRead<'a, 'b, R, N> {
fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> {
// avoid 0 since it means EOF for non-empty buffers
let effective_len = cmp::min(self.rng.gen_range(1..20), buf.len());
self.delegate.read(&mut buf[..effective_len])
}
}
struct ShortRead<R: io::Read> {
delegate: R,
max_read_len: usize,
}
impl<R: io::Read> io::Read for ShortRead<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let len = self.max_read_len.max(buf.len());
self.delegate.read(&mut buf[..len])
}
}

6
vendor/base64/src/read/mod.rs vendored Normal file
View file

@ -0,0 +1,6 @@
//! Implementations of `io::Read` to transparently decode base64.
mod decoder;
pub use self::decoder::DecoderReader;
#[cfg(test)]
mod decoder_tests;

117
vendor/base64/src/tests.rs vendored Normal file
View file

@ -0,0 +1,117 @@
use std::str;
use rand::{
distributions,
distributions::{Distribution as _, Uniform},
seq::SliceRandom,
Rng, SeedableRng,
};
use crate::{
alphabet,
encode::encoded_len,
engine::{
general_purpose::{GeneralPurpose, GeneralPurposeConfig},
Config, DecodePaddingMode, Engine,
},
};
#[test]
fn roundtrip_random_config_short() {
// exercise the slower encode/decode routines that operate on shorter buffers more vigorously
roundtrip_random_config(Uniform::new(0, 50), 10_000);
}
#[test]
fn roundtrip_random_config_long() {
roundtrip_random_config(Uniform::new(0, 1000), 10_000);
}
pub fn assert_encode_sanity(encoded: &str, padded: bool, input_len: usize) {
let input_rem = input_len % 3;
let expected_padding_len = if input_rem > 0 {
if padded {
3 - input_rem
} else {
0
}
} else {
0
};
let expected_encoded_len = encoded_len(input_len, padded).unwrap();
assert_eq!(expected_encoded_len, encoded.len());
let padding_len = encoded.chars().filter(|&c| c == '=').count();
assert_eq!(expected_padding_len, padding_len);
let _ = str::from_utf8(encoded.as_bytes()).expect("Base64 should be valid utf8");
}
fn roundtrip_random_config(input_len_range: Uniform<usize>, iterations: u32) {
let mut input_buf: Vec<u8> = Vec::new();
let mut encoded_buf = String::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
for _ in 0..iterations {
input_buf.clear();
encoded_buf.clear();
let input_len = input_len_range.sample(&mut rng);
let engine = random_engine(&mut rng);
for _ in 0..input_len {
input_buf.push(rng.gen());
}
engine.encode_string(&input_buf, &mut encoded_buf);
assert_encode_sanity(&encoded_buf, engine.config().encode_padding(), input_len);
assert_eq!(input_buf, engine.decode(&encoded_buf).unwrap());
}
}
pub fn random_config<R: Rng>(rng: &mut R) -> GeneralPurposeConfig {
let mode = rng.gen();
GeneralPurposeConfig::new()
.with_encode_padding(match mode {
DecodePaddingMode::Indifferent => rng.gen(),
DecodePaddingMode::RequireCanonical => true,
DecodePaddingMode::RequireNone => false,
})
.with_decode_padding_mode(mode)
.with_decode_allow_trailing_bits(rng.gen())
}
impl distributions::Distribution<DecodePaddingMode> for distributions::Standard {
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> DecodePaddingMode {
match rng.gen_range(0..=2) {
0 => DecodePaddingMode::Indifferent,
1 => DecodePaddingMode::RequireCanonical,
_ => DecodePaddingMode::RequireNone,
}
}
}
pub fn random_alphabet<R: Rng>(rng: &mut R) -> &'static alphabet::Alphabet {
ALPHABETS.choose(rng).unwrap()
}
pub fn random_engine<R: Rng>(rng: &mut R) -> GeneralPurpose {
let alphabet = random_alphabet(rng);
let config = random_config(rng);
GeneralPurpose::new(alphabet, config)
}
const ALPHABETS: &[alphabet::Alphabet] = &[
alphabet::URL_SAFE,
alphabet::STANDARD,
alphabet::CRYPT,
alphabet::BCRYPT,
alphabet::IMAP_MUTF7,
alphabet::BIN_HEX,
];

407
vendor/base64/src/write/encoder.rs vendored Normal file
View file

@ -0,0 +1,407 @@
use crate::engine::Engine;
use std::{
cmp, fmt, io,
io::{ErrorKind, Result},
};
pub(crate) const BUF_SIZE: usize = 1024;
/// The most bytes whose encoding will fit in `BUF_SIZE`
const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3;
// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping)
const MIN_ENCODE_CHUNK_SIZE: usize = 3;
/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer.
///
/// Because base64 has special handling for the end of the input data (padding, etc), there's a
/// `finish()` method on this type that encodes any leftover input bytes and adds padding if
/// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but
/// any error that occurs when invoking the underlying writer will be suppressed. If you want to
/// handle such errors, call `finish()` yourself.
///
/// # Examples
///
/// ```
/// use std::io::Write;
/// use base64::engine::general_purpose;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
///
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors or getting the delegate writer back
/// let delegate = enc.finish().unwrap();
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &delegate[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Errors
///
/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be
/// returned as per the contract of `Write`.
///
/// # Performance
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
///
/// # Limitations
///
/// Owing to the specification of the `write` and `flush` methods on the `Write` trait and their
/// implications for a buffering implementation, these methods may not behave as expected. In
/// particular, calling `write_all` on this interface may fail with `io::ErrorKind::WriteZero`.
/// See the documentation of the `Write` trait implementation for further details.
pub struct EncoderWriter<'e, E: Engine, W: io::Write> {
engine: &'e E,
/// Where encoded data is written to. It's an Option as it's None immediately before Drop is
/// called so that finish() can return the underlying writer. None implies that finish() has
/// been called successfully.
delegate: Option<W>,
/// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
/// with the next `write()`, encode it, then proceed with the rest of the input normally.
extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
/// How much of `extra` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`.
extra_input_occupied_len: usize,
/// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the underlying writer
/// did not write last time.
output: [u8; BUF_SIZE],
/// How much of `output` is occupied with encoded data that couldn't be written last time
output_occupied_len: usize,
/// panic safety: don't write again in destructor if writer panicked while we were writing to it
panicked: bool,
}
impl<'e, E: Engine, W: io::Write> fmt::Debug for EncoderWriter<'e, E, W> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}",
self.extra_input,
self.extra_input_occupied_len,
&self.output[0..5],
self.output_occupied_len
)
}
}
impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> {
/// Create a new encoder that will write to the provided delegate writer.
pub fn new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> {
EncoderWriter {
engine,
delegate: Some(delegate),
extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
extra_input_occupied_len: 0,
output: [0u8; BUF_SIZE],
output_occupied_len: 0,
panicked: false,
}
}
/// Encode all remaining buffered data and write it, including any trailing incomplete input
/// triples and associated padding.
///
/// Once this succeeds, no further writes or calls to this method are allowed.
///
/// This may write to the delegate writer multiple times if the delegate writer does not accept
/// all input provided to its `write` each invocation.
///
/// If you don't care about error handling, it is not necessary to call this function, as the
/// equivalent finalization is done by the Drop impl.
///
/// Returns the writer that this was constructed around.
///
/// # Errors
///
/// The first error that is not of `ErrorKind::Interrupted` will be returned.
pub fn finish(&mut self) -> Result<W> {
// If we could consume self in finish(), we wouldn't have to worry about this case, but
// finish() is retryable in the face of I/O errors, so we can't consume here.
if self.delegate.is_none() {
panic!("Encoder has already had finish() called");
};
self.write_final_leftovers()?;
let writer = self.delegate.take().expect("Writer must be present");
Ok(writer)
}
/// Write any remaining buffered data to the delegate writer.
fn write_final_leftovers(&mut self) -> Result<()> {
if self.delegate.is_none() {
// finish() has already successfully called this, and we are now in drop() with a None
// writer, so just no-op
return Ok(());
}
self.write_all_encoded_output()?;
if self.extra_input_occupied_len > 0 {
let encoded_len = self
.engine
.encode_slice(
&self.extra_input[..self.extra_input_occupied_len],
&mut self.output[..],
)
.expect("buffer is large enough");
self.output_occupied_len = encoded_len;
self.write_all_encoded_output()?;
// write succeeded, do not write the encoding of extra again if finish() is retried
self.extra_input_occupied_len = 0;
}
Ok(())
}
/// Write as much of the encoded output to the delegate writer as it will accept, and store the
/// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`.
///
/// # Errors
///
/// Errors from the delegate writer are returned. In the case of an error,
/// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean
/// that no write took place.
fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
self.panicked = true;
let res = self
.delegate
.as_mut()
.expect("Writer must be present")
.write(&self.output[..current_output_len]);
self.panicked = false;
res.map(|consumed| {
debug_assert!(consumed <= current_output_len);
if consumed < current_output_len {
self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap();
// If we're blocking on I/O, the minor inefficiency of copying bytes to the
// start of the buffer is the least of our concerns...
// TODO Rotate moves more than we need to; copy_within now stable.
self.output.rotate_left(consumed);
} else {
self.output_occupied_len = 0;
}
})
}
/// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`.
///
/// This is basically write_all for the remaining buffered data but without the undesirable
/// abort-on-`Ok(0)` behavior.
///
/// # Errors
///
/// Any error emitted by the delegate writer abort the write loop and is returned, unless it's
/// `Interrupted`, in which case the error is ignored and writes will continue.
fn write_all_encoded_output(&mut self) -> Result<()> {
while self.output_occupied_len > 0 {
let remaining_len = self.output_occupied_len;
match self.write_to_delegate(remaining_len) {
// try again on interrupts ala write_all
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
// other errors return
Err(e) => return Err(e),
// success no-ops because remaining length is already updated
Ok(_) => {}
};
}
debug_assert_eq!(0, self.output_occupied_len);
Ok(())
}
/// Unwraps this `EncoderWriter`, returning the base writer it writes base64 encoded output
/// to.
///
/// Normally this method should not be needed, since `finish()` returns the inner writer if
/// it completes successfully. That will also ensure all data has been flushed, which the
/// `into_inner()` function does *not* do.
///
/// Calling this method after `finish()` has completed successfully will panic, since the
/// writer has already been returned.
///
/// This method may be useful if the writer implements additional APIs beyond the `Write`
/// trait. Note that the inner writer might be in an error state or have an incomplete
/// base64 string written to it.
pub fn into_inner(mut self) -> W {
self.delegate
.take()
.expect("Encoder has already had finish() called")
}
}
impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> {
/// Encode input and then write to the delegate writer.
///
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
/// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which
/// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See
/// <https://github.com/rust-lang/rust/issues/56889> for more on that.
///
/// If the previous call to `write` provided more (encoded) data than the delegate writer could
/// accept in a single call to its `write`, the remaining data is buffered. As long as buffered
/// data is present, subsequent calls to `write` will try to write the remaining buffered data
/// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or
/// an error.
///
/// # Errors
///
/// Any errors emitted by the delegate writer are returned.
fn write(&mut self, input: &[u8]) -> Result<usize> {
if self.delegate.is_none() {
panic!("Cannot write more after calling finish()");
}
if input.is_empty() {
return Ok(0);
}
// The contract of `Write::write` places some constraints on this implementation:
// - a call to `write()` represents at most one call to a wrapped `Write`, so we can't
// iterate over the input and encode multiple chunks.
// - Errors mean that "no bytes were written to this writer", so we need to reset the
// internal state to what it was before the error occurred
// before reading any input, write any leftover encoded output from last time
if self.output_occupied_len > 0 {
let current_len = self.output_occupied_len;
return self
.write_to_delegate(current_len)
// did not read any input
.map(|_| 0);
}
debug_assert_eq!(0, self.output_occupied_len);
// how many bytes, if any, were read into `extra` to create a triple to encode
let mut extra_input_read_len = 0;
let mut input = input;
let orig_extra_len = self.extra_input_occupied_len;
let mut encoded_size = 0;
// always a multiple of MIN_ENCODE_CHUNK_SIZE
let mut max_input_len = MAX_INPUT_LEN;
// process leftover un-encoded input from last write
if self.extra_input_occupied_len > 0 {
debug_assert!(self.extra_input_occupied_len < 3);
if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE {
// Fill up `extra`, encode that into `output`, and consume as much of the rest of
// `input` as possible.
// We could write just the encoding of `extra` by itself but then we'd have to
// return after writing only 4 bytes, which is inefficient if the underlying writer
// would make a syscall.
extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len;
debug_assert!(extra_input_read_len > 0);
// overwrite only bytes that weren't already used. If we need to rollback extra_len
// (when the subsequent write errors), the old leading bytes will still be there.
self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
.copy_from_slice(&input[0..extra_input_read_len]);
let len = self.engine.internal_encode(
&self.extra_input[0..MIN_ENCODE_CHUNK_SIZE],
&mut self.output[..],
);
debug_assert_eq!(4, len);
input = &input[extra_input_read_len..];
// consider extra to be used up, since we encoded it
self.extra_input_occupied_len = 0;
// don't clobber where we just encoded to
encoded_size = 4;
// and don't read more than can be encoded
max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE;
// fall through to normal encoding
} else {
// `extra` and `input` are non empty, but `|extra| + |input| < 3`, so there must be
// 1 byte in each.
debug_assert_eq!(1, input.len());
debug_assert_eq!(1, self.extra_input_occupied_len);
self.extra_input[self.extra_input_occupied_len] = input[0];
self.extra_input_occupied_len += 1;
return Ok(1);
};
} else if input.len() < MIN_ENCODE_CHUNK_SIZE {
// `extra` is empty, and `input` fits inside it
self.extra_input[0..input.len()].copy_from_slice(input);
self.extra_input_occupied_len = input.len();
return Ok(input.len());
};
// either 0 or 1 complete chunks encoded from extra
debug_assert!(encoded_size == 0 || encoded_size == 4);
debug_assert!(
// didn't encode extra input
MAX_INPUT_LEN == max_input_len
// encoded one triple
|| MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE
);
// encode complete triples only
let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE);
let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len);
debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE);
debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);
encoded_size += self.engine.internal_encode(
&input[..(input_chunks_to_encode_len)],
&mut self.output[encoded_size..],
);
// not updating `self.output_occupied_len` here because if the below write fails, it should
// "never take place" -- the buffer contents we encoded are ignored and perhaps retried
// later, if the consumer chooses.
self.write_to_delegate(encoded_size)
// no matter whether we wrote the full encoded buffer or not, we consumed the same
// input
.map(|_| extra_input_read_len + input_chunks_to_encode_len)
.map_err(|e| {
// in case we filled and encoded `extra`, reset extra_len
self.extra_input_occupied_len = orig_extra_len;
e
})
}
/// Because this is usually treated as OK to call multiple times, it will *not* flush any
/// incomplete chunks of input or write padding.
/// # Errors
///
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
fn flush(&mut self) -> Result<()> {
self.write_all_encoded_output()?;
self.delegate
.as_mut()
.expect("Writer must be present")
.flush()
}
}
impl<'e, E: Engine, W: io::Write> Drop for EncoderWriter<'e, E, W> {
fn drop(&mut self) {
if !self.panicked {
// like `BufWriter`, ignore errors during drop
let _ = self.write_final_leftovers();
}
}
}

View file

@ -0,0 +1,207 @@
use super::encoder::EncoderWriter;
use crate::engine::Engine;
use std::io;
/// A `Write` implementation that base64-encodes data using the provided config and accumulates the
/// resulting base64 utf8 `&str` in a [StrConsumer] implementation (typically `String`), which is
/// then exposed via `into_inner()`.
///
/// # Examples
///
/// Buffer base64 in a new String:
///
/// ```
/// use std::io::Write;
/// use base64::engine::general_purpose;
///
/// let mut enc = base64::write::EncoderStringWriter::new(&general_purpose::STANDARD);
///
/// enc.write_all(b"asdf").unwrap();
///
/// // get the resulting String
/// let b64_string = enc.into_inner();
///
/// assert_eq!("YXNkZg==", &b64_string);
/// ```
///
/// Or, append to an existing `String`, which implements `StrConsumer`:
///
/// ```
/// use std::io::Write;
/// use base64::engine::general_purpose;
///
/// let mut buf = String::from("base64: ");
///
/// let mut enc = base64::write::EncoderStringWriter::from_consumer(
/// &mut buf,
/// &general_purpose::STANDARD);
///
/// enc.write_all(b"asdf").unwrap();
///
/// // release the &mut reference on buf
/// let _ = enc.into_inner();
///
/// assert_eq!("base64: YXNkZg==", &buf);
/// ```
///
/// # Performance
///
/// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain
/// bytes to a `io::Write`.
pub struct EncoderStringWriter<'e, E: Engine, S: StrConsumer> {
encoder: EncoderWriter<'e, E, Utf8SingleCodeUnitWriter<S>>,
}
impl<'e, E: Engine, S: StrConsumer> EncoderStringWriter<'e, E, S> {
/// Create a EncoderStringWriter that will append to the provided `StrConsumer`.
pub fn from_consumer(str_consumer: S, engine: &'e E) -> Self {
EncoderStringWriter {
encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, engine),
}
}
/// Encode all remaining buffered data, including any trailing incomplete input triples and
/// associated padding.
///
/// Returns the base64-encoded form of the accumulated written data.
pub fn into_inner(mut self) -> S {
self.encoder
.finish()
.expect("Writing to a consumer should never fail")
.str_consumer
}
}
impl<'e, E: Engine> EncoderStringWriter<'e, E, String> {
/// Create a EncoderStringWriter that will encode into a new `String` with the provided config.
pub fn new(engine: &'e E) -> Self {
EncoderStringWriter::from_consumer(String::new(), engine)
}
}
impl<'e, E: Engine, S: StrConsumer> io::Write for EncoderStringWriter<'e, E, S> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.encoder.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.encoder.flush()
}
}
/// An abstraction around consuming `str`s produced by base64 encoding.
pub trait StrConsumer {
/// Consume the base64 encoded data in `buf`
fn consume(&mut self, buf: &str);
}
/// As for io::Write, `StrConsumer` is implemented automatically for `&mut S`.
impl<S: StrConsumer + ?Sized> StrConsumer for &mut S {
fn consume(&mut self, buf: &str) {
(**self).consume(buf);
}
}
/// Pushes the str onto the end of the String
impl StrConsumer for String {
fn consume(&mut self, buf: &str) {
self.push_str(buf);
}
}
/// A `Write` that only can handle bytes that are valid single-byte UTF-8 code units.
///
/// This is safe because we only use it when writing base64, which is always valid UTF-8.
struct Utf8SingleCodeUnitWriter<S: StrConsumer> {
str_consumer: S,
}
impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
// Because we expect all input to be valid utf-8 individual bytes, we can encode any buffer
// length
let s = std::str::from_utf8(buf).expect("Input must be valid UTF-8");
self.str_consumer.consume(s);
Ok(buf.len())
}
fn flush(&mut self) -> io::Result<()> {
// no op
Ok(())
}
}
#[cfg(test)]
mod tests {
use crate::{
engine::Engine, tests::random_engine, write::encoder_string_writer::EncoderStringWriter,
};
use rand::Rng;
use std::cmp;
use std::io::Write;
#[test]
fn every_possible_split_of_input() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut normal_encoded = String::new();
let size = 5_000;
for i in 0..size {
orig_data.clear();
normal_encoded.clear();
orig_data.resize(size, 0);
rng.fill(&mut orig_data[..]);
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut normal_encoded);
let mut stream_encoder = EncoderStringWriter::new(&engine);
// Write the first i bytes, then the rest
stream_encoder.write_all(&orig_data[0..i]).unwrap();
stream_encoder.write_all(&orig_data[i..]).unwrap();
let stream_encoded = stream_encoder.into_inner();
assert_eq!(normal_encoded, stream_encoded);
}
}
#[test]
fn incremental_writes() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut normal_encoded = String::new();
let size = 5_000;
for _ in 0..size {
orig_data.clear();
normal_encoded.clear();
orig_data.resize(size, 0);
rng.fill(&mut orig_data[..]);
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut normal_encoded);
let mut stream_encoder = EncoderStringWriter::new(&engine);
// write small nibbles of data
let mut offset = 0;
while offset < size {
let nibble_size = cmp::min(rng.gen_range(0..=64), size - offset);
let len = stream_encoder
.write(&orig_data[offset..offset + nibble_size])
.unwrap();
offset += len;
}
let stream_encoded = stream_encoder.into_inner();
assert_eq!(normal_encoded, stream_encoded);
}
}
}

554
vendor/base64/src/write/encoder_tests.rs vendored Normal file
View file

@ -0,0 +1,554 @@
use std::io::{Cursor, Write};
use std::{cmp, io, str};
use rand::Rng;
use crate::{
alphabet::{STANDARD, URL_SAFE},
engine::{
general_purpose::{GeneralPurpose, NO_PAD, PAD},
Engine,
},
tests::random_engine,
};
use super::EncoderWriter;
const URL_SAFE_ENGINE: GeneralPurpose = GeneralPurpose::new(&URL_SAFE, PAD);
const NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&STANDARD, NO_PAD);
#[test]
fn encode_three_bytes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
let sz = enc.write(b"abc").unwrap();
assert_eq!(sz, 3);
}
assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abc").as_bytes());
}
#[test]
fn encode_nine_bytes_two_writes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
let sz = enc.write(b"abcdef").unwrap();
assert_eq!(sz, 6);
let sz = enc.write(b"ghi").unwrap();
assert_eq!(sz, 3);
}
assert_eq!(
&c.get_ref()[..],
URL_SAFE_ENGINE.encode("abcdefghi").as_bytes()
);
}
#[test]
fn encode_one_then_two_bytes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
let sz = enc.write(b"a").unwrap();
assert_eq!(sz, 1);
let sz = enc.write(b"bc").unwrap();
assert_eq!(sz, 2);
}
assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abc").as_bytes());
}
#[test]
fn encode_one_then_five_bytes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
let sz = enc.write(b"a").unwrap();
assert_eq!(sz, 1);
let sz = enc.write(b"bcdef").unwrap();
assert_eq!(sz, 5);
}
assert_eq!(
&c.get_ref()[..],
URL_SAFE_ENGINE.encode("abcdef").as_bytes()
);
}
#[test]
fn encode_1_2_3_bytes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
let sz = enc.write(b"a").unwrap();
assert_eq!(sz, 1);
let sz = enc.write(b"bc").unwrap();
assert_eq!(sz, 2);
let sz = enc.write(b"def").unwrap();
assert_eq!(sz, 3);
}
assert_eq!(
&c.get_ref()[..],
URL_SAFE_ENGINE.encode("abcdef").as_bytes()
);
}
#[test]
fn encode_with_padding() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
enc.write_all(b"abcd").unwrap();
enc.flush().unwrap();
}
assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abcd").as_bytes());
}
#[test]
fn encode_with_padding_multiple_writes() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
assert_eq!(1, enc.write(b"a").unwrap());
assert_eq!(2, enc.write(b"bc").unwrap());
assert_eq!(3, enc.write(b"def").unwrap());
assert_eq!(1, enc.write(b"g").unwrap());
enc.flush().unwrap();
}
assert_eq!(
&c.get_ref()[..],
URL_SAFE_ENGINE.encode("abcdefg").as_bytes()
);
}
#[test]
fn finish_writes_extra_byte() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
assert_eq!(6, enc.write(b"abcdef").unwrap());
// will be in extra
assert_eq!(1, enc.write(b"g").unwrap());
// 1 trailing byte = 2 encoded chars
let _ = enc.finish().unwrap();
}
assert_eq!(
&c.get_ref()[..],
URL_SAFE_ENGINE.encode("abcdefg").as_bytes()
);
}
#[test]
fn write_partial_chunk_encodes_partial_chunk() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
// nothing encoded yet
assert_eq!(2, enc.write(b"ab").unwrap());
// encoded here
let _ = enc.finish().unwrap();
}
assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("ab").as_bytes());
assert_eq!(3, c.get_ref().len());
}
#[test]
fn write_1_chunk_encodes_complete_chunk() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
assert_eq!(3, enc.write(b"abc").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes());
assert_eq!(4, c.get_ref().len());
}
#[test]
fn write_1_chunk_and_partial_encodes_only_complete_chunk() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
// "d" not consumed since it's not a full chunk
assert_eq!(3, enc.write(b"abcd").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes());
assert_eq!(4, c.get_ref().len());
}
#[test]
fn write_2_partials_to_exactly_complete_chunk_encodes_complete_chunk() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
assert_eq!(1, enc.write(b"a").unwrap());
assert_eq!(2, enc.write(b"bc").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes());
assert_eq!(4, c.get_ref().len());
}
#[test]
fn write_partial_then_enough_to_complete_chunk_but_not_complete_another_chunk_encodes_complete_chunk_without_consuming_remaining(
) {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
assert_eq!(1, enc.write(b"a").unwrap());
// doesn't consume "d"
assert_eq!(2, enc.write(b"bcd").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes());
assert_eq!(4, c.get_ref().len());
}
#[test]
fn write_partial_then_enough_to_complete_chunk_and_another_chunk_encodes_complete_chunks() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
assert_eq!(1, enc.write(b"a").unwrap());
// completes partial chunk, and another chunk
assert_eq!(5, enc.write(b"bcdef").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abcdef").as_bytes());
assert_eq!(8, c.get_ref().len());
}
#[test]
fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_partial_chunk_encodes_only_complete_chunks(
) {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
assert_eq!(1, enc.write(b"a").unwrap());
// completes partial chunk, and another chunk, with one more partial chunk that's not
// consumed
assert_eq!(5, enc.write(b"bcdefe").unwrap());
let _ = enc.finish().unwrap();
}
assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abcdef").as_bytes());
assert_eq!(8, c.get_ref().len());
}
#[test]
fn drop_calls_finish_for_you() {
let mut c = Cursor::new(Vec::new());
{
let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
assert_eq!(1, enc.write(b"a").unwrap());
}
assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("a").as_bytes());
assert_eq!(2, c.get_ref().len());
}
#[test]
fn every_possible_split_of_input() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut stream_encoded = Vec::<u8>::new();
let mut normal_encoded = String::new();
let size = 5_000;
for i in 0..size {
orig_data.clear();
stream_encoded.clear();
normal_encoded.clear();
for _ in 0..size {
orig_data.push(rng.gen());
}
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut normal_encoded);
{
let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, &engine);
// Write the first i bytes, then the rest
stream_encoder.write_all(&orig_data[0..i]).unwrap();
stream_encoder.write_all(&orig_data[i..]).unwrap();
}
assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap());
}
}
#[test]
fn encode_random_config_matches_normal_encode_reasonable_input_len() {
// choose up to 2 * buf size, so ~half the time it'll use a full buffer
do_encode_random_config_matches_normal_encode(super::encoder::BUF_SIZE * 2);
}
#[test]
fn encode_random_config_matches_normal_encode_tiny_input_len() {
do_encode_random_config_matches_normal_encode(10);
}
#[test]
fn retrying_writes_that_error_with_interrupted_works() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut stream_encoded = Vec::<u8>::new();
let mut normal_encoded = String::new();
for _ in 0..1_000 {
orig_data.clear();
stream_encoded.clear();
normal_encoded.clear();
let orig_len: usize = rng.gen_range(100..20_000);
for _ in 0..orig_len {
orig_data.push(rng.gen());
}
// encode the normal way
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut normal_encoded);
// encode via the stream encoder
{
let mut interrupt_rng = rand::thread_rng();
let mut interrupting_writer = InterruptingWriter {
w: &mut stream_encoded,
rng: &mut interrupt_rng,
fraction: 0.8,
};
let mut stream_encoder = EncoderWriter::new(&mut interrupting_writer, &engine);
let mut bytes_consumed = 0;
while bytes_consumed < orig_len {
// use short inputs since we want to use `extra` a lot as that's what needs rollback
// when errors occur
let input_len: usize = cmp::min(rng.gen_range(0..10), orig_len - bytes_consumed);
retry_interrupted_write_all(
&mut stream_encoder,
&orig_data[bytes_consumed..bytes_consumed + input_len],
)
.unwrap();
bytes_consumed += input_len;
}
loop {
let res = stream_encoder.finish();
match res {
Ok(_) => break,
Err(e) => match e.kind() {
io::ErrorKind::Interrupted => continue,
_ => panic!("{:?}", e), // bail
},
}
}
assert_eq!(orig_len, bytes_consumed);
}
assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap());
}
}
#[test]
fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_encoded_data() {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut stream_encoded = Vec::<u8>::new();
let mut normal_encoded = String::new();
for _ in 0..1_000 {
orig_data.clear();
stream_encoded.clear();
normal_encoded.clear();
let orig_len: usize = rng.gen_range(100..20_000);
for _ in 0..orig_len {
orig_data.push(rng.gen());
}
// encode the normal way
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut normal_encoded);
// encode via the stream encoder
{
let mut partial_rng = rand::thread_rng();
let mut partial_writer = PartialInterruptingWriter {
w: &mut stream_encoded,
rng: &mut partial_rng,
full_input_fraction: 0.1,
no_interrupt_fraction: 0.1,
};
let mut stream_encoder = EncoderWriter::new(&mut partial_writer, &engine);
let mut bytes_consumed = 0;
while bytes_consumed < orig_len {
// use at most medium-length inputs to exercise retry logic more aggressively
let input_len: usize = cmp::min(rng.gen_range(0..100), orig_len - bytes_consumed);
let res =
stream_encoder.write(&orig_data[bytes_consumed..bytes_consumed + input_len]);
// retry on interrupt
match res {
Ok(len) => bytes_consumed += len,
Err(e) => match e.kind() {
io::ErrorKind::Interrupted => continue,
_ => {
panic!("should not see other errors");
}
},
}
}
let _ = stream_encoder.finish().unwrap();
assert_eq!(orig_len, bytes_consumed);
}
assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap());
}
}
/// Retry writes until all the data is written or an error that isn't Interrupted is returned.
fn retry_interrupted_write_all<W: Write>(w: &mut W, buf: &[u8]) -> io::Result<()> {
let mut bytes_consumed = 0;
while bytes_consumed < buf.len() {
let res = w.write(&buf[bytes_consumed..]);
match res {
Ok(len) => bytes_consumed += len,
Err(e) => match e.kind() {
io::ErrorKind::Interrupted => continue,
_ => return Err(e),
},
}
}
Ok(())
}
fn do_encode_random_config_matches_normal_encode(max_input_len: usize) {
let mut rng = rand::thread_rng();
let mut orig_data = Vec::<u8>::new();
let mut stream_encoded = Vec::<u8>::new();
let mut normal_encoded = String::new();
for _ in 0..1_000 {
orig_data.clear();
stream_encoded.clear();
normal_encoded.clear();
let orig_len: usize = rng.gen_range(100..20_000);
for _ in 0..orig_len {
orig_data.push(rng.gen());
}
// encode the normal way
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut normal_encoded);
// encode via the stream encoder
{
let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, &engine);
let mut bytes_consumed = 0;
while bytes_consumed < orig_len {
let input_len: usize =
cmp::min(rng.gen_range(0..max_input_len), orig_len - bytes_consumed);
// write a little bit of the data
stream_encoder
.write_all(&orig_data[bytes_consumed..bytes_consumed + input_len])
.unwrap();
bytes_consumed += input_len;
}
let _ = stream_encoder.finish().unwrap();
assert_eq!(orig_len, bytes_consumed);
}
assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap());
}
}
/// A `Write` implementation that returns Interrupted some fraction of the time, randomly.
struct InterruptingWriter<'a, W: 'a + Write, R: 'a + Rng> {
w: &'a mut W,
rng: &'a mut R,
/// In [0, 1]. If a random number in [0, 1] is `<= threshold`, `Write` methods will return
/// an `Interrupted` error
fraction: f64,
}
impl<'a, W: Write, R: Rng> Write for InterruptingWriter<'a, W, R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
if self.rng.gen_range(0.0..1.0) <= self.fraction {
return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted"));
}
self.w.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
if self.rng.gen_range(0.0..1.0) <= self.fraction {
return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted"));
}
self.w.flush()
}
}
/// A `Write` implementation that sometimes will only write part of its input.
struct PartialInterruptingWriter<'a, W: 'a + Write, R: 'a + Rng> {
w: &'a mut W,
rng: &'a mut R,
/// In [0, 1]. If a random number in [0, 1] is `<= threshold`, `write()` will write all its
/// input. Otherwise, it will write a random substring
full_input_fraction: f64,
no_interrupt_fraction: f64,
}
impl<'a, W: Write, R: Rng> Write for PartialInterruptingWriter<'a, W, R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
if self.rng.gen_range(0.0..1.0) > self.no_interrupt_fraction {
return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted"));
}
if self.rng.gen_range(0.0..1.0) <= self.full_input_fraction || buf.is_empty() {
// pass through the buf untouched
self.w.write(buf)
} else {
// only use a prefix of it
self.w
.write(&buf[0..(self.rng.gen_range(0..(buf.len() - 1)))])
}
}
fn flush(&mut self) -> io::Result<()> {
self.w.flush()
}
}

11
vendor/base64/src/write/mod.rs vendored Normal file
View file

@ -0,0 +1,11 @@
//! Implementations of `io::Write` to transparently handle base64.
mod encoder;
mod encoder_string_writer;
pub use self::{
encoder::EncoderWriter,
encoder_string_writer::{EncoderStringWriter, StrConsumer},
};
#[cfg(test)]
mod encoder_tests;