diff --git a/Cargo.lock b/Cargo.lock index 9c73674c84e..e094f1f482c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1362,12 +1362,6 @@ dependencies = [ "maybe-uninit", ] -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "strsim" version = "0.8.0" @@ -1522,17 +1516,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "twox-hash" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04f8ab788026715fa63b31960869617cba39117e520eb415b0139543e325ab59" -dependencies = [ - "cfg-if 0.1.10", - "rand 0.7.3", - "static_assertions", -] - [[package]] name = "typenum" version = "1.13.0" @@ -2301,10 +2284,11 @@ name = "uu_sort" version = "0.0.6" dependencies = [ "clap", + "fnv", "itertools 0.8.2", "rand 0.7.3", + "rayon", "semver", - "twox-hash", "uucore", "uucore_procs", ] diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 7a6f95c416b..814e4bbbaf6 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -15,9 +15,10 @@ edition = "2018" path = "src/sort.rs" [dependencies] +rayon = "1.5" rand = "0.7" clap = "2.33" -twox-hash = "1.6.0" +fnv = "1.0.7" itertools = "0.8.0" semver = "0.9.0" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 6c29ad98d55..36e6ad71ed4 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -7,23 +7,29 @@ // * file that was distributed with this source code. #![allow(dead_code)] +// Although these links don't always seem to describe reality, check out the POSIX and GNU specs: +// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html +// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html + // spell-checker:ignore (ToDO) outfile nondictionary #[macro_use] extern crate uucore; use clap::{App, Arg}; +use fnv::FnvHasher; use itertools::Itertools; use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; +use rayon::prelude::*; use semver::Version; use std::cmp::Ordering; use std::collections::BinaryHeap; +use std::env; use std::fs::File; use std::hash::{Hash, Hasher}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Lines, Read, Write}; use std::mem::replace; use std::path::Path; -use twox_hash::XxHash64; use uucore::fs::is_stdin_interactive; // for Iterator::dedup() static NAME: &str = "sort"; @@ -33,27 +39,37 @@ static VERSION: &str = env!("CARGO_PKG_VERSION"); static OPT_HUMAN_NUMERIC_SORT: &str = "human-numeric-sort"; static OPT_MONTH_SORT: &str = "month-sort"; static OPT_NUMERIC_SORT: &str = "numeric-sort"; +static OPT_GENERAL_NUMERIC_SORT: &str = "general-numeric-sort"; static OPT_VERSION_SORT: &str = "version-sort"; static OPT_DICTIONARY_ORDER: &str = "dictionary-order"; static OPT_MERGE: &str = "merge"; static OPT_CHECK: &str = "check"; +static OPT_CHECK_SILENT: &str = "check-silent"; static OPT_IGNORE_CASE: &str = "ignore-case"; static OPT_IGNORE_BLANKS: &str = "ignore-blanks"; +static OPT_IGNORE_NONPRINTING: &str = "ignore-nonprinting"; static OPT_OUTPUT: &str = "output"; static OPT_REVERSE: &str = "reverse"; static OPT_STABLE: &str = "stable"; static OPT_UNIQUE: &str = "unique"; static OPT_RANDOM: &str = "random-sort"; +static OPT_ZERO_TERMINATED: &str = "zero-terminated"; +static OPT_PARALLEL: &str = "parallel"; +static OPT_FILES0_FROM: &str = "files0-from"; static ARG_FILES: &str = "files"; static DECIMAL_PT: char = '.'; static THOUSANDS_SEP: char = ','; +static NEGATIVE: char = '-'; +static POSITIVE: char = '+'; + #[derive(Eq, Ord, PartialEq, PartialOrd)] enum SortMode { Numeric, HumanNumeric, + GeneralNumeric, Month, Version, Default, @@ -67,10 +83,13 @@ struct Settings { stable: bool, unique: bool, check: bool, + check_silent: bool, random: bool, - compare_fns: Vec Ordering>, + compare_fn: fn(&str, &str) -> Ordering, transform_fns: Vec String>, + threads: String, salt: String, + zero_terminated: bool, } impl Default for Settings { @@ -83,10 +102,13 @@ impl Default for Settings { stable: false, unique: false, check: false, + check_silent: false, random: false, - compare_fns: Vec::new(), + compare_fn: default_compare, transform_fns: Vec::new(), + threads: String::new(), salt: String::new(), + zero_terminated: false, } } } @@ -206,6 +228,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .long(OPT_NUMERIC_SORT) .help("compare according to string numerical value"), ) + .arg( + Arg::with_name(OPT_GENERAL_NUMERIC_SORT) + .short("g") + .long(OPT_GENERAL_NUMERIC_SORT) + .help("compare according to string general numerical value"), + ) .arg( Arg::with_name(OPT_VERSION_SORT) .short("V") @@ -230,12 +258,24 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .long(OPT_CHECK) .help("check for sorted input; do not sort"), ) + .arg( + Arg::with_name(OPT_CHECK_SILENT) + .short("C") + .long(OPT_CHECK_SILENT) + .help("exit successfully if the given file is already sorted, and exit with status 1 otherwise. "), + ) .arg( Arg::with_name(OPT_IGNORE_CASE) .short("f") .long(OPT_IGNORE_CASE) .help("fold lower case to upper case characters"), ) + .arg( + Arg::with_name(OPT_IGNORE_NONPRINTING) + .short("-i") + .long(OPT_IGNORE_NONPRINTING) + .help("ignore nonprinting characters"), + ) .arg( Arg::with_name(OPT_IGNORE_BLANKS) .short("b") @@ -274,18 +314,65 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .long(OPT_UNIQUE) .help("output only the first of an equal run"), ) + .arg( + Arg::with_name(OPT_ZERO_TERMINATED) + .short("z") + .long(OPT_ZERO_TERMINATED) + .help("line delimiter is NUL, not newline"), + ) + .arg( + Arg::with_name(OPT_PARALLEL) + .long(OPT_PARALLEL) + .help("change the number of threads running concurrently to N") + .takes_value(true) + .value_name("NUM_THREADS"), + ) + .arg( + Arg::with_name(OPT_FILES0_FROM) + .long(OPT_FILES0_FROM) + .help("read input from the files specified by NUL-terminated NUL_FILES") + .takes_value(true) + .value_name("NUL_FILES") + .multiple(true), + ) .arg(Arg::with_name(ARG_FILES).multiple(true).takes_value(true)) .get_matches_from(args); - let mut files: Vec = matches - .values_of(ARG_FILES) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_default(); + // check whether user specified a zero terminated list of files for input, otherwise read files from args + let mut files: Vec = if matches.is_present(OPT_FILES0_FROM) { + let files0_from: Vec = matches + .values_of(OPT_FILES0_FROM) + .map(|v| v.map(ToString::to_string).collect()) + .unwrap_or_default(); + + let mut files = Vec::new(); + for path in &files0_from { + let (reader, _) = open(path.as_str()).expect("Could not read from file specified."); + let buf_reader = BufReader::new(reader); + for line in buf_reader.split(b'\0') { + if let Ok(n) = line { + files.push( + std::str::from_utf8(&n) + .expect("Could not parse zero terminated string from input.") + .to_string(), + ); + } + } + } + files + } else { + matches + .values_of(ARG_FILES) + .map(|v| v.map(ToString::to_string).collect()) + .unwrap_or_default() + }; settings.mode = if matches.is_present(OPT_HUMAN_NUMERIC_SORT) { SortMode::HumanNumeric } else if matches.is_present(OPT_MONTH_SORT) { SortMode::Month + } else if matches.is_present(OPT_GENERAL_NUMERIC_SORT) { + SortMode::GeneralNumeric } else if matches.is_present(OPT_NUMERIC_SORT) { SortMode::Numeric } else if matches.is_present(OPT_VERSION_SORT) { @@ -294,12 +381,29 @@ pub fn uumain(args: impl uucore::Args) -> i32 { SortMode::Default }; + if matches.is_present(OPT_PARALLEL) { + // "0" is default - threads = num of cores + settings.threads = matches + .value_of(OPT_PARALLEL) + .map(String::from) + .unwrap_or("0".to_string()); + env::set_var("RAYON_NUM_THREADS", &settings.threads); + } + if matches.is_present(OPT_DICTIONARY_ORDER) { settings.transform_fns.push(remove_nondictionary_chars); + } else if matches.is_present(OPT_IGNORE_NONPRINTING) { + settings.transform_fns.push(remove_nonprinting_chars); } + settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED); settings.merge = matches.is_present(OPT_MERGE); + settings.check = matches.is_present(OPT_CHECK); + if matches.is_present(OPT_CHECK_SILENT) { + settings.check_silent = matches.is_present(OPT_CHECK_SILENT); + settings.check = true; + }; if matches.is_present(OPT_IGNORE_CASE) { settings.transform_fns.push(|s| s.to_uppercase()); @@ -327,20 +431,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 { crash!(1, "sort: extra operand `{}' not allowed with -c", files[1]) } - settings.compare_fns.push(match settings.mode { + settings.compare_fn = match settings.mode { SortMode::Numeric => numeric_compare, + SortMode::GeneralNumeric => general_numeric_compare, SortMode::HumanNumeric => human_numeric_size_compare, SortMode::Month => month_compare, SortMode::Version => version_compare, SortMode::Default => default_compare, - }); - - if !settings.stable { - match settings.mode { - SortMode::Default => {} - _ => settings.compare_fns.push(default_compare), - } - } + }; exec(files, &mut settings) } @@ -359,67 +457,79 @@ fn exec(files: Vec, settings: &mut Settings) -> i32 { if settings.merge { file_merger.push_file(buf_reader.lines()); - } else if settings.check { - return exec_check_file(buf_reader.lines(), &settings); + } else if settings.zero_terminated { + for line in buf_reader.split(b'\0') { + if let Ok(n) = line { + lines.push( + std::str::from_utf8(&n) + .expect("Could not parse string from zero terminated input.") + .to_string(), + ); + } + } } else { for line in buf_reader.lines() { if let Ok(n) = line { lines.push(n); - } else { - break; } } } } - sort_by(&mut lines, &settings); + if settings.check { + return exec_check_file(lines, &settings); + } else { + sort_by(&mut lines, &settings); + } if settings.merge { if settings.unique { - print_sorted(file_merger.dedup(), &settings.outfile) + print_sorted(file_merger.dedup(), &settings) } else { - print_sorted(file_merger, &settings.outfile) + print_sorted(file_merger, &settings) } - } else if settings.unique && settings.mode == SortMode::Numeric { + } else if settings.mode == SortMode::Month && settings.unique { print_sorted( lines .iter() - .dedup_by(|a, b| num_sort_dedup(a) == num_sort_dedup(b)), - &settings.outfile, + .dedup_by(|a, b| get_months_dedup(a) == get_months_dedup(b)), + &settings, ) } else if settings.unique { - print_sorted(lines.iter().dedup(), &settings.outfile) + print_sorted( + lines + .iter() + .dedup_by(|a, b| get_nums_dedup(a) == get_nums_dedup(b)), + &settings, + ) } else { - print_sorted(lines.iter(), &settings.outfile) + print_sorted(lines.iter(), &settings) } 0 } -fn exec_check_file(lines: Lines>>, settings: &Settings) -> i32 { +fn exec_check_file(unwrapped_lines: Vec, settings: &Settings) -> i32 { // errors yields the line before each disorder, // plus the last line (quirk of .coalesce()) - let unwrapped_lines = lines.filter_map(|maybe_line| { - if let Ok(line) = maybe_line { - Some(line) - } else { - None - } - }); - let mut errors = unwrapped_lines - .enumerate() - .coalesce(|(last_i, last_line), (i, line)| { - if compare_by(&last_line, &line, &settings) == Ordering::Greater { - Err(((last_i, last_line), (i, line))) - } else { - Ok((i, line)) - } - }); + let mut errors = + unwrapped_lines + .iter() + .enumerate() + .coalesce(|(last_i, last_line), (i, line)| { + if compare_by(&last_line, &line, &settings) == Ordering::Greater { + Err(((last_i, last_line), (i, line))) + } else { + Ok((i, line)) + } + }); if let Some((first_error_index, _line)) = errors.next() { // Check for a second "error", as .coalesce() always returns the last // line, no matter what our merging function does. if let Some(_last_line_or_next_error) = errors.next() { - println!("sort: disorder in line {}", first_error_index); + if !settings.check_silent { + println!("sort: disorder in line {}", first_error_index); + }; 1 } else { // first "error" was actually the last line. @@ -431,8 +541,9 @@ fn exec_check_file(lines: Lines>>, settings: &Settings) } } +#[inline(always)] fn transform(line: &str, settings: &Settings) -> String { - let mut transformed = line.to_string(); + let mut transformed = line.to_owned(); for transform_fn in &settings.transform_fns { transformed = transform_fn(&transformed); } @@ -440,8 +551,9 @@ fn transform(line: &str, settings: &Settings) -> String { transformed } +#[inline(always)] fn sort_by(lines: &mut Vec, settings: &Settings) { - lines.sort_by(|a, b| compare_by(a, b, &settings)) + lines.par_sort_by(|a, b| compare_by(a, b, &settings)) } fn compare_by(a: &str, b: &str, settings: &Settings) -> Ordering { @@ -454,72 +566,198 @@ fn compare_by(a: &str, b: &str, settings: &Settings) -> Ordering { (a, b) }; - for compare_fn in &settings.compare_fns { - let cmp: Ordering = if settings.random { - random_shuffle(a, b, settings.salt.clone()) + // 1st Compare + let mut cmp: Ordering = if settings.random { + random_shuffle(a, b, settings.salt.clone()) + } else { + (settings.compare_fn)(a, b) + }; + + // Call "last resort compare" on any equal + if cmp == Ordering::Equal { + if settings.random || settings.stable || settings.unique { + cmp = Ordering::Equal } else { - compare_fn(a, b) + cmp = default_compare(a, b) }; - if cmp != Ordering::Equal { - if settings.reverse { - return cmp.reverse(); - } else { - return cmp; - } - } + }; + + if settings.reverse { + return cmp.reverse(); + } else { + return cmp; } - Ordering::Equal } +// Test output against BSDs and GNU with their locale +// env var set to lc_ctype=utf-8 to enjoy the exact same output. +#[inline(always)] fn default_compare(a: &str, b: &str) -> Ordering { a.cmp(b) } -fn get_leading_number(a: &str) -> &str { +// This function does the initial detection of numeric lines. +// Lines starting with a number or positive or negative sign. +// It also strips the string of any thing that could never +// be a number for the purposes of any type of numeric comparison. +#[inline(always)] +fn leading_num_common(a: &str) -> &str { let mut s = ""; - for c in a.chars() { - if !c.is_numeric() && !c.eq(&'-') && !c.eq(&' ') && !c.eq(&'.') && !c.eq(&',') { - s = a.trim().split(c).next().unwrap(); + for (idx, c) in a.char_indices() { + // check whether char is numeric, whitespace or decimal point or thousand seperator + if !c.is_numeric() + && !c.is_whitespace() + && !c.eq(&DECIMAL_PT) + && !c.eq(&THOUSANDS_SEP) + // check for e notation + && !c.eq(&'e') + && !c.eq(&'E') + // check whether first char is + or - + && !a.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) + && !a.chars().nth(0).unwrap_or('\0').eq(&NEGATIVE) + { + // Strip string of non-numeric trailing chars + s = &a[..idx]; break; } - s = a.trim(); + // If line is not a number line, return the line as is + s = a; } - return s; + s } -// Matches GNU behavior, see: -// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html -// Specifically *not* the same as sort -n | uniq -fn num_sort_dedup(a: &str) -> &str { - // Empty lines are dumped - if a.is_empty() { - return "0"; - // And lines that don't begin numerically are dumped - } else if !a.trim().chars().nth(0).unwrap_or('\0').is_numeric() { - return "0"; +// This function cleans up the initial comparison done by leading_num_common for a numeric compare. +// GNU sort does its numeric comparison through strnumcmp. However, we don't have or +// may not want to use libc. Instead we emulate the GNU sort numeric compare by ignoring +// those leading number lines GNU sort would not recognize. GNU numeric compare would +// not recognize a positive sign or scientific/E notation so we strip those elements here. +fn get_leading_num(a: &str) -> &str { + let mut s = ""; + let b = leading_num_common(a); + + // GNU numeric sort doesn't recognize '+' or 'e' notation so we strip + for (idx, c) in b.char_indices() { + if c.eq(&'e') || c.eq(&'E') || b.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) { + s = &b[..idx]; + break; + } + // If no further processing needed to be done, return the line as-is to be sorted + s = b; + } + + // And empty number or non-number lines are to be treated as ‘0’ but only for numeric sort + // All '0'-ed lines will be sorted later, but only amongst themselves, during the so-called 'last resort comparison.' + if s.is_empty() { + s = "0"; + }; + s +} + +// This function cleans up the initial comparison done by leading_num_common for a general numeric compare. +// In contrast to numeric compare, GNU general numeric/FP sort *should* recognize positive signs and +// scientific notation, so we strip those lines only after the end of the following numeric string. +// For example, 5e10KFD would be 5e10 or 5x10^10 and +10000HFKJFK would become 10000. +fn get_leading_gen(a: &str) -> String { + // Make this iter peekable to see if next char is numeric + let mut p_iter = leading_num_common(a).chars().peekable(); + let mut r = String::new(); + // Cleanup raw stripped strings + for c in p_iter.to_owned() { + let next_char_numeric = p_iter.peek().unwrap_or(&'\0').is_numeric(); + // Only general numeric recognizes e notation and, see block below, the '+' sign + if (c.eq(&'e') && !next_char_numeric) + || (c.eq(&'E') && !next_char_numeric) + { + r = a.split(c).next().unwrap_or("").to_owned(); + break; + // If positive sign and next char is not numeric, split at postive sign at keep trailing numbers + // There is a more elegant way to do this in Rust 1.45, std::str::strip_prefix + } else if c.eq(&POSITIVE) && !next_char_numeric { + let mut v: Vec<&str> = a.split(c).collect(); + let x = v.split_off(1); + r = x.join(""); + break; + // If no further processing needed to be done, return the line as-is to be sorted + } else { + r = a.to_owned(); + } + } + r +} + +fn get_months_dedup(a: &str) -> String { + let pattern = if a.trim().len().ge(&3) { + // Split at 3rd char and get first element of tuple ".0" + a.split_at(3).0 } else { - // Prepare lines for comparison of only the numerical leading numbers - return get_leading_number(a); + "" }; + + let month = match pattern.to_uppercase().as_ref() { + "JAN" => Month::January, + "FEB" => Month::February, + "MAR" => Month::March, + "APR" => Month::April, + "MAY" => Month::May, + "JUN" => Month::June, + "JUL" => Month::July, + "AUG" => Month::August, + "SEP" => Month::September, + "OCT" => Month::October, + "NOV" => Month::November, + "DEC" => Month::December, + _ => Month::Unknown, + }; + + if month == Month::Unknown { + "".to_owned() + } else { + pattern.to_uppercase() + } +} + +// *For all dedups/uniques we must compare leading numbers* +// Also note numeric compare and unique output is specifically *not* the same as a "sort | uniq" +// See: https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html +fn get_nums_dedup(a: &str) -> &str { + // Trim and remove any leading zeros + let s = a.trim().trim_start_matches('0'); + + // Get first char + let c = s.chars().nth(0).unwrap_or('\0'); + + // Empty lines and non-number lines are treated as the same for dedup + if s.is_empty() { + "" + } else if !c.eq(&NEGATIVE) && !c.is_numeric() { + "" + // Prepare lines for comparison of only the numerical leading numbers + } else { + get_leading_num(s) + } } /// Parse the beginning string into an f64, returning -inf instead of NaN on errors. +#[inline(always)] fn permissive_f64_parse(a: &str) -> f64 { + // Remove thousands seperators + let a = a.replace(THOUSANDS_SEP, ""); + // GNU sort treats "NaN" as non-number in numeric, so it needs special care. - match a.parse::() { + // *Keep this trim before parse* despite what POSIX may say about -b and -n + // because GNU and BSD both seem to require it to match their behavior + match a.trim().parse::() { Ok(a) if a.is_nan() => std::f64::NEG_INFINITY, Ok(a) => a, Err(_) => std::f64::NEG_INFINITY, } } -/// Compares two floats, with errors and non-numerics assumed to be -inf. -/// Stops coercing at the first non-numeric char. fn numeric_compare(a: &str, b: &str) -> Ordering { #![allow(clippy::comparison_chain)] - let sa = get_leading_number(a); - let sb = get_leading_number(b); + let sa = get_leading_num(a); + let sb = get_leading_num(b); let fa = permissive_f64_parse(sa); let fb = permissive_f64_parse(sb); @@ -534,19 +772,50 @@ fn numeric_compare(a: &str, b: &str) -> Ordering { } } +/// Compares two floats, with errors and non-numerics assumed to be -inf. +/// Stops coercing at the first non-numeric char. +fn general_numeric_compare(a: &str, b: &str) -> Ordering { + #![allow(clippy::comparison_chain)] + + let sa = get_leading_gen(a); + let sb = get_leading_gen(b); + + let fa = permissive_f64_parse(&sa); + let fb = permissive_f64_parse(&sb); + + // f64::cmp isn't implemented (due to NaN issues); implement directly instead + if fa > fb { + Ordering::Greater + } else if fa < fb { + Ordering::Less + } else { + Ordering::Equal + } +} + +// GNU/BSD does not handle converting numbers to an equal scale +// properly. GNU/BSD simply recognize that there is a human scale and sorts +// those numbers ahead of other number inputs. There are perhaps limits +// to the type of behavior we should emulate, and this might be such a limit. +// Properly handling these units seems like a value add to me. And when sorting +// these types of numbers, we rarely care about pure performance. fn human_numeric_convert(a: &str) -> f64 { - let int_str = get_leading_number(a); - let (_, s) = a.split_at(int_str.len()); - let int_part = permissive_f64_parse(int_str); - let suffix: f64 = match s.parse().unwrap_or('\0') { - 'K' => 1000f64, + let num_str = get_leading_num(a); + let suffix = a.trim_start_matches(num_str); + let num_part = permissive_f64_parse(num_str); + let suffix: f64 = match suffix.parse().unwrap_or('\0') { + // SI Units + 'K' => 1E3, 'M' => 1E6, 'G' => 1E9, 'T' => 1E12, 'P' => 1E15, - _ => 1f64, + 'E' => 1E18, + 'Z' => 1E21, + 'Y' => 1E24, + _ => 1f64, }; - int_part * suffix + num_part * suffix } /// Compare two strings as if they are human readable sizes. @@ -555,6 +824,7 @@ fn human_numeric_size_compare(a: &str, b: &str) -> Ordering { #![allow(clippy::comparison_chain)] let fa = human_numeric_convert(a); let fb = human_numeric_convert(b); + // f64::cmp isn't implemented (due to NaN issues); implement directly instead if fa > fb { Ordering::Greater @@ -565,16 +835,6 @@ fn human_numeric_size_compare(a: &str, b: &str) -> Ordering { } } -fn random_shuffle(a: &str, b: &str, salt: String) -> Ordering { - #![allow(clippy::comparison_chain)] - let salt_slice = salt.as_str(); - - let da = hash(&[a, salt_slice].concat()); - let db = hash(&[b, salt_slice].concat()); - - da.cmp(&db) -} - fn get_rand_string() -> String { thread_rng() .sample_iter(&Alphanumeric) @@ -583,12 +843,22 @@ fn get_rand_string() -> String { .collect::() } -fn hash(t: &T) -> u64 { - let mut s: XxHash64 = Default::default(); +fn get_hash(t: &T) -> u64 { + let mut s: FnvHasher = Default::default(); t.hash(&mut s); s.finish() } +fn random_shuffle(a: &str, b: &str, x: String) -> Ordering { + #![allow(clippy::comparison_chain)] + let salt_slice = x.as_str(); + + let da = get_hash(&[a, salt_slice].concat()); + let db = get_hash(&[b, salt_slice].concat()); + + da.cmp(&db) +} + #[derive(Eq, Ord, PartialEq, PartialOrd)] enum Month { Unknown, @@ -608,13 +878,15 @@ enum Month { /// Parse the beginning string into a Month, returning Month::Unknown on errors. fn month_parse(line: &str) -> Month { - match line - .split_whitespace() - .next() - .unwrap() - .to_uppercase() - .as_ref() - { + // GNU splits at any 3 letter match "JUNNNN" is JUN + let pattern = if line.trim().len().ge(&3) { + // Split a 3 and get first element of tuple ".0" + line.split_at(3).0 + } else { + "" + }; + + match pattern.to_uppercase().as_ref() { "JAN" => Month::January, "FEB" => Month::February, "MAR" => Month::March, @@ -632,7 +904,16 @@ fn month_parse(line: &str) -> Month { } fn month_compare(a: &str, b: &str) -> Ordering { - month_parse(a).cmp(&month_parse(b)) + let ma = month_parse(a); + let mb = month_parse(b); + + if ma > mb { + Ordering::Greater + } else if ma < mb { + Ordering::Less + } else { + Ordering::Equal + } } fn version_compare(a: &str, b: &str) -> Ordering { @@ -650,19 +931,26 @@ fn version_compare(a: &str, b: &str) -> Ordering { } fn remove_nondictionary_chars(s: &str) -> String { - // Using 'is_ascii_whitespace()' instead of 'is_whitespace()', because it - // uses only symbols compatible with UNIX sort (space, tab, newline). - // 'is_whitespace()' uses more symbols as whitespace (e.g. vertical tab). + // According to GNU, dictionary chars are those of ASCII + // and a blank is a space or a tab + s.chars() + .filter(|c| c.is_ascii_alphanumeric() || c.is_ascii_whitespace()) + .collect::() +} + +fn remove_nonprinting_chars(s: &str) -> String { + // However, GNU says nonprinting chars are more permissive. + // All of ASCII except control chars ie, escape, newline s.chars() - .filter(|c| c.is_alphanumeric() || c.is_ascii_whitespace()) + .filter(|c| c.is_ascii() && !c.is_ascii_control()) .collect::() } -fn print_sorted>(iter: T, outfile: &Option) +fn print_sorted>(iter: T, settings: &Settings) where S: std::fmt::Display, { - let mut file: Box = match *outfile { + let mut file: Box = match settings.outfile { Some(ref filename) => match File::create(Path::new(&filename)) { Ok(f) => Box::new(BufWriter::new(f)) as Box, Err(e) => { @@ -673,9 +961,16 @@ where None => Box::new(stdout()) as Box, }; - for line in iter { - let str = format!("{}\n", line); - crash_if_err!(1, file.write_all(str.as_bytes())) + if settings.zero_terminated { + for line in iter { + let str = format!("{}\0", line); + crash_if_err!(1, file.write_all(str.as_bytes())); + } + } else { + for line in iter { + let str = format!("{}\n", line); + crash_if_err!(1, file.write_all(str.as_bytes())); + } } } @@ -700,6 +995,22 @@ mod tests { use super::*; + #[test] + fn test_get_hash() { + let a = "Ted".to_string(); + + assert_eq!(2646829031758483623, get_hash(&a)); + } + + #[test] + fn test_random_shuffle() { + let a = "Ted"; + let b = "Ted"; + let c = get_rand_string(); + + assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); + } + #[test] fn test_default_compare() { let a = "your own"; @@ -746,13 +1057,4 @@ mod tests { assert_eq!(Ordering::Less, version_compare(a, b)); } - - #[test] - fn test_random_compare() { - let a = "9"; - let b = "9"; - let c = get_rand_string(); - - assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); - } } diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 2bac71def9e..43aaf1da1cf 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1,58 +1,228 @@ use crate::common::util::*; +#[test] +fn test_check_zero_terminated_failure() { + new_ucmd!() + .arg("-z") + .arg("-c") + .arg("zero-terminated.txt") + .fails() + .stdout_is("sort: disorder in line 0\n"); +} + +#[test] +fn test_check_zero_terminated_success() { + new_ucmd!() + .arg("-z") + .arg("-c") + .arg("zero-terminated.expected") + .succeeds(); +} + +#[test] +fn test_random_shuffle_len() { + // check whether output is the same length as the input + const FILE: &'static str = "default_unsorted_ints.expected"; + let (at, _ucmd) = at_and_ucmd!(); + let result = new_ucmd!().arg("-R").arg(FILE).run().stdout; + let expected = at.read(FILE); + + assert_ne!(result, expected); + assert_eq!(result.len(), expected.len()); +} + +#[test] +fn test_random_shuffle_contains_all_lines() { + // check whether lines of input are all in output + const FILE: &'static str = "default_unsorted_ints.expected"; + let (at, _ucmd) = at_and_ucmd!(); + let result = new_ucmd!().arg("-R").arg(FILE).run().stdout; + let expected = at.read(FILE); + let result_sorted = new_ucmd!().pipe_in(result.clone()).run().stdout; + + assert_ne!(result, expected); + assert_eq!(result_sorted, expected); +} + +#[test] +fn test_random_shuffle_contains_two_runs_not_the_same() { + // check to verify that two random shuffles are not equal; this has the + // potential to fail in the unlikely event that random order is the same + // as the starting order, or if both random sorts end up having the same order. + const FILE: &'static str = "default_unsorted_ints.expected"; + let (at, _ucmd) = at_and_ucmd!(); + let result = new_ucmd!().arg("-R").arg(FILE).run().stdout; + let expected = at.read(FILE); + let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout; + + assert_ne!(result, expected); + assert_ne!(result, unexpected); +} + #[test] fn test_numeric_floats_and_ints() { - for numeric_sort_param in vec!["-n", "--numeric-sort"] { - let input = "1.444\n8.013\n1\n-8\n1.04\n-1"; + test_helper("numeric_floats_and_ints", "-n"); +} + +#[test] +fn test_numeric_floats() { + test_helper("numeric_floats", "-n"); +} + +#[test] +fn test_numeric_floats_with_nan() { + test_helper("numeric_floats_with_nan", "-n"); +} + +#[test] +fn test_numeric_unfixed_floats() { + test_helper("numeric_unfixed_floats", "-n"); +} + +#[test] +fn test_numeric_fixed_floats() { + test_helper("numeric_fixed_floats", "-n"); +} + +#[test] +fn test_numeric_unsorted_ints() { + test_helper("numeric_unsorted_ints", "-n"); +} + +#[test] +fn test_human_block_sizes() { + test_helper("human_block_sizes", "-h"); +} + +#[test] +fn test_month_default() { + test_helper("month_default", "-M"); +} + +#[test] +fn test_month_stable() { + test_helper("month_stable", "-Ms"); +} + +#[test] +fn test_default_unsorted_ints() { + test_helper("default_unsorted_ints", ""); +} + +#[test] +fn test_numeric_unique_ints() { + test_helper("numeric_unsorted_ints_unique", "-nu"); +} + +#[test] +fn test_version() { + test_helper("version", "-V"); +} + +#[test] +fn test_ignore_case() { + test_helper("ignore_case", "-f"); +} + +#[test] +fn test_dictionary_order() { + test_helper("dictionary_order", "-d"); +} + +#[test] +fn test_dictionary_order2() { + for non_dictionary_order2_param in vec!["-d"] { new_ucmd!() - .arg(numeric_sort_param) - .pipe_in(input) + .pipe_in("a👦🏻aa b\naaaa b") + .arg(non_dictionary_order2_param) .succeeds() - .stdout_only("-8\n-1\n1\n1.04\n1.444\n8.013\n"); + .stdout_only("a👦🏻aa b\naaaa b\n"); } } #[test] -fn test_numeric_floats() { - for numeric_sort_param in vec!["-n", "--numeric-sort"] { - let input = "1.444\n8.013\n1.58590\n-8.90880\n1.040000000\n-.05"; +fn test_non_printing_chars() { + for non_printing_chars_param in vec!["-i"] { new_ucmd!() - .arg(numeric_sort_param) - .pipe_in(input) + .pipe_in("a👦🏻aa b\naaaa b") + .arg(non_printing_chars_param) .succeeds() - .stdout_only("-8.90880\n-.05\n1.040000000\n1.444\n1.58590\n8.013\n"); + .stdout_only("aaaa b\na👦🏻aa b\n"); } } #[test] -fn test_numeric_floats_with_nan() { - for numeric_sort_param in vec!["-n", "--numeric-sort"] { - let input = "1.444\n1.0/0.0\n1.58590\n-8.90880\n1.040000000\n-.05"; +fn test_exponents_positive_general_fixed() { + for exponents_positive_general_param in vec!["-g"] { new_ucmd!() - .arg(numeric_sort_param) - .pipe_in(input) + .pipe_in("100E6\n\n50e10\n+100000\n\n10000K78\n10E\n\n\n1000EDKLD\n\n\n100E6\n\n50e10\n+100000\n\n") + .arg(exponents_positive_general_param) .succeeds() - .stdout_only("-8.90880\n-.05\n1.0/0.0\n1.040000000\n1.444\n1.58590\n"); + .stdout_only("\n\n\n\n\n\n\n\n10000K78\n1000EDKLD\n10E\n+100000\n+100000\n100E6\n100E6\n50e10\n50e10\n"); } } #[test] -fn test_numeric_unfixed_floats() { - test_helper("numeric_fixed_floats", "-n"); +fn test_exponents_positive_numeric() { + test_helper("exponents-positive-numeric", "-n"); } #[test] -fn test_numeric_fixed_floats() { - test_helper("numeric_fixed_floats", "-n"); +fn test_months_dedup() { + test_helper("months-dedup", "-Mu"); } #[test] -fn test_numeric_unsorted_ints() { - test_helper("numeric_unsorted_ints", "-n"); +fn test_mixed_floats_ints_chars_numeric() { + test_helper("mixed_floats_ints_chars_numeric", "-n"); } #[test] -fn test_human_block_sizes() { +fn test_mixed_floats_ints_chars_numeric_unique() { + test_helper("mixed_floats_ints_chars_numeric_unique", "-nu"); +} + +#[test] +fn test_mixed_floats_ints_chars_numeric_reverse() { + test_helper("mixed_floats_ints_chars_numeric_unique_reverse", "-nur"); +} + +#[test] +fn test_mixed_floats_ints_chars_numeric_stable() { + test_helper("mixed_floats_ints_chars_numeric_stable", "-ns"); +} + +#[test] +fn test_numeric_floats_and_ints2() { + for numeric_sort_param in vec!["-n", "--numeric-sort"] { + let input = "1.444\n8.013\n1\n-8\n1.04\n-1"; + new_ucmd!() + .arg(numeric_sort_param) + .pipe_in(input) + .succeeds() + .stdout_only("-8\n-1\n1\n1.04\n1.444\n8.013\n"); + } +} + +#[test] +fn test_numeric_floats2() { + for numeric_sort_param in vec!["-n", "--numeric-sort"] { + let input = "1.444\n8.013\n1.58590\n-8.90880\n1.040000000\n-.05"; + new_ucmd!() + .arg(numeric_sort_param) + .pipe_in(input) + .succeeds() + .stdout_only("-8.90880\n-.05\n1.040000000\n1.444\n1.58590\n8.013\n"); + } +} + +#[test] +fn test_numeric_floats_with_nan2() { + test_helper("numeric-floats-with-nan2", "-n"); +} + +#[test] +fn test_human_block_sizes2() { for human_numeric_sort_param in vec!["-h", "--human-numeric-sort"] { let input = "8981K\n909991M\n-8T\n21G\n0.8M"; new_ucmd!() @@ -64,7 +234,7 @@ fn test_human_block_sizes() { } #[test] -fn test_month_default() { +fn test_month_default2() { for month_sort_param in vec!["-M", "--month-sort"] { let input = "JAn\nMAY\n000may\nJun\nFeb"; new_ucmd!() @@ -76,12 +246,7 @@ fn test_month_default() { } #[test] -fn test_month_stable() { - test_helper("month_stable", "-Ms"); -} - -#[test] -fn test_default_unsorted_ints() { +fn test_default_unsorted_ints2() { let input = "9\n1909888\n000\n1\n2"; new_ucmd!() .pipe_in(input) @@ -90,7 +255,7 @@ fn test_default_unsorted_ints() { } #[test] -fn test_numeric_unique_ints() { +fn test_numeric_unique_ints2() { for numeric_unique_sort_param in vec!["-nu"] { let input = "9\n9\n8\n1\n"; new_ucmd!() @@ -102,18 +267,8 @@ fn test_numeric_unique_ints() { } #[test] -fn test_version() { - test_helper("version", "-V"); -} - -#[test] -fn test_ignore_case() { - test_helper("ignore_case", "-f"); -} - -#[test] -fn test_dictionary_order() { - test_helper("dictionary_order", "-d"); +fn test_zero_terminated() { + test_helper("zero-terminated", "-z"); } #[test] @@ -192,6 +347,15 @@ fn test_check() { .stdout_is(""); } +#[test] +fn test_check_silent() { + new_ucmd!() + .arg("-C") + .arg("check_fail.txt") + .fails() + .stdout_is(""); +} + fn test_helper(file_name: &str, args: &str) { new_ucmd!() .arg(args) diff --git a/tests/fixtures/sort/exponents-positive-general.expected b/tests/fixtures/sort/exponents-positive-general.expected new file mode 100644 index 00000000000..3dbc92fe5cc --- /dev/null +++ b/tests/fixtures/sort/exponents-positive-general.expected @@ -0,0 +1,12 @@ + + + + + + +10E +1000EDKLD +10000K78 ++100000 +100E6 +50e10 diff --git a/tests/fixtures/sort/exponents-positive-general.txt b/tests/fixtures/sort/exponents-positive-general.txt new file mode 100644 index 00000000000..23ea527718a --- /dev/null +++ b/tests/fixtures/sort/exponents-positive-general.txt @@ -0,0 +1,12 @@ +10000K78 +10E + + +1000EDKLD + + +100E6 + +50e10 ++100000 + diff --git a/tests/fixtures/sort/exponents-positive-numeric.expected b/tests/fixtures/sort/exponents-positive-numeric.expected new file mode 100644 index 00000000000..174088f6358 --- /dev/null +++ b/tests/fixtures/sort/exponents-positive-numeric.expected @@ -0,0 +1,12 @@ + + + + + + ++100000 +10E +50e10 +100E6 +1000EDKLD +10000K78 diff --git a/tests/fixtures/sort/exponents-positive-numeric.txt b/tests/fixtures/sort/exponents-positive-numeric.txt new file mode 100644 index 00000000000..23ea527718a --- /dev/null +++ b/tests/fixtures/sort/exponents-positive-numeric.txt @@ -0,0 +1,12 @@ +10000K78 +10E + + +1000EDKLD + + +100E6 + +50e10 ++100000 + diff --git a/tests/fixtures/sort/human-mixed-inputs-reverse.expected b/tests/fixtures/sort/human-mixed-inputs-reverse.expected new file mode 100644 index 00000000000..463f44a2aef --- /dev/null +++ b/tests/fixtures/sort/human-mixed-inputs-reverse.expected @@ -0,0 +1,37 @@ +.2T +2G +100M +7800900K +51887300- +1890777 +56908-90078 +6780.0009866 +6780.000986 +789----009999 90-0 90-0 +1 +0001 +apr +MAY +JUNNNN +JAN +AUG +APR +0000000 +00 + + + + + + + + + + + + + + + + +-1.4 diff --git a/tests/fixtures/sort/human-mixed-inputs-reverse.txt b/tests/fixtures/sort/human-mixed-inputs-reverse.txt new file mode 100644 index 00000000000..ebef388b954 --- /dev/null +++ b/tests/fixtures/sort/human-mixed-inputs-reverse.txt @@ -0,0 +1,37 @@ +JAN + +0000000 + +00 + +0001 + +1 + +-1.4 + +JUNNNN +AUG + +apr + +APR + + +MAY +1890777 + +56908-90078 + +51887300- + +6780.0009866 + +789----009999 90-0 90-0 + +6780.000986 + +100M +7800900K +2G +.2T diff --git a/tests/fixtures/sort/human-mixed-inputs-stable.expected b/tests/fixtures/sort/human-mixed-inputs-stable.expected new file mode 100644 index 00000000000..e1c85b8ce66 --- /dev/null +++ b/tests/fixtures/sort/human-mixed-inputs-stable.expected @@ -0,0 +1,37 @@ +-1.4 +JAN + +0000000 + +00 + + + + +JUNNNN +AUG + +apr + +APR + + +MAY + + + + + + +0001 +1 +789----009999 90-0 90-0 +6780.000986 +6780.0009866 +56908-90078 +1890777 +51887300- +7800900K +100M +2G +.2T diff --git a/tests/fixtures/sort/human-mixed-inputs-stable.txt b/tests/fixtures/sort/human-mixed-inputs-stable.txt new file mode 100644 index 00000000000..ebef388b954 --- /dev/null +++ b/tests/fixtures/sort/human-mixed-inputs-stable.txt @@ -0,0 +1,37 @@ +JAN + +0000000 + +00 + +0001 + +1 + +-1.4 + +JUNNNN +AUG + +apr + +APR + + +MAY +1890777 + +56908-90078 + +51887300- + +6780.0009866 + +789----009999 90-0 90-0 + +6780.000986 + +100M +7800900K +2G +.2T diff --git a/tests/fixtures/sort/human-mixed-inputs-unique.expected b/tests/fixtures/sort/human-mixed-inputs-unique.expected new file mode 100644 index 00000000000..50f53b6a05f --- /dev/null +++ b/tests/fixtures/sort/human-mixed-inputs-unique.expected @@ -0,0 +1,13 @@ +-1.4 +JAN +0001 +789----009999 90-0 90-0 +6780.000986 +6780.0009866 +56908-90078 +1890777 +51887300- +7800900K +100M +2G +.2T diff --git a/tests/fixtures/sort/human-mixed-inputs-unique.txt b/tests/fixtures/sort/human-mixed-inputs-unique.txt new file mode 100644 index 00000000000..ebef388b954 --- /dev/null +++ b/tests/fixtures/sort/human-mixed-inputs-unique.txt @@ -0,0 +1,37 @@ +JAN + +0000000 + +00 + +0001 + +1 + +-1.4 + +JUNNNN +AUG + +apr + +APR + + +MAY +1890777 + +56908-90078 + +51887300- + +6780.0009866 + +789----009999 90-0 90-0 + +6780.000986 + +100M +7800900K +2G +.2T diff --git a/tests/fixtures/sort/human-mixed-inputs.expected b/tests/fixtures/sort/human-mixed-inputs.expected new file mode 100644 index 00000000000..3f5692b7bf2 --- /dev/null +++ b/tests/fixtures/sort/human-mixed-inputs.expected @@ -0,0 +1,37 @@ +-1.4 + + + + + + + + + + + + + + + + +00 +0000000 +APR +AUG +JAN +JUNNNN +MAY +apr +0001 +1 +789----009999 90-0 90-0 +6780.000986 +6780.0009866 +56908-90078 +1890777 +51887300- +7800900K +100M +2G +.2T diff --git a/tests/fixtures/sort/human-mixed-inputs.txt b/tests/fixtures/sort/human-mixed-inputs.txt new file mode 100644 index 00000000000..ce5986d6e66 --- /dev/null +++ b/tests/fixtures/sort/human-mixed-inputs.txt @@ -0,0 +1,46 @@ +JAN + +0000000 + +00 + +0001 + +1 + +-1.4 + +JUNNNN +AUG + +apr + +APR + + +MAY +1890777 + +56908-90078 + +51887300- + +6780.0009866 + +789----009999 90-0 90-0 + +6780.000986 + +1M +10M +100M +1000M +10000M + +7800900K +780090K +78009K +7800K +780K +2G +.2T diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected new file mode 100644 index 00000000000..a781a36bba8 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected @@ -0,0 +1,30 @@ +-2028789030 +-896689 +-8.90880 +-1 +-.05 + + + + + + + + +000 +CARAvan +00000001 +1 +1.040000000 +1.444 +1.58590 +8.013 +45 +46.89 + 4567. + 37800 +576,446.88800000 +576,446.890 +4798908.340000000000 +4798908.45 +4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.txt new file mode 100644 index 00000000000..a5813ea3a68 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.txt @@ -0,0 +1,30 @@ +576,446.890 +576,446.88800000 + + + 4567. +45 +46.89 +-1 +1 +00000001 +4798908.340000000000 +4798908.45 +4798908.8909800 + + + 37800 + +-2028789030 +-896689 +CARAvan + +-8.90880 +-.05 +1.444 +1.58590 +1.040000000 + +8.013 + +000 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse.expected new file mode 100644 index 00000000000..6b024210bd9 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse.expected @@ -0,0 +1,30 @@ +4798908.8909800 +4798908.45 +4798908.340000000000 +576,446.890 +576,446.88800000 + 37800 + 4567. +46.89 +45 +8.013 +1.58590 +1.444 +1.040000000 +1 +00000001 +CARAvan +000 + + + + + + + + +-.05 +-1 +-8.90880 +-896689 +-2028789030 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.expected new file mode 100644 index 00000000000..cb1028f0ece --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.expected @@ -0,0 +1,30 @@ +4798908.8909800 +4798908.45 +4798908.340000000000 +576,446.890 +576,446.88800000 + 37800 + 4567. +46.89 +45 +8.013 +1.58590 +1.444 +1.040000000 +1 +00000001 + + + + + +CARAvan + + + +000 +-.05 +-1 +-8.90880 +-896689 +-2028789030 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.txt new file mode 100644 index 00000000000..a5813ea3a68 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_reverse_stable.txt @@ -0,0 +1,30 @@ +576,446.890 +576,446.88800000 + + + 4567. +45 +46.89 +-1 +1 +00000001 +4798908.340000000000 +4798908.45 +4798908.8909800 + + + 37800 + +-2028789030 +-896689 +CARAvan + +-8.90880 +-.05 +1.444 +1.58590 +1.040000000 + +8.013 + +000 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected new file mode 100644 index 00000000000..63a3e646db1 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected @@ -0,0 +1,30 @@ +-2028789030 +-896689 +-8.90880 +-1 +-.05 + + + + + +CARAvan + + + +000 +1 +00000001 +1.040000000 +1.444 +1.58590 +8.013 +45 +46.89 + 4567. + 37800 +576,446.88800000 +576,446.890 +4798908.340000000000 +4798908.45 +4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.txt new file mode 100644 index 00000000000..a5813ea3a68 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.txt @@ -0,0 +1,30 @@ +576,446.890 +576,446.88800000 + + + 4567. +45 +46.89 +-1 +1 +00000001 +4798908.340000000000 +4798908.45 +4798908.8909800 + + + 37800 + +-2028789030 +-896689 +CARAvan + +-8.90880 +-.05 +1.444 +1.58590 +1.040000000 + +8.013 + +000 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected new file mode 100644 index 00000000000..cb27c6664ce --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected @@ -0,0 +1,20 @@ +-2028789030 +-896689 +-8.90880 +-1 +-.05 + +1 +1.040000000 +1.444 +1.58590 +8.013 +45 +46.89 + 4567. + 37800 +576,446.88800000 +576,446.890 +4798908.340000000000 +4798908.45 +4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.txt new file mode 100644 index 00000000000..a5813ea3a68 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.txt @@ -0,0 +1,30 @@ +576,446.890 +576,446.88800000 + + + 4567. +45 +46.89 +-1 +1 +00000001 +4798908.340000000000 +4798908.45 +4798908.8909800 + + + 37800 + +-2028789030 +-896689 +CARAvan + +-8.90880 +-.05 +1.444 +1.58590 +1.040000000 + +8.013 + +000 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected new file mode 100644 index 00000000000..bbce169347f --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected @@ -0,0 +1,20 @@ +4798908.8909800 +4798908.45 +4798908.340000000000 +576,446.890 +576,446.88800000 + 37800 + 4567. +46.89 +45 +8.013 +1.58590 +1.444 +1.040000000 +1 + +-.05 +-1 +-8.90880 +-896689 +-2028789030 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.txt new file mode 100644 index 00000000000..a5813ea3a68 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.txt @@ -0,0 +1,30 @@ +576,446.890 +576,446.88800000 + + + 4567. +45 +46.89 +-1 +1 +00000001 +4798908.340000000000 +4798908.45 +4798908.8909800 + + + 37800 + +-2028789030 +-896689 +CARAvan + +-8.90880 +-.05 +1.444 +1.58590 +1.040000000 + +8.013 + +000 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.expected new file mode 100644 index 00000000000..bbce169347f --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.expected @@ -0,0 +1,20 @@ +4798908.8909800 +4798908.45 +4798908.340000000000 +576,446.890 +576,446.88800000 + 37800 + 4567. +46.89 +45 +8.013 +1.58590 +1.444 +1.040000000 +1 + +-.05 +-1 +-8.90880 +-896689 +-2028789030 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.txt b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.txt new file mode 100644 index 00000000000..a5813ea3a68 --- /dev/null +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_stable.txt @@ -0,0 +1,30 @@ +576,446.890 +576,446.88800000 + + + 4567. +45 +46.89 +-1 +1 +00000001 +4798908.340000000000 +4798908.45 +4798908.8909800 + + + 37800 + +-2028789030 +-896689 +CARAvan + +-8.90880 +-.05 +1.444 +1.58590 +1.040000000 + +8.013 + +000 diff --git a/tests/fixtures/sort/months-dedup.expected b/tests/fixtures/sort/months-dedup.expected new file mode 100644 index 00000000000..dfb69349276 --- /dev/null +++ b/tests/fixtures/sort/months-dedup.expected @@ -0,0 +1,6 @@ + +JAN +apr +MAY +JUNNNN +AUG diff --git a/tests/fixtures/sort/months-dedup.txt b/tests/fixtures/sort/months-dedup.txt new file mode 100644 index 00000000000..ebef388b954 --- /dev/null +++ b/tests/fixtures/sort/months-dedup.txt @@ -0,0 +1,37 @@ +JAN + +0000000 + +00 + +0001 + +1 + +-1.4 + +JUNNNN +AUG + +apr + +APR + + +MAY +1890777 + +56908-90078 + +51887300- + +6780.0009866 + +789----009999 90-0 90-0 + +6780.000986 + +100M +7800900K +2G +.2T diff --git a/tests/fixtures/sort/numeric-floats-with-nan2.expected b/tests/fixtures/sort/numeric-floats-with-nan2.expected new file mode 100644 index 00000000000..51c9985c3e1 --- /dev/null +++ b/tests/fixtures/sort/numeric-floats-with-nan2.expected @@ -0,0 +1,23 @@ +-8.90880 +-.05 + + + + + + + + + + + + + + +Karma +1 +1.0/0.0 +1.040000000 +1.2 +1.444 +1.58590 diff --git a/tests/fixtures/sort/numeric-floats-with-nan2.txt b/tests/fixtures/sort/numeric-floats-with-nan2.txt new file mode 100644 index 00000000000..9b78741fe6c --- /dev/null +++ b/tests/fixtures/sort/numeric-floats-with-nan2.txt @@ -0,0 +1,23 @@ +Karma + +1.0/0.0 + + +-8.90880 + + +-.05 + + +1.040000000 + +1.444 + + +1.58590 + + +1 + +1.2 + diff --git a/tests/fixtures/sort/zero-terminated.expected b/tests/fixtures/sort/zero-terminated.expected new file mode 100644 index 00000000000..4e53b304bb3 Binary files /dev/null and b/tests/fixtures/sort/zero-terminated.expected differ diff --git a/tests/fixtures/sort/zero-terminated.txt b/tests/fixtures/sort/zero-terminated.txt new file mode 100644 index 00000000000..5c547c851ce Binary files /dev/null and b/tests/fixtures/sort/zero-terminated.txt differ