From 959300043ec22f0243734b8f012da7279e8a88a5 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 25 Dec 2025 22:22:01 +0900 Subject: [PATCH 01/11] feat(sort): add locale-aware numeric sorting support Implement NumericLocaleSettings to handle thousands separators and decimal points based on locale. Update tokenization logic to accommodate blank thousands separators for numeric and human-numeric modes, improving parsing of locale-specific numbers. Also refactor numeric locale detection for safety/readability and clean up related initialization/spell-checker ignore. --- src/uu/sort/src/sort.rs | 173 +++++++++++++++++++++++++++++++++------- 1 file changed, 146 insertions(+), 27 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 01ddc63fbe0..68a2c4d8632 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -7,7 +7,7 @@ // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html // https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html -// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef GETFD +// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef GETFD localeconv mod buffer_hint; mod check; @@ -284,9 +284,35 @@ pub struct GlobalSettings { buffer_size_is_explicit: bool, compress_prog: Option, merge_batch_size: usize, + numeric_locale: NumericLocaleSettings, precomputed: Precomputed, } +#[derive(Clone, Copy, Debug)] +struct NumericLocaleSettings { + thousands_sep: Option, + decimal_pt: Option, +} + +impl Default for NumericLocaleSettings { + fn default() -> Self { + Self { + thousands_sep: None, + decimal_pt: Some(DECIMAL_PT), + } + } +} + +impl NumericLocaleSettings { + fn num_info_settings(&self, accept_si_units: bool) -> NumInfoParseSettings { + NumInfoParseSettings { + accept_si_units, + thousands_separator: self.thousands_sep, + decimal_pt: self.decimal_pt, + } + } +} + /// Data needed for sorting. Should be computed once before starting to sort /// by calling `GlobalSettings::init_precomputed`. #[derive(Clone, Debug, Default)] @@ -297,6 +323,8 @@ struct Precomputed { selections_per_line: usize, fast_lexicographic: bool, fast_ascii_insensitive: bool, + tokenize_blank_thousands_sep: bool, + tokenize_allow_unit_after_blank: bool, } impl GlobalSettings { @@ -341,6 +369,20 @@ impl GlobalSettings { .filter(|s| matches!(s.settings.mode, SortMode::GeneralNumeric)) .count(); + let uses_numeric = self + .selectors + .iter() + .any(|s| matches!(s.settings.mode, SortMode::Numeric | SortMode::HumanNumeric)); + let uses_human_numeric = self + .selectors + .iter() + .any(|s| matches!(s.settings.mode, SortMode::HumanNumeric)); + self.precomputed.tokenize_blank_thousands_sep = self.separator.is_none() + && uses_numeric + && self.numeric_locale.thousands_sep == Some(b' '); + self.precomputed.tokenize_allow_unit_after_blank = + self.precomputed.tokenize_blank_thousands_sep && uses_human_numeric; + self.precomputed.fast_lexicographic = !disable_fast_lexicographic && self.can_use_fast_lexicographic(); self.precomputed.fast_ascii_insensitive = self.can_use_fast_ascii_insensitive(); @@ -413,6 +455,7 @@ impl Default for GlobalSettings { buffer_size_is_explicit: false, compress_prog: None, merge_batch_size: default_merge_batch_size(), + numeric_locale: NumericLocaleSettings::default(), precomputed: Precomputed::default(), } } @@ -597,7 +640,12 @@ impl<'a> Line<'a> { } token_buffer.clear(); if settings.precomputed.needs_tokens { - tokenize(line, settings.separator, token_buffer); + tokenize( + line, + settings.separator, + token_buffer, + &settings.precomputed, + ); } if settings.mode == SortMode::Numeric { // exclude inf, nan, scientific notation @@ -607,11 +655,12 @@ impl<'a> Line<'a> { .and_then(|s| s.parse::().ok()); line_data.line_num_floats.push(line_num_float); } - for (selector, selection) in settings - .selectors - .iter() - .map(|selector| (selector, selector.get_selection(line, token_buffer))) - { + for (selector, selection) in settings.selectors.iter().map(|selector| { + ( + selector, + selector.get_selection(line, token_buffer, &settings.numeric_locale), + ) + }) { match selection { Selection::AsBigDecimal(parsed_float) => line_data.parsed_floats.push(parsed_float), Selection::WithNumInfo(str, num_info) => { @@ -660,7 +709,12 @@ impl<'a> Line<'a> { writeln!(writer)?; let mut fields = vec![]; - tokenize(self.line, settings.separator, &mut fields); + tokenize( + self.line, + settings.separator, + &mut fields, + &settings.precomputed, + ); for selector in &settings.selectors { let mut selection = selector.get_range(self.line, Some(&fields)); match selector.settings.mode { @@ -668,10 +722,9 @@ impl<'a> Line<'a> { // find out which range is used for numeric comparisons let (_, num_range) = NumInfo::parse( &self.line[selection.clone()], - &NumInfoParseSettings { - accept_si_units: selector.settings.mode == SortMode::HumanNumeric, - ..Default::default() - }, + &settings + .numeric_locale + .num_info_settings(selector.settings.mode == SortMode::HumanNumeric), ); let initial_selection = selection.clone(); @@ -789,24 +842,50 @@ impl<'a> Line<'a> { } /// Tokenize a line into fields. The result is stored into `token_buffer`. -fn tokenize(line: &[u8], separator: Option, token_buffer: &mut Vec) { +fn tokenize( + line: &[u8], + separator: Option, + token_buffer: &mut Vec, + precomputed: &Precomputed, +) { assert!(token_buffer.is_empty()); if let Some(separator) = separator { tokenize_with_separator(line, separator, token_buffer); } else { - tokenize_default(line, token_buffer); + tokenize_default( + line, + token_buffer, + precomputed.tokenize_blank_thousands_sep, + precomputed.tokenize_allow_unit_after_blank, + ); } } /// By default fields are separated by the first whitespace after non-whitespace. /// Whitespace is included in fields at the start. /// The result is stored into `token_buffer`. -fn tokenize_default(line: &[u8], token_buffer: &mut Vec) { +fn tokenize_default( + line: &[u8], + token_buffer: &mut Vec, + blank_thousands_sep: bool, + allow_unit_after_blank: bool, +) { token_buffer.push(0..0); // pretend that there was whitespace in front of the line let mut previous_was_whitespace = true; for (idx, char) in line.iter().enumerate() { - if char.is_ascii_whitespace() { + let is_whitespace = char.is_ascii_whitespace(); + let treat_as_separator = if is_whitespace { + if blank_thousands_sep && *char == b' ' { + !is_blank_thousands_sep(line, idx, allow_unit_after_blank) + } else { + true + } + } else { + false + }; + + if treat_as_separator { if !previous_was_whitespace { token_buffer.last_mut().unwrap().end = idx; token_buffer.push(idx..0); @@ -819,6 +898,31 @@ fn tokenize_default(line: &[u8], token_buffer: &mut Vec) { token_buffer.last_mut().unwrap().end = line.len(); } +fn is_blank_thousands_sep(line: &[u8], idx: usize, allow_unit_after_blank: bool) -> bool { + if line.get(idx) != Some(&b' ') { + return false; + } + + let prev_is_digit = idx + .checked_sub(1) + .and_then(|prev_idx| line.get(prev_idx)) + .is_some_and(u8::is_ascii_digit); + if !prev_is_digit { + return false; + } + + let next = line.get(idx + 1).copied(); + match next { + Some(c) if c.is_ascii_digit() => true, + Some(b'K' | b'k' | b'M' | b'G' | b'T' | b'P' | b'E' | b'Z' | b'Y' | b'R' | b'Q') + if allow_unit_after_blank => + { + true + } + _ => false, + } +} + /// Split between separators. These separators are not included in fields. /// The result is stored into `token_buffer`. fn tokenize_with_separator(line: &[u8], separator: u8, token_buffer: &mut Vec) { @@ -1077,7 +1181,12 @@ impl FieldSelector { /// Get the selection that corresponds to this selector for the line. /// If `needs_fields` returned false, tokens may be empty. - fn get_selection<'a>(&self, line: &'a [u8], tokens: &[Field]) -> Selection<'a> { + fn get_selection<'a>( + &self, + line: &'a [u8], + tokens: &[Field], + numeric_locale: &NumericLocaleSettings, + ) -> Selection<'a> { // `get_range` expects `None` when we don't need tokens and would get confused by an empty vector. let tokens = if self.needs_tokens { Some(tokens) @@ -1097,14 +1206,10 @@ impl FieldSelector { }; // Parse NumInfo for this number. - let (info, num_range) = NumInfo::parse( - range_str, - &NumInfoParseSettings { - accept_si_units: self.settings.mode == SortMode::HumanNumeric, - thousands_separator, - ..Default::default() - }, - ); + let mut parse_settings = + numeric_locale.num_info_settings(self.settings.mode == SortMode::HumanNumeric); + parse_settings.thousands_separator = thousands_separator; + let (info, num_range) = NumInfo::parse(range_str, &parse_settings); // Shorten the range to what we need to pass to numeric_str_cmp later. range_str = &range_str[num_range]; Selection::WithNumInfo(range_str, info) @@ -1216,6 +1321,16 @@ impl FieldSelector { } } +fn detect_numeric_locale() -> NumericLocaleSettings { + let mut settings = NumericLocaleSettings::default(); + settings.decimal_pt = Some(locale_decimal_pt()); + settings.thousands_sep = match i18n::decimal::locale_grouping_separator().as_bytes() { + [b] => Some(*b), + _ => None, + }; + settings +} + /// Creates an `Arg` for a sort mode flag. fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { Arg::new(mode) @@ -1847,7 +1962,10 @@ fn emit_debug_warnings( #[uucore::main] #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let mut settings = GlobalSettings::default(); + let mut settings = GlobalSettings { + numeric_locale: detect_numeric_locale(), + ..Default::default() + }; let (processed_args, mut legacy_warnings) = preprocess_legacy_args(args); if !legacy_warnings.is_empty() { @@ -2964,7 +3082,8 @@ mod tests { fn tokenize_helper(line: &[u8], separator: Option) -> Vec { let mut buffer = vec![]; - tokenize(line, separator, &mut buffer); + let precomputed = Precomputed::default(); + tokenize(line, separator, &mut buffer, &precomputed); buffer } From 39fdcbbc5a2e9236bd6f8cb6017b0437f2163f68 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 17 Jan 2026 21:35:20 +0900 Subject: [PATCH 02/11] test: add test for human-numeric sort with blank thousands separator in sv_SE locale Add a new test function `test_human_numeric_blank_thousands_sep_locale` to verify that the sort utility correctly handles human-readable numeric sorting when the locale's thousands separator is a blank space (e.g., in sv_SE.UTF-8 or sv_SE). This ensures proper behavior of the `-h` flag with key-based sorting in such locales, preventing potential sorting errors with space-separated numeric strings. --- tests/by-util/test_sort.rs | 65 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index e794898a286..fa6fdb6a30a 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -8,6 +8,8 @@ use std::env; use std::fmt::Write as FmtWrite; +#[cfg(unix)] +use std::process::Command; use std::time::Duration; use uutests::at_and_ucmd; @@ -1665,6 +1667,69 @@ fn test_g_float_locale_decimal_separator() { .stdout_is("1.10\n1.9\n"); } +#[test] +#[cfg(unix)] +fn test_human_numeric_blank_thousands_sep_locale() { + fn thousands_sep_for(locale: &str) -> Option { + let output = Command::new("locale") + .arg("thousands_sep") + .env("LC_ALL", locale) + .output() + .ok()?; + if !output.status.success() { + return None; + } + let sep = String::from_utf8_lossy(&output.stdout); + let sep = sep.trim_end_matches(|ch| ch == '\n' || ch == '\r'); + if sep.is_empty() || sep.as_bytes().len() != 1 || !sep.chars().all(|c| c.is_whitespace()) { + return None; + } + Some(sep.to_string()) + } + + let candidates = ["sv_SE.UTF-8", "sv_SE"]; + let mut selected_locale = None; + let mut thousands_sep = None; + for candidate in candidates { + if let Some(sep) = thousands_sep_for(candidate) { + selected_locale = Some(candidate.to_string()); + thousands_sep = Some(sep); + break; + } + } + + let (Some(locale), Some(sep)) = (selected_locale, thousands_sep) else { + return; + }; + + let line1 = format!("1 1k 1 M 4{sep}003 1M"); + let line2 = format!("2k 2M 2 k 4{sep}002 2"); + let line3 = format!("3M 3 3 G 4{sep}001 3k"); + let input = format!("{line1}\n{line2}\n{line3}\n"); + + let ts = TestScenario::new("sort"); + ts.fixtures.write("blank-thousands.txt", &input); + + let cases = [ + (1, format!("{line1}\n{line2}\n{line3}\n")), + (2, format!("{line3}\n{line1}\n{line2}\n")), + (3, format!("{line1}\n{line2}\n{line3}\n")), + (5, format!("{line3}\n{line2}\n{line1}\n")), + ]; + + for (key, expected) in cases { + let key_str = key.to_string(); + ts.ucmd() + .env("LC_ALL", &locale) + .arg("-h") + .arg("-k") + .arg(&key_str) + .arg("blank-thousands.txt") + .succeeds() + .stdout_is(expected); + } +} + #[test] // Test misc numbers ("'a" is not interpreted as literal, trailing text is ignored...) fn test_g_misc() { From 10423f704549e664163199a77d4468152043cc17 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 17 Jan 2026 21:40:09 +0900 Subject: [PATCH 03/11] refactor: simplify separator trimming in locale test Use array slice for trim_end_matches and String::len for length check to improve readability and efficiency in test_human_numeric_blank_thousands_sep_locale. --- tests/by-util/test_sort.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index fa6fdb6a30a..b25498e1018 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1680,8 +1680,8 @@ fn test_human_numeric_blank_thousands_sep_locale() { return None; } let sep = String::from_utf8_lossy(&output.stdout); - let sep = sep.trim_end_matches(|ch| ch == '\n' || ch == '\r'); - if sep.is_empty() || sep.as_bytes().len() != 1 || !sep.chars().all(|c| c.is_whitespace()) { + let sep = sep.trim_end_matches(&['\n', '\r'][..]); + if sep.is_empty() || sep.len() != 1 || !sep.chars().all(|c| c.is_whitespace()) { return None; } Some(sep.to_string()) From 9dc0648952b6c595230ebe9fabec55cb0b27f277 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 25 Dec 2025 22:22:01 +0900 Subject: [PATCH 04/11] feat(sort): add locale-aware numeric sorting support Implement NumericLocaleSettings to handle thousands separators and decimal points based on locale. Update tokenization logic to accommodate blank thousands separators for numeric and human-numeric modes, improving parsing of locale-specific numbers. Also refactor numeric locale detection for safety/readability and clean up related initialization/spell-checker ignore. --- src/uu/sort/src/sort.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 68a2c4d8632..822a9094aa7 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1330,7 +1330,6 @@ fn detect_numeric_locale() -> NumericLocaleSettings { }; settings } - /// Creates an `Arg` for a sort mode flag. fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { Arg::new(mode) From 2d1fc913e902ed58853be445aefa69d53e03fbe5 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 08:44:50 +0900 Subject: [PATCH 05/11] refactor(sort): simplify detect_numeric_locale with struct literal Use struct literal initialization instead of creating a mutable default and assigning fields, improving code conciseness and readability without changing functionality. --- src/uu/sort/src/sort.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 822a9094aa7..5846079fa0e 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1322,13 +1322,13 @@ impl FieldSelector { } fn detect_numeric_locale() -> NumericLocaleSettings { - let mut settings = NumericLocaleSettings::default(); - settings.decimal_pt = Some(locale_decimal_pt()); - settings.thousands_sep = match i18n::decimal::locale_grouping_separator().as_bytes() { - [b] => Some(*b), - _ => None, - }; - settings + NumericLocaleSettings { + decimal_pt: Some(locale_decimal_pt()), + thousands_sep: match i18n::decimal::locale_grouping_separator().as_bytes() { + [b] => Some(*b), + _ => None, + }, + } } /// Creates an `Arg` for a sort mode flag. fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { From 048241d3bc956e1f5287b1edf99d732e0aa3e6aa Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 09:11:11 +0900 Subject: [PATCH 06/11] refactor(sort): improve thousands separator handling in numeric sorting - Ignore thousands separators in debug annotations to match GNU output - Simplify NumInfo parsing by removing redundant thousands separator logic - Enhance detection of numeric locale settings to handle multibyte separators like NBSP correctly, maintaining single-byte behavior for compatibility with upstream GNU coreutils --- src/uu/sort/src/sort.rs | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 5846079fa0e..775fcd0b4c3 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -720,12 +720,12 @@ impl<'a> Line<'a> { match selector.settings.mode { SortMode::Numeric | SortMode::HumanNumeric => { // find out which range is used for numeric comparisons - let (_, num_range) = NumInfo::parse( - &self.line[selection.clone()], - &settings - .numeric_locale - .num_info_settings(selector.settings.mode == SortMode::HumanNumeric), - ); + let mut parse_settings = settings + .numeric_locale + .num_info_settings(selector.settings.mode == SortMode::HumanNumeric); + // Debug annotations should ignore thousands separators to match GNU output. + parse_settings.thousands_separator = None; + let (_, num_range) = NumInfo::parse(&self.line[selection.clone()], &parse_settings); let initial_selection = selection.clone(); // Shorten selection to num_range. @@ -1195,21 +1195,11 @@ impl FieldSelector { }; let mut range_str = &line[self.get_range(line, tokens)]; if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric { - // Get the thousands separator from the locale, handling cases where the separator is empty or multi-character - let locale_thousands_separator = i18n::decimal::locale_grouping_separator().as_bytes(); - - // Upstream GNU coreutils ignore multibyte thousands separators - // (FIXME in C source). We keep the same single-byte behavior. - let thousands_separator = match locale_thousands_separator { - [b] => Some(*b), - _ => None, - }; - // Parse NumInfo for this number. - let mut parse_settings = - numeric_locale.num_info_settings(self.settings.mode == SortMode::HumanNumeric); - parse_settings.thousands_separator = thousands_separator; - let (info, num_range) = NumInfo::parse(range_str, &parse_settings); + let (info, num_range) = NumInfo::parse( + range_str, + &numeric_locale.num_info_settings(self.settings.mode == SortMode::HumanNumeric), + ); // Shorten the range to what we need to pass to numeric_str_cmp later. range_str = &range_str[num_range]; Selection::WithNumInfo(range_str, info) @@ -1322,10 +1312,17 @@ impl FieldSelector { } fn detect_numeric_locale() -> NumericLocaleSettings { + let encoding = i18n::get_numeric_locale().1; + let grouping = i18n::decimal::locale_grouping_separator(); NumericLocaleSettings { decimal_pt: Some(locale_decimal_pt()), - thousands_sep: match i18n::decimal::locale_grouping_separator().as_bytes() { + // Upstream GNU coreutils ignore multibyte thousands separators + // (FIXME in C source). We keep the same single-byte behavior. + thousands_sep: match grouping.as_bytes() { [b] => Some(*b), + // ICU returns NBSP as UTF-8 (0xC2 0xA0). In non-UTF8 locales like ISO-8859-1, + // the input byte is 0xA0, so map it to a single-byte separator. + [0xC2, 0xA0] if encoding != i18n::UEncoding::Utf8 => Some(0xA0), _ => None, }, } From 9840528db50b9a9d91991e5fff877e9575efa0ec Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 12:44:52 +0900 Subject: [PATCH 07/11] fix(sort): handle C locale numeric settings correctly - Update detect_numeric_locale to check for C locale (ASCII encoding and "und" locale) - In C locale, set thousands_sep to None to avoid incorrect grouping separators - Adjust test expectations to match new sorting behavior for numeric fields in C locale --- src/uu/sort/src/sort.rs | 13 ++++++++++++- tests/by-util/test_sort.rs | 8 ++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 775fcd0b4c3..61c43cf6f4c 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1312,7 +1312,18 @@ impl FieldSelector { } fn detect_numeric_locale() -> NumericLocaleSettings { - let encoding = i18n::get_numeric_locale().1; + let numeric_locale = i18n::get_numeric_locale(); + let locale = &numeric_locale.0; + let encoding = numeric_locale.1; + let is_c_locale = encoding == i18n::UEncoding::Ascii && locale.to_string() == "und"; + + if is_c_locale { + return NumericLocaleSettings { + decimal_pt: Some(DECIMAL_PT), + thousands_sep: None, + }; + } + let grouping = i18n::decimal::locale_grouping_separator(); NumericLocaleSettings { decimal_pt: Some(locale_decimal_pt()), diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index b25498e1018..7218060d9c9 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -2432,18 +2432,18 @@ _ __ 1 _ -2.4 -___ 2,5 _ -2.,,3 -__ 2.4 ___ +2.,,3 +__ 2.4 ___ 2,,3 _ +2.4 +___ 1a _ 2b From 979369f1ef56eb0a7766918a33db837c88372c17 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 12:46:27 +0900 Subject: [PATCH 08/11] refactor(sort): split long line assignment for improved readability The assignment of NumInfo::parse result was reformatted by splitting it across two lines to enhance code readability and adhere to line length guidelines. --- src/uu/sort/src/sort.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 61c43cf6f4c..8e3b6bcdf71 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -725,7 +725,8 @@ impl<'a> Line<'a> { .num_info_settings(selector.settings.mode == SortMode::HumanNumeric); // Debug annotations should ignore thousands separators to match GNU output. parse_settings.thousands_separator = None; - let (_, num_range) = NumInfo::parse(&self.line[selection.clone()], &parse_settings); + let (_, num_range) = + NumInfo::parse(&self.line[selection.clone()], &parse_settings); let initial_selection = selection.clone(); // Shorten selection to num_range. From f58caedecdaca57a9025edf8ad4f2949ce4b7f14 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 13:45:26 +0900 Subject: [PATCH 09/11] i18n: treat C locale as no grouping separator --- src/uucore/src/lib/features/i18n/decimal.rs | 12 ++++++++-- .../mixed_floats_ints_chars_numeric.expected | 4 ++-- ...d_floats_ints_chars_numeric.expected.debug | 12 +++++----- ..._floats_ints_chars_numeric_stable.expected | 4 ++-- ...s_ints_chars_numeric_stable.expected.debug | 8 +++---- ..._floats_ints_chars_numeric_unique.expected | 3 +-- ...s_ints_chars_numeric_unique.expected.debug | 6 ++--- ...ints_chars_numeric_unique_reverse.expected | 3 +-- ...hars_numeric_unique_reverse.expected.debug | 6 ++--- .../sort/multiple_groupings_numeric.expected | 8 +++---- .../multiple_groupings_numeric.expected.debug | 24 +++++++++---------- 11 files changed, 46 insertions(+), 44 deletions(-) diff --git a/src/uucore/src/lib/features/i18n/decimal.rs b/src/uucore/src/lib/features/i18n/decimal.rs index 0a901143c6b..a7ceca2efa3 100644 --- a/src/uucore/src/lib/features/i18n/decimal.rs +++ b/src/uucore/src/lib/features/i18n/decimal.rs @@ -6,7 +6,7 @@ use std::sync::OnceLock; use icu_decimal::provider::DecimalSymbolsV1; -use icu_locale::Locale; +use icu_locale::{Locale, locale}; use icu_provider::prelude::*; use crate::i18n::get_numeric_locale; @@ -60,7 +60,15 @@ fn get_grouping_separator(loc: Locale) -> String { pub fn locale_grouping_separator() -> &'static str { static GROUPING_SEP: OnceLock = OnceLock::new(); - GROUPING_SEP.get_or_init(|| get_grouping_separator(get_numeric_locale().0.clone())) + GROUPING_SEP.get_or_init(|| { + let loc = get_numeric_locale().0.clone(); + // C/POSIX locale (represented as "und") has no grouping separator. + if loc == locale!("und") { + String::new() + } else { + get_grouping_separator(loc) + } + }) } #[cfg(test)] diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected index a781a36bba8..59541af3252 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected @@ -21,10 +21,10 @@ CARAvan 8.013 45 46.89 - 4567. - 37800 576,446.88800000 576,446.890 + 4567. + 37800 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug index a00067b1ee6..b7b76e58986 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug @@ -67,18 +67,18 @@ __ 46.89 _____ _____ - 4567. - _____ -____________________ ->>>>37800 - _____ -_________ 576,446.88800000 ___ ________________ 576,446.890 ___ ___________ + 4567. + _____ +____________________ +>>>>37800 + _____ +_________ 4798908.340000000000 ____________________ ____________________ diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected index 36eeda637f7..0ccdd84c059 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected @@ -24,10 +24,10 @@ CARAvan 8.013 45 46.89 +576,446.890 +576,446.88800000 4567. 37800 -576,446.88800000 -576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug index 3fba8903042..66a98b20879 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug @@ -50,14 +50,14 @@ _____ __ 46.89 _____ +576,446.890 +___ +576,446.88800000 +___ 4567. _____ >>>>37800 _____ -576,446.88800000 -___ -576,446.890 -___ 4798908.340000000000 ____________________ 4798908.45 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected index cb27c6664ce..cd4256c5f46 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected @@ -11,10 +11,9 @@ 8.013 45 46.89 +576,446.890 4567. 37800 -576,446.88800000 -576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug index dd6e8dfcc67..663a4b3a918 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug @@ -24,14 +24,12 @@ _____ __ 46.89 _____ +576,446.890 +___ 4567. _____ >>>>37800 _____ -576,446.88800000 -___ -576,446.890 -___ 4798908.340000000000 ____________________ 4798908.45 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected index bbce169347f..97e261f1452 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected @@ -1,10 +1,9 @@ 4798908.8909800 4798908.45 4798908.340000000000 -576,446.890 -576,446.88800000 37800 4567. +576,446.890 46.89 45 8.013 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug index 4b01a840618..01f7abf5bf2 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug @@ -4,14 +4,12 @@ _______________ __________ 4798908.340000000000 ____________________ -576,446.890 -___ -576,446.88800000 -___ >>>>37800 _____ 4567. _____ +576,446.890 +___ 46.89 _____ 45 diff --git a/tests/fixtures/sort/multiple_groupings_numeric.expected b/tests/fixtures/sort/multiple_groupings_numeric.expected index a6daab83676..9dd5b5f6553 100644 --- a/tests/fixtures/sort/multiple_groupings_numeric.expected +++ b/tests/fixtures/sort/multiple_groupings_numeric.expected @@ -2,14 +2,14 @@ CARAvan + 1,999.99 +1,234 1.234 + 2,000 2.000 2.000,50 +12,34 22 23,. 111 210 -1,234 -12,34 - 1,999.99 - 2,000 diff --git a/tests/fixtures/sort/multiple_groupings_numeric.expected.debug b/tests/fixtures/sort/multiple_groupings_numeric.expected.debug index 57a4ae01b9a..62e98a46a18 100644 --- a/tests/fixtures/sort/multiple_groupings_numeric.expected.debug +++ b/tests/fixtures/sort/multiple_groupings_numeric.expected.debug @@ -10,15 +10,27 @@ CARAvan ^ no match for key _______ +>>1,999.99 + _ +__________ +1,234 +_ +_____ >1.234 _____ ______ +>>>2,000 + _ +________ 2.000 _____ _____ 2.000,50 _____ ________ +12,34 +__ +_____ 22 __ __ @@ -31,15 +43,3 @@ ___ >210 ___ ____ -1,234 -_ -_____ -12,34 -__ -_____ ->>1,999.99 - _ -__________ ->>>2,000 - _ -________ From d7953dbb032ae26e4251eebb382274deb54dcd1a Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 14:54:13 +0900 Subject: [PATCH 10/11] fix(tests): correct sorting order of decimal values in numeric sort fixture Update the expected output for the multiple decimals numeric sort test to reflect the proper ascending order. The values "576,446.88800000" and "576,446.890" were misplaced and have been repositioned to their correct locations in the sorted sequence, ensuring the test accurately validates the sorting logic. The debug fixture was updated accordingly. --- .../fixtures/sort/multiple_decimals_numeric.expected | 4 ++-- .../sort/multiple_decimals_numeric.expected.debug | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/fixtures/sort/multiple_decimals_numeric.expected b/tests/fixtures/sort/multiple_decimals_numeric.expected index 3ef4d22e881..8f42e7ce5da 100644 --- a/tests/fixtures/sort/multiple_decimals_numeric.expected +++ b/tests/fixtures/sort/multiple_decimals_numeric.expected @@ -21,6 +21,8 @@ CARAvan 8.013 45 46.89 +576,446.88800000 +576,446.890 4567..457 4567. 4567.1 @@ -28,8 +30,6 @@ CARAvan 37800 45670.89079.098 45670.89079.1 -576,446.88800000 -576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/multiple_decimals_numeric.expected.debug b/tests/fixtures/sort/multiple_decimals_numeric.expected.debug index 0ae6d2958a5..948c4869c32 100644 --- a/tests/fixtures/sort/multiple_decimals_numeric.expected.debug +++ b/tests/fixtures/sort/multiple_decimals_numeric.expected.debug @@ -67,6 +67,12 @@ __ 46.89 _____ _____ +576,446.88800000 +___ +________________ +576,446.890 +___ +___________ >>>>>>>>>>4567..457 _____ ___________________ @@ -88,12 +94,6 @@ _____________________ >>>>>>45670.89079.1 ___________ ___________________ -576,446.88800000 -___ -________________ -576,446.890 -___ -___________ 4798908.340000000000 ____________________ ____________________ From fb95cb0ea0eb932848eee302b7ae8a4e1e367148 Mon Sep 17 00:00:00 2001 From: mattsu Date: Fri, 23 Jan 2026 08:33:04 +0900 Subject: [PATCH 11/11] fix(sort): skip ordering incompatibility check when --key is specified Previously, the ordering_incompatible check was performed unconditionally, causing errors even when the --key option was used, where such incompatibilities might not apply. This change adds a condition to skip the check if --key is present, ensuring correct behavior for key-based sorting. --- src/uu/sort/src/sort.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 8e3b6bcdf71..30e10a2db07 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -2080,7 +2080,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let ignore_non_printing = matches.get_flag(options::IGNORE_NONPRINTING); let ignore_case = matches.get_flag(options::IGNORE_CASE); - if ordering_incompatible(mode_flags, dictionary_order, ignore_non_printing) { + if !matches.contains_id(options::KEY) + && ordering_incompatible(mode_flags, dictionary_order, ignore_non_printing) + { let opts = ordering_opts_string( mode_flags, dictionary_order,