From 395c5fcb098376f5a6267283b9e87e4f85170471 Mon Sep 17 00:00:00 2001 From: nova Date: Sun, 21 Sep 2025 21:30:15 +0200 Subject: [PATCH] improvements to sort_natural --- sorting.c | 112 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 54 deletions(-) diff --git a/sorting.c b/sorting.c index 32f9fc8..e7aae3e 100644 --- a/sorting.c +++ b/sorting.c @@ -24,75 +24,79 @@ int skip_dot(const struct dirent *entry){ return 1; } -int sort_natural(const void *file0_, const void *file1_){ - file *file0 = (file*)file0_; - file *file1 = (file*)file1_; +int sort_natural(const void *file0, const void *file1){ - unsigned char *a = (unsigned char*)file0->file_name; - unsigned char *b = (unsigned char*)file1->file_name; - - if (file0->file_type & (FILE_TYPE_SYMLINK | FILE_TYPE_DIR) && !(file1->file_type & (FILE_TYPE_SYMLINK | FILE_TYPE_DIR))) { + if (((file*)file0)->file_type & (FILE_TYPE_SYMLINK | FILE_TYPE_DIR) && !(((file*)file1)->file_type & (FILE_TYPE_SYMLINK | FILE_TYPE_DIR))) { return -1; } - if (!(file0->file_type & (FILE_TYPE_SYMLINK | FILE_TYPE_DIR)) && file1->file_type & (FILE_TYPE_SYMLINK | FILE_TYPE_DIR)) { + if (!(((file*)file0)->file_type & (FILE_TYPE_SYMLINK | FILE_TYPE_DIR)) && ((file*)file1)->file_type & (FILE_TYPE_SYMLINK | FILE_TYPE_DIR)) { return 1; } - unsigned long num0 = 0; - unsigned long num1 = 0; - /* bitwise OR a with ' ' turns turns caps into small letters - * while doing this on all chars may cause unexpected behaviour on the extended ascii set, - * for now i dont care */ - while ((*a | ' ') == (*b | ' ') && *a != '\0') { + const unsigned char *a = (unsigned char*)((file*)file0)->file_name; + const unsigned char *b = (unsigned char*)((file*)file1)->file_name; + + long parsed_number0 = 0; + long parsed_number1 = 0; + char is_num = 0; + char result = 0; + + do { + is_num = 0; + if ((*a >= '0') && (*a <= '9')) { + parsed_number0 = 0; while((*a >= '0') && (*a <= '9')) { - num0 = (num0 * 10) + (*a - '0'); + 
parsed_number0 = (parsed_number0 * 10) + (*a - '0'); a++; } + is_num |= 1; + } + if ((*b >= '0') && (*b <= '9')) { + parsed_number1 = 0; while((*b >= '0') && (*b <= '9')) { - num1 = (num1 * 10) + (*b - '0'); + parsed_number1 = (parsed_number1 * 10) + (*b - '0'); b++; } - if (num0 != num1) { - break; - } - } else { - a++; - b++; - } - } - if (num0 == num1) { - if (*a == '\0') { - a--; - } - if (*b == '\0') { - b--; + is_num |= 2; } - unsigned char c0; - unsigned char c1; - /* in this case we actually check for a through z as otherwise unicode characters get ordered wrongly */ - if (*a >= 'A' && *a <= 'Z') { - c0 = (*a | ' '); - } else { - c0 = *a; + if (is_num) { + if (is_num == 1) { + if (*b < '0') { + result = 1; + } else { + result = -1; + } + break; + } else if (is_num == 2) { + if (*a < '0') { + result = -1; + } else { + result = 1; + } + break; + } else { + if (parsed_number0 > parsed_number1) { + result = 1; + break; + } else if (parsed_number0 < parsed_number1) { + result = -1; + break; + } + } + /* those breaks are not set here, due to the possibility that both numbers are equal + * in which case the comparison should continue */ } - if (*b >= 'A' && *b <= 'Z') { - c1 = (*b | ' '); - } else { - c1 = *b; - } - if (c0 > c1) { - return 1; - } else if (c0 < c1) { - return -1; - } else { - return 0; - } - } else if (num0 > num1) { - return 1; - } else { - return -1; - } + + unsigned char aa = ((*a >= 'A') && (*a <= 'Z')) ? (*a | ' ') : *a; + unsigned char bb = ((*b >= 'A') && (*b <= 'Z')) ? (*b | ' ') : *b; + /*using a simple aa - bb would occasionally cause underflows with wide chars*/ + result = ((aa == bb) ? 0 : ((aa > bb) ? 1 : -1 )); + a++; + b++; + } while (result == 0); + + return result; } int sort_alpha(const void *file0, const void *file1){ char *file_name0 = ((file*)file0)->file_name;