You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
413 lines
11 KiB
413 lines
11 KiB
/* sort.c - put input lines into order
 *
 * Copyright 2004, 2008 Rob Landley <rob@landley.net>
 *
 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
 *
 * Deviations from POSIX: Lots.
 * We invented -x

USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))

config SORT
  bool "sort"
  default y
  help
    usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]

    Sort all lines of text from input files (or stdin) to stdout.

    -r Reverse
    -u Unique lines only
    -n Numeric order (instead of alphabetical)
    -b Ignore leading blanks (or trailing blanks in second part of key)
    -c Check whether input is sorted
    -d Dictionary order (use alphanumeric and whitespace chars only)
    -f Force uppercase (case insensitive sort)
    -i Ignore nonprinting characters
    -M Month sort (jan, feb, etc)
    -x Hexadecimal numerical sort
    -s Skip fallback sort (only sort with keys)
    -z Zero (null) terminated lines
    -k Sort by "key" (see below)
    -t Use a key separator other than whitespace
    -o Output to FILE instead of stdout
    -V Version numbers (name-1.234-rc6.5b.tgz)

    Sorting by key looks at a subset of the words on each line. -k2 uses the
    second word to the end of the line, -k2,2 looks at only the second word,
    -k2,4 looks from the start of the second to the end of the fourth word.
    -k2.4,5 starts from the fourth character of the second word, to the end
    of the fifth word. Specifying multiple keys uses the later keys as tie
    breakers, in order. A type specifier appended to a sort key (such as -k2,2n)
    applies only to sorting that key.

config SORT_FLOAT
  bool
  default y
  depends on TOYBOX_FLOAT
  help
    usage: sort [-g]

    -g General numeric sort (double precision with nan and inf)
*/
|
|
|
|
#define FOR_sort
|
|
#include "toys.h"
|
|
|
|
GLOBALS(
|
|
char *t;
|
|
struct arg_list *k;
|
|
char *o, *T, S;
|
|
|
|
void *key_list;
|
|
int linecount;
|
|
char **lines, *name;
|
|
)
|
|
|
|
// The sort types are n, g, M, x, and V.
// u, c, s, and z apply to the top level only, not to keys.
// b at top level implies bb.
// The remaining options can be applied to individual sort keys.
|
|
|
|
// Synthetic flag (no command line letter): ignore trailing blanks in a key.
// Uses an unsigned constant because shifting a signed 1 into bit 31 is
// undefined, and a negative int would sign-extend into a wider flag word.
#define FLAG_bb (1U<<31)
|
|
|
|
// One parsed -k key specification; nodes are chained in command line order.
struct sort_key {
  // Next key in the list (used as a tie breaker), or NULL for the last one.
  struct sort_key *next_key;

  // [0] start word, [1] start char, [2] end word, [3] end char.
  // A zero entry means "not specified".
  unsigned range[4];

  // FLAG_* bits attached to this key; 0 means fall back to the global flags.
  int flags;
};
|
|
|
|
// Copy of the part of this string corresponding to a key/flags.
|
|
|
|
static char *get_key_data(char *str, struct sort_key *key, int flags)
|
|
{
|
|
int start = 0, end, len, i, j;
|
|
|
|
// Special case whole string, so we don't have to make a copy
|
|
|
|
if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
|
|
&& !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
|
|
|
|
// Find start of key on first pass, end on second pass
|
|
|
|
len = strlen(str);
|
|
for (j=0; j<2; j++) {
|
|
if (!key->range[2*j]) end=len;
|
|
|
|
// Loop through fields
|
|
else {
|
|
end = 0;
|
|
for (i = 1; i < key->range[2*j]+j; i++) {
|
|
|
|
// Skip leading blanks
|
|
if (str[end] && !TT.t) while (isspace(str[end])) end++;
|
|
|
|
// Skip body of key
|
|
for (; str[end]; end++) {
|
|
if (TT.t) {
|
|
if (str[end]==*TT.t) {
|
|
end++;
|
|
break;
|
|
}
|
|
} else if (isspace(str[end])) break;
|
|
}
|
|
}
|
|
}
|
|
if (!j) start=end;
|
|
}
|
|
|
|
// Key with explicit separator starts after the separator
|
|
if (TT.t && str[start]==*TT.t) start++;
|
|
|
|
// Strip leading and trailing whitespace if necessary
|
|
if ((flags&FLAG_b) || (!TT.t && !key->range[3]))
|
|
while (isspace(str[start])) start++;
|
|
if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
|
|
|
|
// Handle offsets on start and end
|
|
if (key->range[3]) {
|
|
end += key->range[3]-1;
|
|
if (end>len) end=len;
|
|
}
|
|
if (key->range[1]) {
|
|
start += key->range[1]-1;
|
|
if (start>len) start=len;
|
|
}
|
|
|
|
// Make the copy
|
|
if (end<start) end = start;
|
|
str = xstrndup(str+start, end-start);
|
|
|
|
// Handle -d
|
|
if (flags&FLAG_d) {
|
|
for (start = end = 0; str[end]; end++)
|
|
if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
|
|
str[start] = 0;
|
|
}
|
|
|
|
// Handle -i
|
|
if (flags&FLAG_i) {
|
|
for (start = end = 0; str[end]; end++)
|
|
if (isprint(str[end])) str[start++] = str[end];
|
|
str[start] = 0;
|
|
}
|
|
|
|
return str;
|
|
}
|
|
|
|
// append a sort_key to key_list.
|
|
|
|
static struct sort_key *add_key(void)
|
|
{
|
|
void **stupid_compiler = &TT.key_list;
|
|
struct sort_key **pkey = (struct sort_key **)stupid_compiler;
|
|
|
|
while (*pkey) pkey = &((*pkey)->next_key);
|
|
return *pkey = xzalloc(sizeof(struct sort_key));
|
|
}
|
|
|
|
// Perform actual comparison
|
|
static int compare_values(int flags, char *x, char *y)
|
|
{
|
|
if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
|
|
char *xx,*yy;
|
|
double dx = strtod(x,&xx), dy = strtod(y,&yy);
|
|
int xinf, yinf;
|
|
|
|
// not numbers < NaN < -infinity < numbers < +infinity
|
|
|
|
if (x==xx) return y==yy ? 0 : -1;
|
|
if (y==yy) return 1;
|
|
|
|
// Check for isnan
|
|
if (dx!=dx) return (dy!=dy) ? 0 : -1;
|
|
if (dy!=dy) return 1;
|
|
|
|
// Check for infinity. (Could underflow, but avoids needing libm.)
|
|
xinf = (1.0/dx == 0.0);
|
|
yinf = (1.0/dy == 0.0);
|
|
if (xinf) {
|
|
if(dx<0) return (yinf && dy<0) ? 0 : -1;
|
|
return (yinf && dy>0) ? 0 : 1;
|
|
}
|
|
if (yinf) return dy<0 ? 1 : -1;
|
|
|
|
return dx>dy ? 1 : (dx<dy ? -1 : 0);
|
|
} else if (flags & FLAG_M) {
|
|
struct tm thyme;
|
|
int dx;
|
|
char *xx,*yy;
|
|
|
|
xx = strptime(x,"%b",&thyme);
|
|
dx = thyme.tm_mon;
|
|
yy = strptime(y,"%b",&thyme);
|
|
if (!xx) return !yy ? 0 : -1;
|
|
else if (!yy) return 1;
|
|
else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
|
|
|
|
} else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
|
|
else if (flags & FLAG_V) {
|
|
while (*x && *y) {
|
|
while (*x && *x == *y) x++, y++;
|
|
if (isdigit(*x) && isdigit(*y)) {
|
|
long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
|
|
|
|
if (xx<yy) return -1;
|
|
if (xx>yy) return 1;
|
|
} else {
|
|
char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
|
|
|
|
// -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
|
|
if (xx != yy) {
|
|
if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
|
|
else return 1;
|
|
}
|
|
}
|
|
}
|
|
return *x ? !!*y : -1;
|
|
} else if (flags & FLAG_n) {
|
|
// Full floating point version of -n
|
|
if (CFG_SORT_FLOAT) {
|
|
double dx = atof(x), dy = atof(y);
|
|
|
|
return dx>dy ? 1 : (dx<dy ? -1 : 0);
|
|
// Integer version of -n for tiny systems
|
|
} else return atoi(x)-atoi(y);
|
|
|
|
// Ascii sort
|
|
} else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
|
|
}
|
|
|
|
// Callback from qsort(): Iterate through key_list and perform comparisons.
|
|
static int compare_keys(const void *xarg, const void *yarg)
|
|
{
|
|
int flags = toys.optflags, retval = 0;
|
|
char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
|
|
struct sort_key *key;
|
|
|
|
for (key=(struct sort_key *)TT.key_list; !retval && key; key = key->next_key){
|
|
flags = key->flags ? key->flags : toys.optflags;
|
|
|
|
// Chop out and modify key chunks, handling -dfib
|
|
|
|
x = get_key_data(xx, key, flags);
|
|
y = get_key_data(yy, key, flags);
|
|
|
|
retval = compare_values(flags, x, y);
|
|
|
|
// Free the copies get_key_data() made.
|
|
|
|
if (x != xx) free(x);
|
|
if (y != yy) free(y);
|
|
|
|
if (retval) break;
|
|
}
|
|
|
|
// Perform fallback sort if necessary (always case insensitive, no -f,
|
|
// the point is to get a stable order even for -f sorts)
|
|
if (!retval && !FLAG(s)) {
|
|
flags = toys.optflags;
|
|
retval = strcmp(xx, yy);
|
|
}
|
|
|
|
return retval * ((flags&FLAG_r) ? -1 : 1);
|
|
}
|
|
|
|
// Read each line from file, appending to a big array.
|
|
static void sort_lines(char **pline, long len)
|
|
{
|
|
char *line;
|
|
|
|
if (!pline) return;
|
|
line = *pline;
|
|
if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0;
|
|
*pline = 0;
|
|
|
|
// handle -c here so we don't allocate more memory than necessary.
|
|
if (FLAG(c)) {
|
|
int j = FLAG(u) ? -1 : 0;
|
|
|
|
if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
|
|
error_exit("%s: Check line %d\n", TT.name, TT.linecount);
|
|
free(TT.lines);
|
|
TT.lines = (void *)line;
|
|
} else {
|
|
if (!(TT.linecount&63))
|
|
TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
|
|
TT.lines[TT.linecount] = line;
|
|
}
|
|
TT.linecount++;
|
|
}
|
|
|
|
// Callback from loopfiles to handle input files.
|
|
static void sort_read(int fd, char *name)
|
|
{
|
|
TT.name = name;
|
|
do_lines(fd, FLAG(z) ? '\0' : '\n', sort_lines);
|
|
}
|
|
|
|
void sort_main(void)
|
|
{
|
|
int idx, fd = 1;
|
|
|
|
// Parse -k sort keys.
|
|
if (TT.k) {
|
|
struct arg_list *arg;
|
|
|
|
for (arg = TT.k; arg; arg = arg->next) {
|
|
struct sort_key *key = add_key();
|
|
char *temp;
|
|
int flag;
|
|
|
|
idx = 0;
|
|
temp = arg->arg;
|
|
while (*temp) {
|
|
// Start of range
|
|
key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
|
|
if (*temp=='.')
|
|
key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
|
|
|
|
// Handle flags appended to a key type.
|
|
for (;*temp;temp++) {
|
|
char *temp2, *optlist;
|
|
|
|
// Note that a second comma becomes an "Unknown key" error.
|
|
|
|
if (*temp==',' && !idx++) {
|
|
temp++;
|
|
break;
|
|
}
|
|
|
|
// Which flag is this?
|
|
|
|
optlist = toys.which->options;
|
|
temp2 = strchr(optlist, *temp);
|
|
flag = (1<<(optlist-temp2+strlen(optlist)-1));
|
|
|
|
// Was it a flag that can apply to a key?
|
|
|
|
if (!temp2 || flag>FLAG_x
|
|
|| (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
|
|
{
|
|
toys.exitval = 2;
|
|
error_exit("Unknown key option.");
|
|
}
|
|
// b after , means strip _trailing_ space, not leading.
|
|
if (idx && flag==FLAG_b) flag = FLAG_bb;
|
|
key->flags |= flag;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// global b flag strips both leading and trailing spaces
|
|
if (FLAG(b)) toys.optflags |= FLAG_bb;
|
|
|
|
// If no keys, perform alphabetic sort over the whole line.
|
|
if (!TT.key_list) add_key()->range[0] = 1;
|
|
|
|
// Open input files and read data, populating TT.lines[TT.linecount]
|
|
loopfiles(toys.optargs, sort_read);
|
|
|
|
// The compare (-c) logic was handled in sort_read(),
|
|
// so if we got here, we're done.
|
|
if (FLAG(c)) goto exit_now;
|
|
|
|
// Perform the actual sort
|
|
qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
|
|
|
|
// handle unique (-u)
|
|
if (FLAG(u)) {
|
|
int jdx;
|
|
|
|
for (jdx=0, idx=1; idx<TT.linecount; idx++) {
|
|
if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
|
|
free(TT.lines[idx]);
|
|
else TT.lines[++jdx] = TT.lines[idx];
|
|
}
|
|
if (TT.linecount) TT.linecount = jdx+1;
|
|
}
|
|
|
|
// Open output file if necessary. We can't do this until we've finished
|
|
// reading in case the output file is one of the input files.
|
|
if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
|
|
|
|
// Output result
|
|
for (idx = 0; idx<TT.linecount; idx++) {
|
|
char *s = TT.lines[idx];
|
|
unsigned i = strlen(s);
|
|
|
|
if (!FLAG(z)) s[i] = '\n';
|
|
xwrite(fd, s, i+1);
|
|
if (CFG_TOYBOX_FREE) free(s);
|
|
}
|
|
|
|
exit_now:
|
|
if (CFG_TOYBOX_FREE) {
|
|
if (fd != 1) close(fd);
|
|
free(TT.lines);
|
|
}
|
|
}
|