You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
227 lines
6.4 KiB
227 lines
6.4 KiB
/* cut.c - print selected ranges from a file
|
|
*
|
|
* Copyright 2016 Rob Landley <rob@landley.net>
|
|
*
|
|
* http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
|
|
*
|
|
* Deviations from posix: added -DF. We can only accept 512 selections, and
|
|
* "-" counts as start to end. Using spaces to separate a comma-separated list
|
|
* is silly and inconsistent with dd, ps, cp, and mount.
|
|
*
|
|
* todo: -n, -s with -c
|
|
|
|
USE_CUT(NEWTOY(cut, "b*|c*|f*|F*|C*|O(output-delimiter):d:sDn[!cbf]", TOYFLAG_USR|TOYFLAG_BIN))
|
|
|
|
config CUT
|
|
bool "cut"
|
|
default y
|
|
help
|
|
usage: cut [-Ds] [-bcfF LIST] [-dO DELIM] [FILE...]
|
|
|
|
Print selected parts of lines from each FILE to standard output.
|
|
|
|
Each selection LIST is comma separated, either numbers (counting from 1)
|
|
or dash separated ranges (inclusive, with X- meaning to end of line and -X
|
|
from start). By default selection ranges are sorted and collated, use -D
|
|
to prevent that.
|
|
|
|
-b Select bytes
|
|
-c Select UTF-8 characters
|
|
-C Select unicode columns
|
|
-d Use DELIM (default is TAB for -f, run of whitespace for -F)
|
|
-D Don't sort/collate selections or match -fF lines without delimiter
|
|
-f Select fields (words) separated by single DELIM character
|
|
-F Select fields separated by DELIM regex
|
|
-O Output delimiter (default one space for -F, input delim for -f)
|
|
-s Skip lines without delimiters
|
|
*/
|
|
#define FOR_cut
|
|
#include "toys.h"
|
|
|
|
GLOBALS(
|
|
char *d, *O;
|
|
struct arg_list *select[5]; // we treat them the same, so loop through
|
|
|
|
int pairs;
|
|
regex_t reg;
|
|
)
|
|
|
|
|
|
// Apply selections to an input line, producing output
|
|
static void cut_line(char **pline, long len)
|
|
{
|
|
unsigned *pairs = (void *)toybuf;
|
|
char *line;
|
|
int i, j;
|
|
|
|
if (!pline) return;
|
|
line = *pline;
|
|
if (len && line[len-1]=='\n') line[--len] = 0;
|
|
|
|
// Loop through selections
|
|
for (i=0; i<TT.pairs; i++) {
|
|
unsigned start = pairs[2*i], end = pairs[(2*i)+1], count;
|
|
char *s = line, *ss;
|
|
|
|
// input: start/end position, count=difference between them
|
|
// output: s = start of string, len = bytes to output
|
|
|
|
if (start) start--;
|
|
if (start>=len) continue;
|
|
if (!end || end>len) end = len;
|
|
count = end-start;
|
|
|
|
// Find start and end of output string for the relevant selection type
|
|
if (toys.optflags&FLAG_b) s += start;
|
|
else if (toys.optflags&FLAG_C) {
|
|
// crunch_str() currently assumes that combining characters get
|
|
// escaped, to provide an unambiguous visual representation.
|
|
// This assumes the input string is null terminated.
|
|
if (start) crunch_str(&s, start, 0, 0, 0);
|
|
if (!*s) continue;
|
|
start = s-line;
|
|
ss = s;
|
|
crunch_str(&ss, count, 0, 0, 0);
|
|
count = ss-s;
|
|
|
|
} else if (toys.optflags&FLAG_c) {
|
|
wchar_t wc;
|
|
char *sss;
|
|
|
|
// Find start
|
|
ss = line+len;
|
|
while (start && s<ss) {
|
|
if (0<=(j = utf8towc(&wc, s, len))) start--;
|
|
s += (j<1) ? 1 : j;
|
|
}
|
|
if (s == ss) continue;
|
|
|
|
// Find end
|
|
end = count;
|
|
sss = s;
|
|
while (end && sss<ss) {
|
|
if (0<=(j = utf8towc(&wc, sss, len))) end--;
|
|
sss += (j<1) ? 1 : j;
|
|
}
|
|
count = sss-s;
|
|
} else {
|
|
regmatch_t match;
|
|
|
|
// Loop through skipping appropriate number of fields
|
|
for (j = 0; j<2; j++) {
|
|
ss = s;
|
|
if (j) start = count;
|
|
else end = start;
|
|
while (*ss && start) {
|
|
if (toys.optflags&FLAG_f) {
|
|
if (!strchr(TT.d, *ss++)) continue;
|
|
if (!--start && j) ss--;
|
|
} else {
|
|
if (regexec(&TT.reg, ss, 1, &match, REG_NOTBOL|REG_NOTEOL)) {
|
|
ss = line+len;
|
|
continue;
|
|
}
|
|
if (!match.rm_eo) break; // zero length match == no delimiter
|
|
ss += (!--start && j) ? match.rm_so : match.rm_eo;
|
|
}
|
|
}
|
|
if (!j && !*(s = ss)) break;
|
|
}
|
|
|
|
// If we never encountered even one separator, print whole line (posix!)
|
|
if (!j && end == start) {
|
|
if (toys.optflags&FLAG_D) break;
|
|
if (toys.optflags&FLAG_s) return;
|
|
fwrite(line, len, 1, stdout);
|
|
break;
|
|
} else if (!*s) continue;
|
|
count = ss-s;
|
|
}
|
|
if (i && TT.O) fputs(TT.O, stdout);
|
|
fwrite(s, count, 1, stdout);
|
|
}
|
|
xputc('\n');
|
|
}
|
|
|
|
static int compar(unsigned *a, unsigned *b)
|
|
{
|
|
if (*a<*b) return -1;
|
|
if (*a>*b) return 1;
|
|
if (a[1]<b[1]) return -1;
|
|
if (a[1]>b[1]) return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
// parse A or A-B or A- or -B
|
|
static char *get_range(void *data, char *str, int len)
|
|
{
|
|
char *end = str;
|
|
unsigned *pairs = (void *)toybuf, i;
|
|
|
|
// Using toybuf[] to store ranges means we can have 512 selections max.
|
|
if (TT.pairs == sizeof(toybuf)/sizeof(int)) perror_exit("select limit");
|
|
pairs += 2*TT.pairs++;
|
|
|
|
pairs[1] = UINT_MAX;
|
|
for (i = 0; ;i++) {
|
|
if (i==2) return end;
|
|
if (isdigit(*end)) {
|
|
long long ll = estrtol(end, &end, 10);
|
|
|
|
if (ll<1 || ll>UINT_MAX || errno) return end;
|
|
pairs[i] = ll;
|
|
}
|
|
if (*end++ != '-') break;
|
|
}
|
|
if (!i) pairs[1] = pairs[0];
|
|
if ((end-str)<len) return end;
|
|
if (pairs[0]>pairs[1]) return str;
|
|
|
|
// No error
|
|
return 0;
|
|
}
|
|
|
|
void cut_main(void)
|
|
{
|
|
int i;
|
|
char buf[8];
|
|
|
|
// Parse command line arguments
|
|
if ((toys.optflags&(FLAG_s|FLAG_f|FLAG_F))==FLAG_s)
|
|
error_exit("-s needs -Ff");
|
|
if ((toys.optflags&(FLAG_d|FLAG_f|FLAG_F))==FLAG_d)
|
|
error_exit("-d needs -Ff");
|
|
if (!TT.d) TT.d = (toys.optflags&FLAG_F) ? "[[:space:]][[:space:]]*" : "\t";
|
|
if (toys.optflags&FLAG_F) xregcomp(&TT.reg, TT.d, REG_EXTENDED);
|
|
if (!TT.O) {
|
|
if (toys.optflags&FLAG_F) TT.O = " ";
|
|
else if (toys.optflags&FLAG_f) TT.O = TT.d;
|
|
}
|
|
|
|
// Parse ranges, which are attached to a selection type (only one can be set)
|
|
for (i = 0; i<ARRAY_LEN(TT.select); i++) {
|
|
sprintf(buf, "bad -%c", "CFfcb"[i]); // reverse order from newtoy optstr
|
|
if (TT.select[i]) comma_args(TT.select[i], 0, buf, get_range);
|
|
}
|
|
if (!TT.pairs) error_exit("no selections");
|
|
|
|
// Sort and collate selections
|
|
if (!(toys.optflags&FLAG_D)) {
|
|
int from, to;
|
|
unsigned *pairs = (void *)toybuf;
|
|
|
|
qsort(toybuf, TT.pairs, 8, (void *)compar);
|
|
for (to = 0, from = 2; from/2 < TT.pairs; from += 2) {
|
|
if (pairs[from] > pairs[to+1]) {
|
|
to += 2;
|
|
memcpy(pairs+to, pairs+from, 2*sizeof(unsigned));
|
|
} else if (pairs[from+1] > pairs[to+1]) pairs[to+1] = pairs[from+1];
|
|
}
|
|
TT.pairs = (to/2)+1;
|
|
}
|
|
|
|
// For each argument, loop through lines of file and call cut_line() on each
|
|
loopfiles_lines(toys.optargs, cut_line);
|
|
}
|