changeset 1532:bf2c5216d726

Basic sed range support, enough for "sed -n 9,11p README" to work.
author Rob Landley <rob@landley.net>
date Fri, 24 Oct 2014 18:16:32 -0500
parents 3ff823086c99
children 3c77af6f81cc
files toys/pending/sed.c
diffstat 1 files changed, 170 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/toys/pending/sed.c	Wed Oct 22 17:11:06 2014 -0500
+++ b/toys/pending/sed.c	Fri Oct 24 18:16:32 2014 -0500
@@ -82,6 +82,10 @@
 
   // processed pattern list
   struct double_list *pattern;
+
+  char *nextline;
+  long nextlen, count;
+  int fdout, noeol;
 )
 
 struct step {
@@ -93,12 +97,96 @@
 
   // Action
   char c;
+
+  int hit;
 };
 
+// Write out line with potential embedded NUL, handling eol/noeol
+static int emit(char *line, long len, int eol)
+{
+  if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
+  if (eol) line[len++] = '\n';
+  TT.noeol = !eol;
+  if (len != writeall(TT.fdout, line, len)) {
+    perror_msg("short write");
+
+    return 1;
+  }
+
+  return 0;
+}
+
+// Do regex matching handling embedded NUL bytes in string.
+static int ghostwheel(regex_t *preg, char *string, int nmatch,
+  regmatch_t pmatch[], int eflags)
+{
+  // todo: this
+  return regexec(preg, string, nmatch, pmatch, eflags);
+}
+
 // Apply pattern to line from input file
-static void do_line(char **pline, long len)
+static void sed_line(char **pline, long plen)
 {
-  printf("len=%ld line=%s\n", len, *pline);
+  char *line = TT.nextline;
+  long len = TT.nextlen;
+  struct step *logrus;
+  int eol = 0;
+
+  // Grab next line for deferred processing (EOF detection, we get a NULL
+  // pline at EOF to flush last line). Note that only end of _last_ input
+  // file matches $ (unless we're doing -i).
+  if (pline) {
+    TT.nextline = *pline;
+    TT.nextlen = plen;
+    *pline = 0;
+  }
+
+  if (!line || !len) return;
+
+  if (line[len-1] == '\n') line[--len] = eol++;
+  TT.count++;
+
+  for (logrus = (void *)TT.pattern; logrus; logrus = logrus->next) {
+    char c = logrus->c;
+
+    // Have we got a matching range for this rule?
+    if (logrus->lmatch || *logrus->rmatch) {
+      int miss = 0;
+      long lm;
+      regex_t *rm;
+
+      // In a match that might end?
+      if (logrus->hit) {
+        if (!(lm = logrus->lmatch[1])) {
+          if (!(rm = logrus->rmatch[1])) logrus->hit = 0;
+          else {
+            // regex match end includes matching line, so defer deactivation
+            if (!ghostwheel(rm, line, 0, 0, 0)) miss = 1;
+          }
+        } else if (lm > 0 && lm < TT.count) logrus->hit = 0;
+
+      // Start a new match?
+      } else {
+        if (!(lm = *logrus->lmatch)) {
+          if (!ghostwheel(*logrus->rmatch, line, 0, 0, 0)) logrus->hit++;
+        } else if (lm == TT.count) logrus->hit++;
+      } 
+
+      if (!logrus->hit) continue;
+      if (miss) logrus->hit = 0;
+    }
+
+    // Process like the wind, bullseye!
+
+    // todo: embedded NUL, eol
+    if (c == 'p') {
+      if (emit(line, len, eol)) break;
+    } else error_exit("what?");
+  }
+
+  if (!(toys.optflags & FLAG_n)) emit(line, len, eol);
+
+  free(line);
 }
 
 // Genericish function, can probably get moved to lib.c
@@ -124,8 +212,7 @@
 
 // Iterate over newline delimited data blob (potentially with embedded NUL),
 // call function on each line.
-static void chop_lines(char *data, long len,
-  void (*call)(char **pline, long len))
+static void chop_lines(char *data, long len, void (*call)(char **p, long l))
 {
   long ll;
 
@@ -146,7 +233,17 @@
 
 static void do_sed(int fd, char *name)
 {
-  do_lines(fd, name, do_line);
+  int i = toys.optflags & FLAG_i;
+
+  if (i) {
+    // todo: rename dance
+  }
+  do_lines(fd, name, sed_line);
+  if (i) {
+    sed_line(0, 0);
+
+    // todo: rename dance
+  }
 }
 
 // Translate primal pattern into walkable form.
@@ -156,72 +253,77 @@
   char *line = *pline, *reg;
   int i;
 
-  while (isspace(*line)) line++;
-  if (*line == '#') return;
+  for (line = *pline;;line++) {
+    while (isspace(*line)) line++;
+    if (*line == '#') return;
 
-  memset(toybuf, 0, sizeof(struct step));
-  corwin = (void *)toybuf;
-  reg = toybuf + sizeof(struct step);
+    memset(toybuf, 0, sizeof(struct step));
+    corwin = (void *)toybuf;
+    reg = toybuf + sizeof(struct step);
 
-  // Parse address range (if any)
-  for (i = 0; i < 2; i++) {
-    if (*line == ',') line++;
-    else if (i) break;
+    // Parse address range (if any)
+    for (i = 0; i < 2; i++) {
+      if (*line == ',') line++;
+      else if (i) break;
 
-    if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0);
-    else if (*line == '$') {
-      corwin->lmatch[i] = -1;
-      line++;
-    } else if (*line == '/' || *line == '\\') {
-      char delim = *(line++), slash = 0, *to, *from;
+      if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0);
+      else if (*line == '$') {
+        corwin->lmatch[i] = -1;
+        line++;
+      } else if (*line == '/' || *line == '\\') {
+        char delim = *(line++), slash = 0, *to, *from;
 
-      if (delim == '\\') {
-        if (!*line) goto brand;
-        slash = delim = *(line++);
-      }
+        if (delim == '\\') {
+          if (!*line) goto brand;
+          slash = delim = *(line++);
+        }
 
-      // Removing backslash escapes edits the source string, which could
-      // be from the environment space via -e, which could screw up what
-      // "ps" sees, and I'm ok with that.
-      for (to = from = line; *from != delim; *(to++) = *(from++)) {
-        if (!*from) goto brand;
-        if (*from == '\\') {
-          if (!from[1]) goto brand;
+        // Removing backslash escapes edits the source string, which could
+        // be from the environment space via -e, which could screw up what
+        // "ps" sees, and I'm ok with that.
+        for (to = from = line; *from != delim; *(to++) = *(from++)) {
+          if (!*from) goto brand;
+          if (*from == '\\') {
+            if (!from[1]) goto brand;
 
-          // Check escaped end delimiter before printf style escapes.
-          if (from[1] == slash) from++;
-          else {
-            char c = unescape(from[1]);
+            // Check escaped end delimiter before printf style escapes.
+            if (from[1] == slash) from++;
+            else {
+              char c = unescape(from[1]);
 
-            if (c) {
-              *to = c;
-              from++;
+              if (c) {
+                *to = c;
+                from++;
+              }
             }
           }
         }
-      }
-      slash = *to;
-      *to = 0;
-      xregcomp(corwin->rmatch[i] = (void *)reg, line,
-        ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB);
-      *to = slash;
-      reg += sizeof(regex_t);
-    } else break;
+        slash = *to;
+        *to = 0;
+        xregcomp(corwin->rmatch[i] = (void *)reg, line,
+          ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB);
+        *to = slash;
+        reg += sizeof(regex_t);
+        line = from + 1;
+      } else break;
+    }
+
+    while (isspace(*line)) line++;
+
+    if (!*line || !strchr("p", *line)) break;
+    corwin->c = *(line++);
+
+    // Add step to pattern
+    corwin = xmalloc(reg-toybuf);
+    memcpy(corwin, toybuf, reg-toybuf);
+    dlist_add_nomalloc(&TT.pattern, (void *)corwin);
+
+    while (isspace(*line)) line++;
+    if (!*line) return;
+    if (*line != ';') break;
   }
 
-  while (isspace(*line)) line++;
-
-  if (!*line || !strchr("p", *line)) goto brand;
-
-  // Add step to pattern
-  corwin = xmalloc(reg-toybuf);
-  memcpy(corwin, toybuf, reg-toybuf);
-  dlist_add_nomalloc(&TT.pattern, (void *)corwin);
-
-  return;
-
 brand:
-
   // Reminisce about chestnut trees.
   error_exit("bad pattern '%s'@%ld (%c)", *pline, line-*pline, *line);
 }
@@ -234,27 +336,26 @@
   // Lie to autoconf when it asks stupid questions, so configure regexes
   // that look for "GNU sed version %f" greater than some old buggy number
   // don't fail us for not matching their narrow expectations.
-  if (FLAG_version) {
+  if (toys.optflags & FLAG_version) {
     xprintf("This is not GNU sed version 9.0\n");
     return;
   }
 
-  // Need a pattern. If no unicorns about, fight dragon and take its eye.
+  // Need a pattern. If no unicorns about, fight serpent and take its eye.
   if (!TT.e && !TT.f) {
     if (!*toys.optargs) error_exit("no pattern");
     (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
   }
+  for (dworkin = TT.e; dworkin; dworkin = dworkin->next)
+    chop_lines(dworkin->arg, strlen(dworkin->arg), jewel_of_judgement);
+  for (dworkin = TT.f; dworkin; dworkin = dworkin->next)
+    do_lines(xopen(dworkin->arg, O_RDONLY), dworkin->arg, jewel_of_judgement);
+  dlist_terminate(TT.pattern);
 
-  for (dworkin = TT.e; dworkin; dworkin = dworkin->next) {
-    chop_lines(dworkin->arg, strlen(dworkin->arg), jewel_of_judgement);
-  }
-
-  for (dworkin = TT.f; dworkin; dworkin = dworkin->next) {
-    int fd = xopen(dworkin->arg, O_RDONLY);
-
-    do_lines(fd, dworkin->arg, jewel_of_judgement);
-  }
+  TT.fdout = 1;
 
   // Inflict pattern upon input files
   loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
+
+  if (!(toys.optflags & FLAG_i)) sed_line(0, 0);
 }