The following C program contains the re2post
function that converts a regular expression pattern string using infix operators into a pattern string using postfix operators. I believe this is Ken Thompson’s implementation with tweaks by Russ Cox. I found converting this code into Zig difficult because it makes heavy use of a few C-isms, especially iteration over the elements of arrays via pointer arithmetic.
#include <stdio.h>
#include <string.h>
/*
* Convert infix regexp re to postfix notation.
* Insert . as explicit concatenation operator.
* Cheesy parser, return static buffer.
*/
char*
re2post(char *re)
{
int nalt, natom;
static char buf[8000];
char *dst;
struct {
int nalt;
int natom;
} paren[100], *p;
p = paren;
dst = buf;
nalt = 0;
natom = 0;
if(strlen(re) >= sizeof buf/2)
return NULL;
for(; *re; re++){
switch(*re){
case '(':
if(natom > 1){
--natom;
*dst++ = '.';
}
if(p >= paren+100)
return NULL;
p->nalt = nalt;
p->natom = natom;
p++;
nalt = 0;
natom = 0;
break;
case '|':
if(natom == 0)
return NULL;
while(--natom > 0)
*dst++ = '.';
nalt++;
break;
case ')':
if(p == paren)
return NULL;
if(natom == 0)
return NULL;
while(--natom > 0)
*dst++ = '.';
for(; nalt > 0; nalt--)
*dst++ = '|';
--p;
nalt = p->nalt;
natom = p->natom;
natom++;
break;
case '*':
case '+':
case '?':
if(natom == 0)
return NULL;
*dst++ = *re;
break;
default:
if(natom > 1){
--natom;
*dst++ = '.';
}
*dst++ = *re;
natom++;
break;
}
}
if(p != paren)
return NULL;
while(--natom > 0)
*dst++ = '.';
for(; nalt > 0; nalt--)
*dst++ = '|';
*dst = 0;
return buf;
}
int main(void) {
printf("%s\n", re2post("a(b|c)+d"));
}
Particularly difficult to understand is the handling of the paren
array of structs, which seems to me is basically a stack, but the implicit use of default values for ints makes it hard to follow to a non-C programmer like me.
Compiled with
zig cc re2post.c -o re2post
and run with
./re2post
it produces the output:
abc|+.d.