wrote:
> ...I'm writing a function that takes a string, and returns an array
> of strings which are the result of splitting the input on whitespace
> and parentheses (but the parentheses should also be included in the
> array as strings).
>
> an example:
>
> explode("foo bar baz") -> ["foo", "bar", "baz"]
> explode("foo(bar)baz") -> ["foo", "(", "bar", ")", "baz"]
>
<snip>
>
> #include <stdio.h>
Avoid using unprototyped functions...
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
> char* extract(char* str, int len) {
String lengths and object sizes in general are better measured with
size_t than int.
> char* out = (char*)malloc(len + 1);
You should check the return value of malloc.
> out = memcpy(out, str, len);
> out[len] = '\0';
> return out;
> }
>
> char istax(int ch) {
> int out = (ch=='(') | (ch==')');
Look up the difference between | and ||.
> return out;
> }
This isn't really worth a function.
> char** explode(char* str) {
> int nt = counttokens(str);
This design is somewhat poor. You have a separate function to
count the number of tokens, yet you use duplicate code to
extract the tokens. If the specifications change, then you
need to maintain two separate pieces of code synchronously.
> if(!nt) {
> return 0;
> }
>
> char** ret = (char**)malloc(nt);
C90 won't let you mix declarations and statements.
>
> int i = 0;
> int len = strlen(str);
> char ch;
> int start = 0;
> int mode = 0;
> int t = 0;
<snip>
You seem to have more indexing variables than you can handle.
Here's one way that I might do this. The 'work' function does
the counting and the allocation. I just scan through the string
in question (s), and use another pointer t to mark the beginning
of an 'identifier' token. Since t can be null, it serves as a
'mode' flag.
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Duplicate a span of characters into freshly malloc'd storage.
 *
 * s: first character to copy.
 * t: one past the last character to copy, or a null pointer to copy
 *    everything up to (not including) the terminating null of s.
 *
 * Returns a null-terminated copy that the caller must free(), or a
 * null pointer if the allocation fails.
 */
char *span_dup(const char *s, const char *t)
{
    size_t len;
    char *copy;

    if (t)
        len = (size_t)(t - s);
    else
        len = strlen(s);

    /* one extra byte for the terminator */
    copy = malloc(len + 1);
    if (!copy)
        return NULL;

    memcpy(copy, s, len);
    copy[len] = '\0';
    return copy;
}
/*
 * Scan s and split it into tokens: runs of characters delimited by
 * whitespace or parentheses, plus each '(' and ')' as a token of its own.
 *
 * a: if null, the tokens are only counted.  Otherwise it must point to
 *    room for at least as many pointers as a counting pass over the same
 *    string returns; each slot receives a span_dup() copy of one token,
 *    in order.  A slot is set to a null pointer when its copy could not
 *    be allocated, so the caller should check the slots.
 * s: the null-terminated string to scan.
 *
 * Returns the number of tokens found (the same in both modes).
 */
size_t work(char **a, const char *s)
{
    /* start of the identifier currently being scanned; null when not
       inside one, so it doubles as the "in token" flag */
    const char *start = NULL;
    size_t count = 0;

    for (; *s; s++) {
        const int is_paren = (*s == '(' || *s == ')');

        /* cast: passing a negative char to isspace() is undefined */
        if (is_paren || isspace((unsigned char)*s)) {
            /* a delimiter ends any identifier scanned so far */
            if (start) {
                count++;
                if (a)
                    *a++ = span_dup(start, s);
                start = NULL;
            }
            /* parentheses are tokens themselves; whitespace is dropped */
            if (is_paren) {
                count++;
                if (a)
                    *a++ = span_dup(s, s + 1);
            }
        } else if (!start) {
            start = s; /* first character of a new identifier */
        }
    }

    /* an identifier running up to the end of the string */
    if (start) {
        count++;
        if (a)
            *a++ = span_dup(start, s);
    }

    return count;
}
/*
 * Split s into an array of token strings (see work() for the rules).
 *
 * Returns a malloc'd array of malloc'd strings terminated by a null
 * pointer; the caller must free() every string and then the array.
 * An input with no tokens yields an array holding only the terminator.
 * Returns a null pointer if s is null or if any allocation fails; in
 * that case nothing is left allocated.
 */
char **explode(const char *s)
{
    size_t n, i;
    char **m;

    if (!s)
        return NULL;

    /* first pass counts, second pass fills */
    n = work(NULL, s);

    /* calloc checks the (n + 1) * size multiplication for overflow */
    m = calloc(n + 1, sizeof *m);
    if (!m)
        return NULL;

    work(m, s);
    m[n] = NULL;

    /* work() stores a null pointer for any token whose copy failed.
       Left in place it would look like the terminator and hide (leak)
       every later token, so treat it as failure of the whole call. */
    for (i = 0; i < n; i++) {
        if (!m[i]) {
            size_t j;
            for (j = 0; j < n; j++)
                free(m[j]);
            free(m);
            return NULL;
        }
    }

    return m;
}
/*
 * Demo driver: explode a sample string, print each token on its own
 * line wrapped in <>, and release everything explode() allocated.
 * Exits with EXIT_FAILURE if explode() returns a null pointer.
 */
int main(void)
{
    char **s, **m = explode("Hello (World)");

    if (m == NULL)
        return EXIT_FAILURE;

    for (s = m; *s; s++) {
        printf("<%s>\n", *s);
        free(*s); /* each token is separately allocated */
    }
    free(m);

    return 0;
}
--
Peter