Velocity Reviews - Computer Hardware Reviews

Velocity Reviews > Newsgroups > Programming > C Programming > Help with program crashing...

Reply
Thread Tools

Help with program crashing...

 
 
santosh
Guest
Posts: n/a
 
      01-19-2006
Hello all,

I've put together a small program to count the number of characters and
'words' in a text file. The minimum length of a word, (in terms of no.
of characters), as well as word delimiting characters can be specified
on the command line. The default delimiting characters built into the
program are space, newline, tab, carriage return, form feed, vertical
tab, comma and null. If a 'u' or 'U' is specified as the last command
line argument, this default set is ignored and the characters in the
penultimate command line argument are used.

Now, the problem is that the program is terminated by the OS when the
'minimum word length' argument, (the second command line argument after
the filename), exceeds a certain value which seems to vary from file to
file. As far as I can figure it out, the word counting code, in
function words() shouldn't simply crash.

Can anyone spot any logical mistake or other dubious calculation which
might cause this behaviour?

I compiled it with gcc -Wall -ansi -pedantic and there were three
warnings, none of which seem to me to be able to affect the code.

Thanks in advance.

The code follows:

/*
* Usage = words filename [mwl] [delchars] [uddc]
* Options in square brackets are optional.
* filename - Path name of file to be scanned.
* mwl - Specifies the minimum length, (in characters), a word must
have
* to be counted as such.
* delchars - One or more characters which will be added to the set of

* default word delimiting characters unless 'uddc' is specified
* as 'u'/'U', in which case, the default delimiters will be
* ignored.
*
* Default 'mwl' - 1 character.
* Default 'delchars' - space, tab, newline, carriage return, form
feed,
* vertical tab
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* A pointer to this structure is passed to the 'words()' function */
struct words_args {
FILE *fp; /* Pointer to a file opened in text mode for reading */
size_t mwl; /* Minimum length (in chars) a 'word' must have */
char *delchars; /* Null terminated array of custom delimiting chars */
char uddc; /* If 'u' or 'U', ignores default delimiting chars */
size_t *nwords; /* Upon return: No. of words in given file */
size_t *tchars; /* Upon return: Total no. of characters in file */
size_t *wchars; /* Upon return: Total chars making up 'words' */
size_t *awl; /* Upon return: Average length of words (in chars) */
};

int words( struct words_args *args );
/* Returns true if was file opened successfully */
unsigned short int words_open_file
( char *clarg_filename, const char *mode, struct words_args *pf );
/* Returns true if 'mwl' command line parameter is valid and
convertable */
unsigned short int words_conv_mwl( char *clarg_mwl, struct words_args
*pm );
/* Returns true if 'uddc' command line parameter is valid */
unsigned short int words_check_uddc( char *clarg_uddc, struct
words_args *pu );
/* Returns true if file was closed successfully */
unsigned short int words_close_file( FILE *fp );
/* Prints to file stream, the values returned by 'words()' */
void words_print_results( char *fn, struct words_args *ps, FILE *stream
);

void words_print_results( char *fn, struct words_args *ps, FILE *stream
) {
fprintf( stream, "\n\nFile: %s\n\tTotal characters = %u\n\tTotal "
"characters making up 'words' = %u\n\tTotal words = %u\n\t"
"Average word length, (in characters) = %u\nFile size = "
"%u characters x %u bytes per character = %u bytes.\n", fn,
*ps->tchars, *ps->wchars, *ps->nwords, *ps->awl, *ps->tchars,
sizeof(char), ((*ps->tchars) * sizeof(char)) );
return;
}

unsigned short int words_open_file
( char *clarg_filename, const char *mode, struct words_args *pf ) {
if( (pf->fp = fopen(clarg_filename, mode)) == NULL ) {
fprintf( stderr, "\nFile: %s\n\tOpen failed. (Mode "
"'%s').\n", clarg_filename, mode );
return 0;
}
else
return 1;
}

unsigned short int words_close_file( FILE *fp ) {
int rv;

rv = fclose(fp);
if( rv == 0 )
return 1;
if( rv == EOF ) {
fprintf( stderr, "\nwords_close_file(): Attempt to close file"
" failed.\n" );
return 0;
}
else {
fprintf( stderr, "\nwords_close_file(): Attempt to close file"
" failed.\n" );
return 0;
}
}

unsigned short int words_conv_mwl( char *clarg_mwl, struct words_args
*pm ) {
int sscanf_rv;
char *str = NULL;

/* Check if string contains only digit characters */
for(str = clarg_mwl; *str != '\0'; ++str) {
/* If not space and digit char signal error */
if( !isdigit((int) *str) && !isspace((int) *str) ){
fprintf( stderr, "\nwords_conv_mwl(): Bad parameter"
".\n\tParameter is: %s\n.", clarg_mwl );
return 0;
}
}

/* Attempt to convert string and assign to structure member */
sscanf_rv = sscanf(clarg_mwl, "%u", &pm->mwl);
if( (sscanf_rv == 0) || (sscanf_rv == EOF) ) {
fprintf( stderr, "\nwords_conv_mwl(): sscanf() returned 0 or"
" EOF.\n\tParameter is: %s\n", clarg_mwl );
return 0;
}
else {
if( sscanf_rv == 1 )
return 1;
else {
fprintf( stderr, "\nwords_conv_mwl(): sscanf() "
"returned %d.\n\tParameter is: %s\n.",
sscanf_rv, clarg_mwl );
return 0;
}
}
}

unsigned short int words_check_uddc( char *clarg_uddc, struct
words_args *pu ) {
char *cla_uddc = clarg_uddc, ch;
size_t ctr;

for(ctr = 0; *(cla_uddc + ctr) != '\0'; ++ctr) {
if( isspace((int) *(cla_uddc + ctr)) )
continue;
else {
ch = *(cla_uddc + ctr);
if( (ch == 'u') || (ch == 'U') ) {
pu->uddc = 'u';
return 1;
}
else {
fprintf( stderr, "\nwords_check_uddc(): "
"Invalid parameter\n\t: %s\n",
clarg_uddc );
return 0;
}
}
}
return 0;
}



int main( int argc, char *argv[] ) {
int words_rv = 0;
struct words_args args, *pargs = NULL, *pmemb = NULL;
size_t no_words = 0, no_tchars = 0, no_wchars = 0, avgwl = 0;
const char *words_usage = "\nwords - Data about words and characters "
"in a text file.\n\nUsage - words filename [mwl] [delchars] "
"[uddc]\nOptions within square brackets are optional.\n\n"
"filename - Relative or absolute path name of file to be "
"examined.\nmwl - Minimum length, (in characters), a 'word' "
"must have to be counted.\ndelchars - One or more characters "
"to be added to the default set of\nword delimiting characters"
".\nuddc - If this is 'u' or 'U', the default, builtin set of"
" delimiting characters\nwill be ignored, and the one "
"specified on the command line will be used.\n";

if( argc < 2 ) {
fprintf( stderr, words_usage ); /* Print usage message */
exit(EXIT_FAILURE);
}

pargs = &args; /* Initialise pointer to struct and member pointers */
args.nwords = &no_words;
args.tchars = &no_tchars;
args.wchars = &no_wchars;
args.awl = &avgwl;

if( argc == 2 ) { /* Only 'filename' given */
pmemb = &args;
if( !words_open_file(argv[1], "rt", pmemb) )
exit(EXIT_FAILURE);
else {
args.mwl = 1; /* Default value */
args.delchars = NULL; /* Default value */
args.uddc = 0; /* Default value */

words_rv = words( pargs );

if( words_rv == 1 ) {
words_print_results( argv[1], pargs, stdout);
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
else {
fprintf( stderr, "\nwords() returned %d.\n",
words_rv );
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
}
}

if( argc == 3 ) { /* 'filename' and 'mwl' given */
pmemb = &args;
if( !words_open_file(argv[1], "rt", pmemb) )
exit(EXIT_FAILURE);
if( !words_conv_mwl(argv[2], pmemb) )
exit(EXIT_FAILURE);

args.delchars = NULL; /* Default value */
args.uddc = 0; /* Default value */

words_rv = words( pargs );

if( words_rv == 1 ) {
words_print_results( argv[1], pargs, stdout);
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
else {
fprintf( stderr, "\nwords() returned %d.\n",
words_rv );
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
}

if( argc == 4 ) { /* 'filename', 'mwl' and 'delchars' are specified */
pmemb = &args;
if( !words_open_file(argv[1], "rt", pmemb) )
exit(EXIT_FAILURE);
if( !words_conv_mwl(argv[2], pmemb) )
exit(EXIT_FAILURE);

/* Assign 'delchars' CL argument string to corresp. pointer */
args.delchars = argv[3];
args.uddc = 0; /* Default value */

words_rv = words( pargs );

if( words_rv == 1 ) {
words_print_results( argv[1], pargs, stdout);
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
else {
fprintf( stderr, "\nwords() returned %d.\n",
words_rv );
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
}

if( argc == 5 ) { /* 'filename', 'mwl', 'delchars' & 'uddc' given */
pmemb = &args;
if( !words_open_file(argv[1], "rt", pmemb) )
exit(EXIT_FAILURE);
if( !words_conv_mwl(argv[2], pmemb) )
exit(EXIT_FAILURE);
/* Assign 'delchars' CL argument string to corresp. pointer */
args.delchars = argv[3];
/* Verify the last, i.e. the 'uddc', CL argument */
if( !words_check_uddc(argv[4], pmemb) )
exit(EXIT_FAILURE);

words_rv = words( pargs );

if( words_rv == 1 ) {
words_print_results( argv[1], pargs, stdout);
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
else {
fprintf( stderr, "\nwords() returned %d.\n",
words_rv );
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
}

/* Execution will reach here only if 'argc' and thus the command line
* contains excess arguments. 'argc' should be at most 5. For now we
* print a message and exit. Later we can modify the above if
statement
* to accept 5 arguments and ignore the rest.
*/
if( argc > 5 ) {
fprintf( stderr, "\nWarning: Only four command line arguments "
"are supported.\n" );
fprintf( stderr, words_usage );
exit(EXIT_FAILURE);
}
}
/*
----------------------------------------------------------------------------
* END OF main()
*
----------------------------------------------------------------------------
*/

int words( struct words_args *args ) {
int nc;
size_t dss = 0, cwl = 0;
const char *ddcs = " \n\t\r\f\v,"; /* Default delimiting characters */
char *ds = NULL;

/* Arguments verification */
if( (args->delchars == NULL) && (args->uddc != 0) )
return 0;
if( args->fp == NULL )
return 0;
if( (args->nwords == NULL) || (args->tchars == NULL) )
return 0;
if( (args->wchars == NULL) || (args->awl == NULL) )
return 0;

/* If the custom delimiters string is not null, we verify that it ends

* with a null character, and if so, based on whether 'args->uddc' is
* 'U'/'u' or 0, we set the delimiters string 'ds' to point to the
* custom delimiters string exclusively in the former case or allocate
* space on the heap and by means of strcpy() and strcat(), build a
* unified delimiters string, consisting of both the default
delimiters
* 'ddcs' and custom delimiters, in the latter case.
*/
if( args->delchars != NULL ) {
if( strchr(args->delchars, '\0') != NULL ) {
if( args->uddc == 0 ) {
dss = (strlen(ddcs)+strlen(args->delchars)+1);
ds = malloc(dss);
if( ds == NULL )
return 0;
else {
if( strcpy(ds, ddcs) != ds )
return 0;
if( strcat(ds, args->delchars) != ds )
return 0;
}
}
else {
if((args->uddc == 'u') || (args->uddc == 'U'))
ds = args->delchars;
else
return 0;
}
}
else
return 0;
}
else {
if( args->delchars == NULL )
ds = ddcs;
else
return 0;
}

/* If 'mwl' is zero, replace with one. */
if( args->mwl == 0 )
args->mwl = 1;

/* The actual word counting code... */
while( (nc = fgetc(args->fp)) != EOF ) {
if( strchr(ds, nc) != NULL ) {
if( cwl >= args->mwl ) {
++(*args->nwords); /* Increment word count */
++(*args->tchars); /* Inc. total char count */
cwl = 0; /* Reset current word length count */
continue;
}
else {
++(*args->tchars);
cwl = 0;
continue;
}
}
else {
++cwl; /* Increment the current world length counter */
++(*args->tchars); /* Increment total char count */
++(*args->wchars); /* Inc. the words chars count */
continue;
}
}

/* File read returned EOF. If it's file's end, then calculate
* average word length and return successfully. If it's due to a read
* error or otherwise, return failure.
*/
if( feof(args->fp) ) {
/* Average word length is total characters making up words
* divided by the number of words
*/
*args->awl = *args->wchars / *args->nwords;
return 1;
}
else {
if( ferror(args->fp) )
return 0;
else
return 0;
}
}
/* ------------------------------------ */
/* END OF words.c */
/* ------------------------------------ */

 
Reply With Quote
 
 
 
 
boa
Guest
Posts: n/a
 
      01-19-2006
santosh wrote:
> Hello all,
>
> I've put together a small program to count the number of characters and
> 'words' in a text file. The minimum length of a word, (in terms of no.
> of characters), as well as word delimiting characters can be specified
> on the command line. The default delimiting characters built into the
> program are space, newline, tab, carriage return, form feed, vertical
> tab, comma and null. If a 'u' or 'U' is specified as the last command
> line argument, this default set is ignored and the characters in the
> penultimate command line argument are used.
>
> Now, the problem is that the program is terminated by the OS when the
> 'minimum word length' argument, (the second command line argument after
> the filename), exceeds a certain value which seems to vary from file to
> file. As far as I can figure it out, the word counting code, in
> function words() shouldn't simply crash.
>
> Can anyone spot any logical mistake or other dubious calculation which
> might cause this behaviour?


If 'minimum word length' is set very high, higher than the max length of
any word in the input, you get a divide by zero in words(), approx at
line 411.

/* Average word length is total characters making up words
* divided by the number of words
*/
*args->awl = *args->wchars / *args->nwords;


HTH
boa

 
Reply With Quote
 
 
 
 
Richard Heathfield
Guest
Posts: n/a
 
      01-19-2006
santosh said:

> Can anyone spot any logical mistake or other dubious calculation which
> might cause this behaviour?


*args->awl = *args->wchars / *args->nwords;

*args->nwords is 0. Oops.

Your program is very complicated, considering its simple task. Why, for
example, are these struct members pointers?

size_t *nwords;
size_t *tchars;
size_t *wchars;
size_t *awl;

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
 
Reply With Quote
 
santosh
Guest
Posts: n/a
 
      01-19-2006
Richard Heathfield wrote:
> santosh said:
>
> > Can anyone spot any logical mistake or other dubious calculation which
> > might cause this behaviour?

>
> *args->awl = *args->wchars / *args->nwords;
>
> *args->nwords is 0. Oops.


Yes, should've spotted that. Thanks.

> Your program is very complicated, considering its simple task. Why, for
> example, are these struct members pointers?
>
> size_t *nwords;
> size_t *tchars;
> size_t *wchars;
> size_t *awl;


Well, originally I planned on passing the structure by value with the
above pointers pointing to the appropriate variables in the caller, but
later changed words() to accept a pointer to struct instead and forgot
to change the above pointers to variables. I will do that now.

Most of the code in the program is error checking at every turn,
otherwise bad CL arguments will cause further misbehaviour. The actual
word counting function seems simple enough to me, though as you point
out above, I can avoid the constant indirections.

 
Reply With Quote
 
Richard Heathfield
Guest
Posts: n/a
 
      01-19-2006
santosh said:

> Well, originally I planned on passing the structure by value with the
> above pointers pointing to the appropriate variables in the caller,


Just as a rule of thumb, it's generally (i.e. practically always!) best to
pass the address of the structure (as you have now chosen to do). If you
don't want the called function to modify the structure whose address is
passed, pass it as const struct T *p rather than struct T *p.


--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
 
Reply With Quote
 
santosh
Guest
Posts: n/a
 
      01-20-2006
Richard Heathfield wrote:
> Just as a rule of thumb, it's generally (i.e. practically always!) best to
> pass the address of the structure (as you have now chosen to do).


Yes, I guess passing a copy of a structure is required far less often
than by reference.

> If you don't want the called function to modify the structure whose address is
> passed, pass it as const struct T *p rather than struct T *p.


Okay, but the callee can make a copy of the const struct T *p and use
that.

An array name is also a const pointer, but modification is allowed in
that case...

 
Reply With Quote
 
Richard Heathfield
Guest
Posts: n/a
 
      01-20-2006
santosh said:

> Richard Heathfield wrote:
>> If you don't want the called function to modify the structure whose
>> address is passed, pass it as const struct T *p rather than struct T *p.

>
> Okay, but the callee can make a copy of the const struct T *p and use
> that.


Sure, but adding const ensures that the callee can't change the struct's
contents itself.

> An array name is also a const pointer,


No, it isn't.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
 
Reply With Quote
 
santosh
Guest
Posts: n/a
 
      01-20-2006
Richard Heathfield wrote:
> santosh said:
> > An array name is also a const pointer,

>
> No, it isn't.


Sorry about that. I'm still quite new to C.
I should have said it's a pointer constant, not a const pointer.

 
Reply With Quote
 
Richard Heathfield
Guest
Posts: n/a
 
      01-20-2006
santosh said:

> Richard Heathfield wrote:
>> santosh said:
>> > An array name is also a const pointer,

>>
>> No, it isn't.

>
> Sorry about that. I'm still quite new to C.
> I should have said it's a pointer constant, not a const pointer.


It isn't even a pointer constant. It's an array name. When used in a value
context, however, we must apply what Chris Torek calls "The Rule":

A[i] = *(A + i) by definition, from the Standard

=> &A[i] = &*(A + i)

=> &A[i] = (A + i)

=> &A[0] = (A + 0)

=> &A[0] = A

(i.e. the name of an array, used in a value context, decays to a pointer to
the array's first element).

If this is what you meant, you are correct. Otherwise, you are not correct.

--
Richard Heathfield
"Usenet is a strange place" - dmr 29/7/1999
http://www.cpax.org.uk
email: rjh at above domain (but drop the www, obviously)
 
Reply With Quote
 
Chris Torek
Guest
Posts: n/a
 
      01-20-2006
>>santosh said:
>>> An array name is also a const pointer,


>Richard Heathfield wrote:
>> No, it isn't.


In article <. com>
santosh <> writes:
>Sorry about that. I'm still quite new to C.
>I should have said it's a pointer constant, not a const pointer.


Even this is not quite right, for two reasons. One is somewhat
minor, but still significant: it is not (or at least not necessarily)
a *constant*:

#include <stdio.h>
void f(void) {
char a[10];
printf("%p\n", (void *)a);
}
void g(void) {
f();
}
int main(void) {
f();
g();
return 0;
}

This will often print two different numbers (although some systems
will produce just one, in which case "a" does appear to be a constant
after all). Note, however, that if we make "a" static, it is quite
likely to produce the same number for both printf()s (since the
output from %p is implementation defined, we cannot predict with
100% certainty that it *will* be the same number, though).

The more important reason not to say that an array "is" a pointer
is because an array is *not* a pointer. An array is an array, and
a pointer is a pointer. An array name used where a value is needed
*becomes* a pointer, but this pointer is *computed* (at compile
and/or run time as appropriate) by the compiler, in exactly the
same way that the address of any other object is computed.

See also <http://c-faq.com/aryptr/index.html> (all of section 6!).
--
In-Real-Life: Chris Torek, Wind River Systems
Salt Lake City, UT, USA (40°39.22'N, 111°50.29'W) +1 801 277 2603
email: forget about it http://web.torek.net/torek/index.html
Reading email is like searching for food in the garbage, thanks to spammers.
 
Reply With Quote
 
 
 
Reply

Thread Tools

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off
Trackbacks are On
Pingbacks are On
Refbacks are Off


Similar Threads
Thread Thread Starter Forum Replies Last Post
Help! Can't identify program that resullts in Win 7 on starting to display a message re opening mystery "Program" Seagull NZ Computing 4 12-27-2011 03:46 AM
Looking for a module/program author, or help with program... Kris Stark Perl Misc 1 09-08-2005 01:47 AM
Help !I want to write a program to count the running time of another program freehomesp@yahoo.com.cn C Programming 1 08-12-2005 06:13 AM
Calling Java program in another Java program Rey Java 4 12-12-2003 10:18 PM
passing data between Java program and C program--help pipi Java 1 07-21-2003 05:02 AM



Advertisments
 



1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57