/* $Id: mstring.c,v 1.5 2003/05/03 03:20:54 andrewbaker Exp $ */
/*
** Copyright (C) 1998-2001 Martin Roesch <roesch@clark.net>
**
** This program is distributed under the terms of version 1.0 of the 
** Q Public License.  See LICENSE.QPL for further details.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**
*/

/***************************************************************************
 *
 * File: MSTRING.C
 *
 * Purpose: Provide a variety of string functions not included in libc.  Makes
 *          up for the fact that the libstdc++ is hard to get reference
 *          material on and I don't want to write any more non-portable c++
 *          code until I have solid references and libraries to use.
 *
 * History:
 *
 * Date:      Author:  Notes:
 * ---------- ------- ----------------------------------------------
 *  08/19/98    MFR    Initial coding begun
 *  03/06/99    MFR    Added Boyer-Moore pattern match routine, don't use
 *                     mContainsSubstr() any more if you don't have to
 *  12/31/99	JGW    Added a full Boyer-Moore implementation to increase
 *                     performance. Added a case insensitive version of mSearch
 *
 **************************************************************************/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

/****************************************************************
 *
 *  Function: mSplitCopyToken()
 *
 *  Purpose: Private helper for mSplit(); makes a NUL-terminated heap
 *           copy of len bytes starting at start.
 *
 *  Parameters:
 *      const char *start => first byte of the token
 *      int len => number of bytes to copy (may be 0)
 *
 *  Returns:
 *      Newly malloc()'d string.  Prints a message to stderr and
 *      calls exit(1) if the allocation fails, so it never returns
 *      NULL.
 *
 ****************************************************************/
static char *mSplitCopyToken(const char *start, int len)
{
    char *tok = (char *) malloc((size_t) len + 1);

    /* make sure we got a good allocation before touching it */
    if(tok == NULL)
    {
        fprintf(stderr, "msplit() got NULL substring malloc!\n");
        exit(1);
    }

    memcpy(tok, start, (size_t) len);
    tok[len] = 0;

    return tok;
}

/****************************************************************
 *
 *  Function: mSplit()
 *
 *  Purpose: Splits a string into heap-allocated token copies.  The
 *           tokens themselves are copied out of str, but note that
 *           trailing whitespace in str is overwritten with NULs in
 *           place before tokenizing.
 *
 *  Parameters:
 *      char *str => the string to be split (must be writable)
 *      char *sep => a string of token separators
 *      int max_strs => how many tokens should be returned at most;
 *                      once only one slot is left, the rest of the
 *                      string (leading whitespace skipped) becomes
 *                      the final token
 *      int *toks => place to store the number of tokens found in str;
 *                   always written, 0 when nothing was found
 *      char meta => the "escape metacharacter", treat the character
 *                   after this character as a literal and "escape" a
 *                   separator (the metacharacter itself stays in the
 *                   token)
 *
 *  Returns:
 *      Array of max_strs char pointers with one token per "row";
 *      the first *toks entries point to malloc()'d strings, the
 *      remaining entries are NULL.  Returns NULL (with *toks == 0)
 *      if max_strs is less than 1.  Exits the process if memory
 *      cannot be allocated.
 *
 *  Notes:
 *      A separator that is the very last character of the (trimmed)
 *      string produces a trailing empty token.
 *
 ****************************************************************/
char **mSplit(char *str, char *sep, int max_strs, int *toks, char meta)
{
    char **retstr;      /* array of token pointers returned to caller */
    char *idx;          /* index pointer into str */
    char *end;          /* ptr to end of str */
    char *sep_end;      /* ptr to end of separator string */
    char *sep_idx;      /* index ptr into separator string */
    int len = 0;        /* length of current token string */
    int curr_str = 0;       /* current index into the return array */
    char last_char = (char) 0xFF;

#ifdef DEBUG
    printf("[*] Splitting string: %s\n", str);
    printf("curr_str = %d\n", curr_str);
#endif

    /* the caller always gets a defined count, even if nothing is found */
    *toks = 0;

    /* without at least one slot there is nowhere to store a token */
    if(max_strs < 1)
        return NULL;

    /*
     * find the ends of the respective passed strings so our while() loops
     * know where to stop
     */
    sep_end = sep + strlen(sep);
    end = str + strlen(str);

    /*
     * remove trailing whitespace; the bounds test comes first so that an
     * empty or all-whitespace string never reads in front of str
     */
    while((end > str) && isspace((unsigned char) *(end - 1)))
        *(--end) = '\0';

    /* set our indexing pointers */
    sep_idx = sep;
    idx = str;

    /*
     * alloc space for the return array, this is where the pointers to the
     * tokens will be stored; calloc leaves the unused slots NULL
     */
    retstr = (char **) calloc((size_t) max_strs, sizeof(char *));

    if(retstr == NULL)
    {
        fprintf(stderr, "msplit() got NULL token array malloc!\n");
        exit(1);
    }

    /* from here on max_strs is the index of the last usable slot */
    max_strs--;

#ifdef DEBUG
    printf("max_strs = %d  curr_str = %d\n", max_strs, curr_str);
#endif

    /* loop thru each letter in the string being tokenized */
    while(idx < end)
    {
        /* loop thru each separator string char */
        while(sep_idx < sep_end)
        {
            /*
             * if the current string-indexed char matches the current
             * separator char and it is not escaped...
             */
            if((*idx == *sep_idx) && (last_char != meta))
            {
                /* if there's something to store... */
                if(len > 0)
                {
                    if(curr_str < max_strs)
                    {
#ifdef DEBUG
                        printf("Allocating %d bytes for token ", len + 1);
                        fflush(stdout);
#endif
                        /* copy the token into the return string array */
                        retstr[curr_str] = mSplitCopyToken(idx - len, len);
#ifdef DEBUG
                        printf("tok[%d]: %s\n", curr_str, retstr[curr_str]);
                        fflush(stdout);
#endif
                        /*
                         * twiddle the necessary pointers and vars; sep_idx
                         * is intentionally left where it is so existing
                         * tokenization results do not change
                         */
                        len = 0;
                        curr_str++;
#ifdef DEBUG
                        printf("curr_str = %d\n", curr_str);
                        printf("max_strs = %d  curr_str = %d\n", max_strs, curr_str);
#endif
                        last_char = *idx;
                        idx++;
                    }
                    else
                    {
                        /*
                         * only one slot is left (max_strs of 1 was asked
                         * for), so the token in progress cannot be split
                         * off: rewind to its start and let it become the
                         * beginning of the final "rest of string" token
                         */
                        idx -= len;
                    }
#ifdef DEBUG
                    printf("Checking if curr_str (%d) >= max_strs (%d)\n", curr_str, max_strs);
#endif

                    /*
                     * if only the last slot remains, store the rest of
                     * the string in it and return the list
                     */
                    if(curr_str >= max_strs)
                    {
                        /* stops at the terminating NUL at the latest */
                        while(isspace((unsigned char) *idx))
                            idx++;

                        len = (int) (end - idx);
#ifdef DEBUG
                        printf("Finishing up...\n");
                        printf("Allocating %d bytes for last token ", len + 1);
                        fflush(stdout);
#endif
                        retstr[curr_str] = mSplitCopyToken(idx, len);

#ifdef DEBUG
                        printf("tok[%d]: %s\n", curr_str, retstr[curr_str]);
                        fflush(stdout);
#endif

                        *toks = curr_str + 1;
#ifdef DEBUG
                        printf("max_strs = %d  curr_str = %d\n", max_strs, curr_str);
                        printf("mSplit got %d tokens!\n", *toks);
                        fflush(stdout);
#endif
                        return retstr;
                    }
                }
                else
                /*
                 * otherwise, the previous char was a separator as well,
                 * and we should just continue
                 */
                {
                    last_char = *idx;
                    idx++;
                    /* make sure to reset this so we test all the sep. chars */
                    sep_idx = sep;
                    len = 0;
                }
            }
            else
            {
                /* go to the next separator */
                sep_idx++;
            }
        }

        /*
         * no separator matched: the current char belongs to the token.
         * When a separator was the last char of the string, idx is at the
         * terminating NUL here, which is what yields the trailing empty
         * token.
         */
        sep_idx = sep;
        len++;
        last_char = *idx;
        idx++;
    }

    /* put the last string into the list */

    if(len > 0)
    {
#ifdef DEBUG
        printf("Allocating %d bytes for last token ", len + 1);
        fflush(stdout);
#endif

        retstr[curr_str] = mSplitCopyToken(idx - len, len);

#ifdef DEBUG
        printf("tok[%d]: %s\n", curr_str, retstr[curr_str]);
        fflush(stdout);
#endif
        *toks = curr_str + 1;
    }
#ifdef DEBUG
    printf("mSplit got %d tokens!\n", *toks);
    fflush(stdout);
#endif

    /* return the token list */
    return retstr;
}

