lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Date:   Tue, 1 Nov 2022 14:46:10 +0530
From:   Amit <amitchoudhary0523@...il.com>
To:     linux-kernel@...r.kernel.org
Subject: String functions in C language that are not present in standard C
 library - version 1.

String functions in C language that are not present in standard C
library - version 1.

Functions implemented are:
-------------------------------------

get_input_from_stdin_and_discard_extra_characters()
str_is_null_or_empty()
str_is_whitespaces()
str_is_null_or_empty_or_whitespaces()
str_is_integer()
str_starts_with()
str_ends_with()
str_join()
substr()
str_split()
get_number_of_strings_in_strings_array()
print_strings_array()
free_strings_array()

Functions to implement are:
-------------------------------------

str_is_float()
str_is_ascii()
str_strip()
str_strip_leading()
str_strip_trailing()
str_trim()
str_trim_leading()
str_trim_trailing()
str_compare_range()
str_hash_code()
str_remove_leading_characters()
str_remove_trailing_characters()
str_r_str()
str_to_lower_case()
str_to_upper_case()
strtok_new()
str_left_pad(char, count)
str_right_pad(char, count)
str_replace_str()
str_replace_chr()
str_insert_str()
str_insert_chr()
str_remove_str()
str_remove_chr()
str_repeat(int count)
str_shuffle()
str_format_in_usa_currency()
get_file_name_from_path()
get_parent_directory_of()

// regex functions
str_match_regex()
str_starts_with_regex()
str_ends_with_regex()
str_replace_regex()
str_remove_regex()
str_split_on_regex_delim()

The code is below:
-------------------------

--------------------
string_library.c
--------------------

/*
 * License: This file has been released under APACHE LICENSE, VERSION 2.0.
 * The license details can be found here:
 *                            https://www.apache.org/licenses/LICENSE-2.0
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>

#include "string_library.h"

/* The description of functions in this file is in the header file
 * "string_library.h".
 */

/* Forward declarations of the file-local helpers used by str_split(); both are
 * defined further down in this file.
 */
static void free_all_allocated_memory(char **strings_array, long n);
static char **transform_str_to_string_array(const char *str);

/*
 * Reads one line from stdin into 'str' (at most 'size - 1' characters, always
 * null terminated) and discards whatever is left of that line.
 *
 * Parameters:
 *      str:  destination buffer of at least 'size' bytes. May be NULL only
 *            when 'size' is 0.
 *      size: capacity of 'str' in bytes.
 *
 * Returns:
 *      'str' when a line was stored. NULL when 'size' is 0 (the whole line is
 *      discarded), when 'str' is NULL, or when 'size' is negative. In the last
 *      two cases nothing is read from stdin.
 *
 * The newline that ends the line is consumed but never stored.
 */
char *get_input_from_stdin_and_discard_extra_characters(char *str, long size)
{

    int c = 0;
    long i = 0;

    // If 'size' is 0 then this function will discard all input and return NULL.
    // No need to check 'str' if 'size' is 0.
    if (size == 0) {
        // Discard up to and including the newline (or until EOF). The loop
        // must test only for '\n' and EOF: a NUL byte in the input is ordinary
        // data and must not end the discarding early.
        do {
            c = getchar();
        } while ((c != '\n') && (c != EOF));
        return NULL;
    }

    if (!str)
        return NULL;

    if (size < 0)
        return NULL;

    for (i = 0; i < (size - 1); i = i + 1) {

        c = getchar();

        if ((c == '\n') || (c == EOF)) {
            str[i] = 0;
            return str;
        }

        str[i] = (char)(c);

    } // end of for loop

    str[i] = 0;

    // The buffer is full: discard the rest of the line. As above, a NUL byte
    // must not terminate the loop, otherwise the leftover characters would be
    // returned by the next read.
    do {
        c = getchar();
    } while ((c != '\n') && (c != EOF));

    return str;

} // end of get_input_from_stdin_and_discard_extra_characters

/*
 * Reports whether 'str' is a NULL pointer or points at an empty string.
 * Returns STR_LIB_TRUE in either of those cases and STR_LIB_FALSE otherwise.
 */
int str_is_null_or_empty(const char *str)
{

    if ((str != NULL) && (str[0] != '\0'))
        return STR_LIB_FALSE;

    return STR_LIB_TRUE;

} // end of str_is_null_or_empty

/*
 * Reports whether 'str' consists solely of whitespace characters, as
 * classified by isspace().
 *
 * Returns:
 *      STR_LIB_ALL_STRING_ARGS_ARE_NULL if 'str' is NULL.
 *      STR_LIB_FALSE if any character of 'str' is not whitespace.
 *      STR_LIB_TRUE otherwise (this includes the empty string, which has no
 *      non-whitespace character).
 */
int str_is_whitespaces(const char *str)
{

    long i = 0;

    if (!str)
        return STR_LIB_ALL_STRING_ARGS_ARE_NULL;

    while (str[i]) {
        // The <ctype.h> functions require a value representable as
        // unsigned char (or EOF). Plain 'char' may be negative for bytes
        // >= 0x80, so convert before calling isspace().
        if (!isspace((unsigned char)(str[i])))
            return STR_LIB_FALSE;
        i++;
    }

    return STR_LIB_TRUE;

} // end of str_is_whitespaces

/*
 * Reports whether 'str' is NULL, empty, or made up only of whitespace.
 * NULL and empty strings yield STR_LIB_TRUE directly; any other string is
 * passed to str_is_whitespaces() and its result is returned.
 */
int str_is_null_or_empty_or_whitespaces(const char *str)
{

    int is_null_or_empty = (str == NULL) || (str[0] == '\0');

    return is_null_or_empty ? STR_LIB_TRUE : str_is_whitespaces(str);

} // end of str_is_null_or_empty_or_whitespaces

/*
 * Reports whether 'str' is a non-empty sequence of decimal digits.
 *
 * No sign character, whitespace or any other character is accepted.
 *
 * Returns:
 *      STR_LIB_ALL_STRING_ARGS_ARE_NULL if 'str' is NULL.
 *      STR_LIB_FALSE if 'str' is empty or contains a non-digit character.
 *      STR_LIB_TRUE otherwise.
 */
int str_is_integer(const char *str)
{

    long i = 0;

    if (!str)
        return STR_LIB_ALL_STRING_ARGS_ARE_NULL;

    // An empty string contains no digits, so it does not denote an integer.
    if (!*str)
        return STR_LIB_FALSE;

    while (str[i]) {
        // The <ctype.h> functions require a value representable as
        // unsigned char (or EOF). Plain 'char' may be negative for bytes
        // >= 0x80, so convert before calling isdigit().
        if (!isdigit((unsigned char)(str[i])))
            return STR_LIB_FALSE;
        i++;
    }

    return STR_LIB_TRUE;

} // end of str_is_integer

/*
 * Compares the first 'num_chars_to_compare' characters of 'str' and 'prefix'.
 *
 * Returns:
 *      STR_LIB_ALL_STRING_ARGS_ARE_NULL if both 'str' and 'prefix' are NULL.
 *      STR_LIB_FALSE if exactly one of them is NULL.
 *      STR_LIB_AT_LEAST_ONE_ARG_IS_INVALID if 'num_chars_to_compare' is
 *      negative.
 *      STR_LIB_ARGS_NOT_CONSISTENT_WITH_EACH_OTHER if either string is shorter
 *      than 'num_chars_to_compare'.
 *      STR_LIB_TRUE if the compared characters are equal, else STR_LIB_FALSE.
 */
int str_starts_with(const char *str, const char *prefix,
                    long num_chars_to_compare)
{

    size_t count = 0;

    if ((str == NULL) || (prefix == NULL)) {
        if ((str == NULL) && (prefix == NULL))
            return STR_LIB_ALL_STRING_ARGS_ARE_NULL;
        return STR_LIB_FALSE;
    }

    if (num_chars_to_compare < 0)
        return STR_LIB_AT_LEAST_ONE_ARG_IS_INVALID;

    // Both strings must hold at least the requested number of characters.
    if ((num_chars_to_compare > (long)(strlen(str))) ||
        (num_chars_to_compare > (long)(strlen(prefix))))
        return STR_LIB_ARGS_NOT_CONSISTENT_WITH_EACH_OTHER;

    count = (size_t)(num_chars_to_compare);

    return (strncmp(str, prefix, count) == 0) ? STR_LIB_TRUE : STR_LIB_FALSE;

} // end of str_starts_with

/*
 * Compares the last 'num_chars_to_compare' characters of 'str' and 'suffix'.
 *
 * Returns:
 *      STR_LIB_ALL_STRING_ARGS_ARE_NULL if both 'str' and 'suffix' are NULL.
 *      STR_LIB_FALSE if exactly one of them is NULL.
 *      STR_LIB_AT_LEAST_ONE_ARG_IS_INVALID if 'num_chars_to_compare' is
 *      negative.
 *      STR_LIB_ARGS_NOT_CONSISTENT_WITH_EACH_OTHER if either string is shorter
 *      than 'num_chars_to_compare'.
 *      STR_LIB_TRUE if the compared characters are equal, else STR_LIB_FALSE.
 */
int str_ends_with(const char *str, const char *suffix,
                  long num_chars_to_compare)
{

    long str_length = 0;
    long suffix_length = 0;
    const char *str_tail = NULL;
    const char *suffix_tail = NULL;

    if ((str == NULL) || (suffix == NULL)) {
        if ((str == NULL) && (suffix == NULL))
            return STR_LIB_ALL_STRING_ARGS_ARE_NULL;
        return STR_LIB_FALSE;
    }

    if (num_chars_to_compare < 0)
        return STR_LIB_AT_LEAST_ONE_ARG_IS_INVALID;

    str_length = (long)(strlen(str));
    suffix_length = (long)(strlen(suffix));

    // Both strings must hold at least the requested number of characters.
    if ((num_chars_to_compare > str_length) ||
        (num_chars_to_compare > suffix_length))
        return STR_LIB_ARGS_NOT_CONSISTENT_WITH_EACH_OTHER;

    // Point at the first of the trailing characters in each string.
    str_tail = str + (str_length - num_chars_to_compare);
    suffix_tail = suffix + (suffix_length - num_chars_to_compare);

    if (strncmp(str_tail, suffix_tail, (size_t)(num_chars_to_compare)) != 0)
        return STR_LIB_FALSE;

    return STR_LIB_TRUE;

} // end of str_ends_with

/*
 * Concatenates the 'num_args' variable arguments (each a "char *") into one
 * newly allocated string, placing 'delim' between consecutive joined strings.
 *
 * Parameters:
 *      skip_null_and_empty_input_strings: if non-zero, NULL/empty arguments
 *          are ignored and get no delimiter. If zero, they are joined as
 *          empty strings and therefore still get a delimiter.
 *      delim: separator placed between joined strings; NULL or "" means no
 *          separator.
 *      num_args: number of variable arguments that follow.
 *
 * Returns:
 *      A malloc'ed, null terminated string that the caller must free, or NULL
 *      if 'num_args' is less than or equal to 0, if the joined result would be
 *      empty, or if memory allocation fails.
 */
char *str_join(unsigned int skip_null_and_empty_input_strings,
               const char *delim, long num_args, ...)
{

    va_list valist;
    const char *piece = NULL;
    char *joined = NULL;
    size_t piece_len = 0;
    size_t delim_len = 0;
    size_t total_len = 0;
    size_t pos = 0; // write position in 'joined'
    long pieces_seen = 0;
    long i = 0;

    if (num_args <= 0)
        return NULL;

    if (delim)
        delim_len = strlen(delim);

    // First pass: compute the length of the result.
    va_start(valist, num_args);
    for (i = 0; i < num_args; i++) {

        piece = va_arg(valist, char *);
        piece_len = piece ? strlen(piece) : 0;

        if (skip_null_and_empty_input_strings && (piece_len == 0))
            continue;

        // Every joined string except the first is preceded by a delimiter.
        if (pieces_seen > 0)
            total_len = total_len + delim_len;

        total_len = total_len + piece_len;
        pieces_seen = pieces_seen + 1;

    }
    va_end(valist);

    if (total_len == 0)
        return NULL;

    joined = malloc(total_len + 1); // 1 extra for terminating null byte
    if (!joined)
        return NULL;

    // Second pass: copy the strings and delimiters into place.
    pieces_seen = 0;
    va_start(valist, num_args);
    for (i = 0; i < num_args; i++) {

        piece = va_arg(valist, char *);
        piece_len = piece ? strlen(piece) : 0;

        if (skip_null_and_empty_input_strings && (piece_len == 0))
            continue;

        // Zero-length copies are skipped on purpose: 'delim' and 'piece' may
        // be NULL here, and passing a NULL pointer to memcpy()/memmove() is
        // undefined behaviour even when the length is 0.
        if ((pieces_seen > 0) && (delim_len > 0)) {
            memcpy(joined + pos, delim, delim_len);
            pos = pos + delim_len;
        }

        if (piece_len > 0) {
            memcpy(joined + pos, piece, piece_len);
            pos = pos + piece_len;
        }

        pieces_seen = pieces_seen + 1;

    }
    va_end(valist);

    joined[pos] = 0;

    return joined;

} // end of str_join

/*
 * Returns a newly allocated copy of the characters of 'str' from index
 * 'start_index' to index 'end_index', both inclusive.
 *
 * NULL is returned if 'str' is NULL or empty, if either index is negative or
 * beyond the last character of 'str', if 'end_index' is less than
 * 'start_index', or if memory allocation fails. The caller must free the
 * returned string.
 */
char *substr(const char *str, long start_index, long end_index)
{

    long str_length = 0;
    size_t copy_length = 0;
    char *result = NULL;

    if ((str == NULL) || (str[0] == '\0'))
        return NULL;

    str_length = (long)(strlen(str));

    // With 0 <= start_index <= end_index < str_length every other bound
    // (end_index >= 0, start_index < str_length) follows automatically.
    if ((start_index < 0) || (end_index < start_index) ||
        (end_index >= str_length))
        return NULL;

    copy_length = (size_t)(end_index - start_index + 1);

    result = malloc(copy_length + 1); // extra 1 byte for null byte
    if (result == NULL)
        return NULL;

    memcpy(result, str + start_index, copy_length);
    result[copy_length] = '\0';

    return result;

} // end of substr

/*
 * Splits 'str' at every occurrence of 'delim' and returns the tokens as a
 * NULL terminated, heap allocated array of heap allocated strings.
 *
 * If 'max_splits' is positive and smaller than the number of tokens, only
 * 'max_splits' splits are made and the last token holds the rest of 'str'.
 * A negative 'max_splits' means no limit.
 *
 * Returns NULL if 'str' is NULL or empty or if memory allocation fails. If
 * 'delim' is NULL or empty, 'max_splits' is 0, or 'delim' does not occur in
 * 'str', the result holds a single copy of 'str'.
 */
char **str_split(const char *str, const char *delim, long max_splits)
{

    char **tokens = NULL;
    const char *cursor = NULL; // start of the token being processed
    const char *hit = NULL; // next occurrence of 'delim'
    size_t delim_len = 0;
    size_t piece_len = 0;
    long token_count = 1;
    long idx = 0;
    int is_last = 0;

    if ((str == NULL) || (str[0] == '\0'))
        return NULL;

    // Nothing to split on: hand back a one-element array holding 'str'.
    if ((delim == NULL) || (delim[0] == '\0'))
        return transform_str_to_string_array(str);

    if (max_splits == 0)
        return transform_str_to_string_array(str);

    if (strstr(str, delim) == NULL)
        return transform_str_to_string_array(str);

    delim_len = strlen(delim);

    // Each delimiter found adds one token to the initial one.
    for (cursor = str; (hit = strstr(cursor, delim)) != NULL;
         cursor = hit + delim_len)
        token_count = token_count + 1;

    if ((max_splits > 0) && (max_splits < token_count))
        token_count = max_splits + 1;

    // 1 extra pointer so that the array is terminated by a NULL pointer.
    tokens = calloc((size_t)(token_count) + 1, sizeof(*tokens));
    if (tokens == NULL)
        return NULL;

    cursor = str;

    for (idx = 0; idx < token_count; idx = idx + 1) {

        is_last = (idx == (token_count - 1));

        if (is_last) {
            // The last token is everything that remains of 'str'.
            piece_len = strlen(cursor);
        } else {
            // At least one more delimiter is guaranteed to exist here because
            // 'token_count' never exceeds "number of delimiters + 1".
            hit = strstr(cursor, delim);
            piece_len = (size_t)(hit - cursor);
        }

        // allocate 1 extra byte for null terminator
        tokens[idx] = malloc(piece_len + 1);
        if (tokens[idx] == NULL) {
            free_all_allocated_memory(tokens, idx);
            return NULL;
        }

        memcpy(tokens[idx], cursor, piece_len);
        tokens[idx][piece_len] = '\0';

        if (!is_last)
            cursor = cursor + piece_len + delim_len;

    } // end of for loop

    tokens[token_count] = NULL;

    return tokens;

} // end of str_split

/*
 * static char **transform_str_to_string_array(const char *str):
 *
 * Function transform_str_to_string_array() wraps 'str' in a strings array. The
 * returned array has two elements: the first one points to a freshly
 * allocated copy of 'str' and the second one is the terminating NULL pointer.
 * NULL is returned if memory allocation fails. 'str' must not be NULL.
 *
 * This is a static function and this function should not be called from outside
 * this file.
 */
static char **transform_str_to_string_array(const char *str)
{

    const size_t str_length = strlen(str);
    char **wrapper = NULL;
    char *copy = NULL;

    // Two slots: one for the copy and one for the terminating NULL pointer,
    // which calloc() has already zeroed.
    wrapper = calloc(2, sizeof(*wrapper));
    if (wrapper == NULL)
        return NULL;

    // allocate 1 extra byte for null terminator
    copy = malloc(str_length + 1);
    if (copy == NULL) {
        free(wrapper);
        return NULL;
    }

    // Copying 'str_length + 1' bytes brings the null terminator along.
    memcpy(copy, str, str_length + 1);

    wrapper[0] = copy;
    wrapper[1] = NULL;

    return wrapper;

} // end of transform_str_to_string_array

/*
 * static void free_all_allocated_memory(char **strings_array, long n):
 *
 * Function free_all_allocated_memory() frees the first 'n' elements of
 * 'strings_array' and then frees 'strings_array' itself. Nothing is done if
 * 'strings_array' is NULL.
 *
 * This is a static function and this function should not be called from outside
 * this file.
 */
static void free_all_allocated_memory(char **strings_array, long n)
{

    long remaining = 0;

    if (strings_array == NULL)
        return;

    // Release the elements from the last one down to the first one.
    for (remaining = n; remaining > 0; remaining = remaining - 1) {
        free(strings_array[remaining - 1]);
    }

    free(strings_array);

} // end of free_all_allocated_memory

/*
 * Counts the elements of 'strings_array' that precede its terminating NULL
 * pointer. Returns 0 if 'strings_array' itself is NULL.
 */
long get_number_of_strings_in_strings_array(const char **strings_array)
{

    const char **cursor = NULL;
    long count = 0;

    if (strings_array == NULL)
        return 0;

    for (cursor = strings_array; *cursor != NULL; cursor++)
        count = count + 1;

    return count;

} // end of get_number_of_strings_in_strings_array

/*
 * Prints every element of the NULL terminated 'strings_array' to stdout, one
 * per line and enclosed in single quotes, between a start banner and an end
 * banner. If 'strings_array' is NULL only the banners are printed.
 */
void print_strings_array(const char **strings_array)
{

    const char **cursor = strings_array;

    printf("Tokens are printed below (within single quotes):\n\n");
    printf("---- Start of Tokens ----\n");

    if (cursor != NULL) {
        for (; *cursor != NULL; cursor++)
            printf("'%s'\n", *cursor);
    }

    printf("---- End of Tokens ----\n\n");

} // end of print_strings_array

/*
 * Frees every element of the NULL terminated 'strings_array' and then frees
 * 'strings_array' itself. Nothing is done if 'strings_array' is NULL.
 */
void free_strings_array(char **strings_array)
{

    char **cursor = NULL;

    if (strings_array == NULL)
        return;

    for (cursor = strings_array; *cursor != NULL; cursor++)
        free(*cursor);

    free(strings_array);

} // end of free_strings_array

--------------------
string_library.h
--------------------

/*
 * License: This file has been released under APACHE LICENSE, VERSION 2.0.
 * The license details can be found here:
 *                            https://www.apache.org/licenses/LICENSE-2.0
 */

/* Include guard: keeps the contents of this header from being processed more
 * than once in the same translation unit.
 *
 * NOTE(review): identifiers that begin with an underscore followed by an
 * uppercase letter are reserved for the implementation by the C standard;
 * consider renaming this guard (for example to STRING_LIBRARY_H).
 */
#ifndef _STRING_LIBRARY_H_
#define _STRING_LIBRARY_H_

/*
 * Values returned by the int-returning str_*() functions of this library.
 *
 * STR_LIB_TRUE / STR_LIB_FALSE: result of the test that was performed.
 * STR_LIB_ALL_STRING_ARGS_ARE_NULL: every string argument was NULL.
 * STR_LIB_AT_LEAST_ONE_ARG_IS_INVALID: an argument had an invalid value (for
 *      example a negative number of characters to compare).
 * STR_LIB_ARGS_NOT_CONSISTENT_WITH_EACH_OTHER: the arguments contradict each
 *      other (for example more characters were requested for comparison than
 *      one of the strings holds).
 *
 * The negative values are parenthesized so that they always expand as a single
 * operand, whatever expression they are used in.
 */
#define STR_LIB_TRUE 1
#define STR_LIB_FALSE 0
#define STR_LIB_ALL_STRING_ARGS_ARE_NULL (-1)
#define STR_LIB_AT_LEAST_ONE_ARG_IS_INVALID (-2)
#define STR_LIB_ARGS_NOT_CONSISTENT_WITH_EACH_OTHER (-3)

/*
 * get_input_from_stdin_and_discard_extra_characters(char *str, long size):
 *
 * Function get_input_from_stdin_and_discard_extra_characters() reads at most
 * 'size - 1' characters into 'str' from stdin and then appends the null
 * character ('\0'). If 'size' is 0 then this function will discard all input
 * and return NULL. So, to discard all input, this function can be called with
 * 'str' having value NULL and 'size' having value 0.
 *
 * In all cases, reading input stops after encountering a newline ('\n') or EOF
 * even if 'size - 1' characters have not been read. If a newline ('\n') or EOF
 * is read then it is replaced by null character ('\0'). If there are extra
 * characters in input, they are read and discarded.
 * In all cases, 'str' or NULL is returned.
 */
char *get_input_from_stdin_and_discard_extra_characters(char *str, long size);

/*
 * char **str_split(const char *str, const char *delim, long max_splits):
 *
 * Function str_split() splits a string ('str') into tokens. It uses the 'delim'
 * string to split 'str' into tokens. If a 'delim' is found at position "i",
 * then the token ends at position "i - 1".
 *
 * If 'delim' occurs "n" times in 'str' then "n + 1" tokens are
 * generated/returned. However, some or all of these tokens may be empty
 * strings. For example, if 'str' consists of exactly one 'delim' and nothing
 * else then two empty tokens are generated.
 *
 * The reason that empty tokens are returned is that some users may want empty
 * tokens. One use case is that, if they are splitting records from a file to
 * insert in a database, then when an empty token is found, then they can insert
 * NULL value or 0 or empty string, etc. in that column.
 *
 * Users who don't want empty tokens can skip them by testing which token is
 * empty and which is not.
 *
 * The return value of this function is a pointer to pointer to character (means
 * a pointer to an array of strings/elements). This array of strings is
 * terminated by a NULL pointer/string/element which means that the last element
 * in this array of strings is a NULL pointer/string. So, you can loop through
 * this array of strings until you get a NULL pointer/string.
 *
 * The code of looping through this array of strings is:
 *
 *          long i = 0;
 *          while (strings_array[i]) {
 *              ..do stuff here..
 *              i = i + 1;
 *          }
 *
 * The above can be achieved using a for loop also:
 *
 *          long i = 0;
 *          for (i = 0; strings_array[i]; i = i + 1) {
 *              ..do stuff here..
 *          }
 *
 * If you want to skip the empty tokens then the following would be the code for
 * looping through this array of strings:
 *
 *          long i = 0;
 *          while (strings_array[i]) {
 *              if (!*(strings_array[i])) {
 *                  i = i + 1;
 *                  continue;
 *              }
 *              ..do stuff here..
 *              i = i + 1;
 *          }
 *
 * The above can be achieved using a for loop also:
 *
 *          long i = 0;
 *          for (i = 0; strings_array[i]; i = i + 1) {
 *              if (!*(strings_array[i]))
 *                  continue;
 *              ..do stuff here..
 *          }
 *
 *
 * If 'str' is NULL or empty then NULL is returned. NULL is also returned if
 * memory was not available. To find out what exactly happened, the user can
 * check whether 'str' is NULL or empty. In case, 'str' is neither NULL nor
 * empty then it means that memory was not available.
 *
 * 'max_splits' argument is used to control how many times 'str' should be
 * split. If 'max_splits' is positive and less than the number of tokens that
 * would be ideally generated then the number of tokens is reduced to
 * "max_splits + 1" and the last token contains the rest of 'str'.
 * If 'max_splits' is negative then it means that all tokens should be returned.
 *
 * If 'max_splits' is 0 or 'delim' is NULL or empty string or 'delim' is not
 * found in 'str' then an array of strings is returned which will have two
 * elements - the first element will be a pointer to a copy of 'str' and the
 * second element will be a NULL pointer/string/element.
 *
 * The return value of this function is a pointer to pointer to character (means
 * a pointer to an array of strings/elements) and it had been allocated using
 * malloc, so it is user's responsibility to free this memory. The user can
 * use the function free_strings_array() to free the strings_array returned by
 * this function.
 */
char **str_split(const char *str, const char *delim, long max_splits);

/*
 * void print_strings_array(const char **strings_array):
 *
 * Function print_strings_array() prints all the string elements of
 * 'strings_array'.
 */
void print_strings_array(const char **strings_array);

/*
 * void free_strings_array(char **strings_array):
 *
 * Function free_strings_array() frees all the string elements of
 * 'strings_array'. It also frees 'strings_array'.
 */
void free_strings_array(char **strings_array);

/*
 * long get_number_of_strings_in_strings_array(const char **strings_array):
 *
 * Function get_number_of_strings_in_strings_array() returns the count of number
 * of elements in 'strings_array'. It is assumed that this array of strings is
 * terminated by a NULL pointer/string/element.
 *
 */
long get_number_of_strings_in_strings_array(const char **strings_array);

/*
 * char *str_join(unsigned int skip_null_and_empty_input_strings,
 *                const char *delim, long num_args, ...):
 *
 * Parameters:
 *
 *      num_args: number of variable arguments that are passed to this function
 *                excluding the 'delim' string.
 *      ...: Variable number of "char *" pointers.
 *
 * Description:
 *
 *      Function str_join() concatenates all the strings/character arrays passed
 *      to it. If 'delim' is not NULL or not empty then between every two
 *      strings, the 'delim' string is concatenated.
 *
 *      If skip_null_and_empty_input_strings is zero then this means that
 *      NULL/empty strings should be considered valid strings for the purpose of
 *      concatenating 'delim' string - this means that if there is a
 *      NON-NULL/non-empty string in the variable arguments list which is then
 *      followed or preceded by a NULL/empty string then one 'delim' string will
 *      be concatenated between NON-NULL/non-empty string and NULL/empty string.
 *      This can be useful in case columns of a database are concatenated to
 *      form a record which will then be written in a file - so here, a column
 *      containing NULL/empty value will be represented as empty by having two
 *      consecutive 'delim' strings.
 *
 *      If skip_null_and_empty_input_strings is non-zero then this means that
 *      NULL/empty strings should be skipped and no 'delim' string should be
 *      concatenated for them.
 *
 *      Function str_join() allocates a new character array whose size is equal
 *      to the sum of the lengths of all the strings that are joined, plus the
 *      lengths of the 'delim' strings placed between them, plus 1 (extra 1
 *      for terminating null byte). It then concatenates all the strings passed
 *      to it (these strings are separated by 'delim' string but please see
 *      above for NULL/empty strings) into the newly allocated character array
 *      and then returns the pointer to the newly allocated character array.
 *      If memory allocation fails then NULL is returned. NULL is also returned
 *      if 'num_args' is less than or equal to 0 or if the joined result would
 *      be an empty string.
 *
 *      It is the responsibility of the caller to free the allocated memory
 *      (that is, to free the returned pointer from this function).
 */
char *str_join(unsigned int skip_null_and_empty_input_strings,
               const char *delim, long num_args, ...);

/*
 * char *substr(const char *str, long start_index, long end_index):
 *
 * Function substr() allocates memory and returns a pointer to a
 * string / character array which is a substring of 'str' starting from index
 * 'start_index' till 'end_index' (inclusive). This substring is terminated by
 * null byte at the end.
 *
 * If 'str' is NULL or 'str' is empty or 'start_index' is less than 0 or
 * 'end_index' is less than 0 or 'end_index' is less than 'start_index' or
 * 'start_index' is greater than length of 'str' - 1 or 'end_index' is greater
 * than length of 'str' - 1 then NULL is returned.
 *
 * The returned pointer points to a memory region containing the substring and
 * this memory region was allocated using malloc. So, it is the user's
 * responsibility to free the allocated memory.
 *
 */
char *substr(const char *str, long start_index, long end_index);

/*
 * int str_is_null_or_empty(const char *str):
 *
 * Returns STR_LIB_TRUE if 'str' is NULL or is an empty string, otherwise
 * returns STR_LIB_FALSE.
 */
int str_is_null_or_empty(const char *str);

/*
 * int str_is_whitespaces(const char *str):
 *
 * Returns STR_LIB_ALL_STRING_ARGS_ARE_NULL if 'str' is NULL. Returns
 * STR_LIB_FALSE if 'str' contains a character for which isspace() is false,
 * otherwise returns STR_LIB_TRUE.
 */
int str_is_whitespaces(const char *str);

/*
 * int str_is_null_or_empty_or_whitespaces(const char *str):
 *
 * Returns STR_LIB_TRUE if 'str' is NULL or is an empty string, otherwise
 * returns the result of str_is_whitespaces().
 */
int str_is_null_or_empty_or_whitespaces(const char *str);

/*
 * int str_is_integer(const char *str):
 *
 * Returns STR_LIB_ALL_STRING_ARGS_ARE_NULL if 'str' is NULL. Returns
 * STR_LIB_FALSE if 'str' contains a character for which isdigit() is false
 * (so a sign character or whitespace is not accepted). Returns STR_LIB_TRUE
 * for a string made up of decimal digits only.
 */
int str_is_integer(const char *str);

/*
 * int str_starts_with(const char *str, const char *prefix,
 *                     long num_chars_to_compare):
 *
 * Compares the first 'num_chars_to_compare' characters of 'str' and 'prefix'.
 *
 * Returns STR_LIB_ALL_STRING_ARGS_ARE_NULL if both 'str' and 'prefix' are NULL
 * and STR_LIB_FALSE if only one of them is NULL. Returns
 * STR_LIB_AT_LEAST_ONE_ARG_IS_INVALID if 'num_chars_to_compare' is negative.
 * Returns STR_LIB_ARGS_NOT_CONSISTENT_WITH_EACH_OTHER if 'str' or 'prefix' is
 * shorter than 'num_chars_to_compare'. Otherwise returns STR_LIB_TRUE if the
 * compared characters are equal and STR_LIB_FALSE if they are not.
 */
int str_starts_with(const char *str, const char *prefix,
                    long num_chars_to_compare);

/*
 * int str_ends_with(const char *str, const char *suffix,
 *                   long num_chars_to_compare):
 *
 * Compares the last 'num_chars_to_compare' characters of 'str' and 'suffix'.
 * The return values are the same as those of str_starts_with(), with 'suffix'
 * in place of 'prefix'.
 */
int str_ends_with(const char *str, const char *suffix,
                  long num_chars_to_compare);

#endif

---- End of code ----

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ