mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2025-10-19 18:33:16 +00:00

My recent fix for the issue where a '\' followed by ':' in a Yara regex string would fail to parse introduced a new issue that broke loading a signature in the current daily.ldb database. Unbeknownst to me at the time, you can have multiple PCRE subsignatures in a logical signature, so long as they're the last subsignatures. The previous fix made it so the signature parser muddled more than one PCRE subsignature into one messed up regex string. This commit essentially reverts the previous fix, while keeping some of the code readability improvements in that function. Instead, it addresses the problem a different way. To resolve the original problem, I'm simply checking if the signame starts with "YARA". If it does, we don't tokenize it by ':' delimiters.
1099 lines
29 KiB
C
1099 lines
29 KiB
C
/*
|
|
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
|
|
* Copyright (C) 2007-2013 Sourcefire, Inc.
|
|
*
|
|
* Authors: Tomasz Kojm, Nigel Horne, Török Edvin
|
|
*
|
|
* Acknowledgements: cli_strcasestr() contains a public domain code from:
|
|
* http://unixpapa.com/incnote/string.html
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*/
|
|
|
|
#if HAVE_CONFIG_H
|
|
#include "clamav-config.h"
|
|
#endif
|
|
|
|
#include "str.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
#ifdef HAVE_STRINGS_H
|
|
#include <strings.h>
|
|
#endif
|
|
#include <ctype.h>
|
|
#include <sys/types.h>
|
|
|
|
#include "clamav.h"
|
|
#include "others.h"
|
|
#include "matcher.h"
|
|
#include "jsparse/textbuf.h"
|
|
#include "platform.h"
|
|
|
|
// clang-format off

/*
 * Lookup table indexed by byte value: the ASCII characters '0'-'9',
 * 'A'-'F' and 'a'-'f' map to their hexadecimal digit value (0..15),
 * every other byte maps to -1.
 */
static const int hex_chars[256] = {
    /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x30 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    /* 0x40 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x60 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x80 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x90 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xa0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xb0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xc0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xd0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xe0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xf0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};

// clang-format on

/**
 * @brief Translate one character into its hexadecimal digit value.
 *
 * @param c     Character to translate.
 * @return int  0..15 for a hex digit (either case), -1 for anything else.
 */
static inline int cli_hex2int(const char c)
{
    /* Index as unsigned so bytes >= 0x80 do not become negative offsets. */
    const unsigned char table_index = (unsigned char)c;

    return hex_chars[table_index];
}
|
|
|
|
int cli_realhex2ui(const char *hex, uint16_t *ptr, unsigned int len)
|
|
{
|
|
uint16_t val;
|
|
unsigned int i;
|
|
int c;
|
|
|
|
for (i = 0; i < len; i += 2) {
|
|
val = 0;
|
|
|
|
if (hex[i] == '?' && hex[i + 1] == '?') {
|
|
val |= CLI_MATCH_IGNORE;
|
|
|
|
} else if (hex[i + 1] == '?') {
|
|
if ((c = cli_hex2int(hex[i])) >= 0) {
|
|
val = c << 4;
|
|
} else {
|
|
return 0;
|
|
}
|
|
val |= CLI_MATCH_NIBBLE_HIGH;
|
|
|
|
} else if (hex[i] == '?') {
|
|
if ((c = cli_hex2int(hex[i + 1])) >= 0) {
|
|
val = c;
|
|
} else {
|
|
return 0;
|
|
}
|
|
val |= CLI_MATCH_NIBBLE_LOW;
|
|
|
|
} else if (hex[i] == '(') {
|
|
val |= CLI_MATCH_SPECIAL;
|
|
|
|
} else {
|
|
if ((c = cli_hex2int(hex[i])) >= 0) {
|
|
val = c;
|
|
if ((c = cli_hex2int(hex[i + 1])) >= 0) {
|
|
val = (val << 4) + c;
|
|
} else {
|
|
return 0;
|
|
}
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
*ptr++ = val;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
/**
 * @brief Allocate and fill a 16-bit pattern array from a hex string.
 *
 * Rejects strings of odd length, then delegates the conversion to
 * cli_realhex2ui(). The array has (len / 2) + 1 zero-initialised elements.
 *
 * @param hex         NUL-terminated hex string.
 * @return uint16_t*  Newly allocated array, or NULL if the length is odd,
 *                    allocation fails, or the conversion fails.
 */
uint16_t *cli_hex2ui(const char *hex)
{
    const unsigned int len = strlen(hex);
    uint16_t *words;

    if ((len % 2) != 0) {
        cli_errmsg("cli_hex2ui(): Malformed hexstring: %s (length: %u)\n", hex,
                   len);
        return NULL;
    }

    words = cli_calloc((len / 2) + 1, sizeof(uint16_t));
    if (NULL == words) {
        return NULL;
    }

    if (!cli_realhex2ui(hex, words, len)) {
        /* Conversion failed: do not hand back a half-filled buffer. */
        free(words);
        return NULL;
    }

    return words;
}
|
|
|
|
/**
 * @brief Decode a hex string into a newly allocated byte buffer.
 *
 * The buffer holds (len / 2) + 1 zero-initialised bytes, so the decoded
 * data is followed by a NUL byte.
 *
 * @param hex     NUL-terminated hex string.
 * @return char*  Newly allocated buffer, or NULL if the length is odd,
 *                allocation fails, or a non-hex character is present.
 */
char *cli_hex2str(const char *hex)
{
    const size_t len = strlen(hex);
    char *decoded;

    if ((len % 2) != 0) {
        cli_errmsg("cli_hex2str(): Malformed hexstring: %s (length: %u)\n", hex,
                   (unsigned)len);
        return NULL;
    }

    decoded = cli_calloc((len / 2) + 1, sizeof(char));
    if (NULL == decoded) {
        return NULL;
    }

    if (cli_hex2str_to(hex, decoded, len) != -1) {
        return decoded;
    }

    free(decoded);
    return NULL;
}
|
|
|
|
/**
 * @brief Decode hex characters into a caller-supplied byte buffer.
 *
 * Processes the input two characters at a time and writes one byte per
 * pair. No terminator is written.
 *
 * @param hex   Input hex characters.
 * @param ptr   Output buffer; receives one byte per input pair.
 * @param len   Number of input characters to process.
 * @return int  0 on success, -1 if a non-hex character is encountered.
 */
int cli_hex2str_to(const char *hex, char *ptr, size_t len)
{
    size_t pos;

    for (pos = 0; pos < len; pos += 2) {
        int hi, lo;
        char byte;

        hi = cli_hex2int(hex[pos]);
        if (hi < 0) {
            return -1;
        }

        lo = cli_hex2int(hex[pos + 1]);
        if (lo < 0) {
            return -1;
        }

        byte = hi;
        byte = (byte << 4) + lo;

        *ptr++ = byte;
    }

    return 0;
}
|
|
|
|
/**
 * @brief Convert a hex string of even length to an integer.
 *
 * Digits are folded in most-significant first. Conversion stops silently at
 * the first non-hex character and the value accumulated so far is returned.
 *
 * The accumulator is unsigned: left-shifting a signed int past its range is
 * undefined behaviour, which inputs with eight or more significant hex
 * digits would otherwise trigger. Excess high-order digits are discarded
 * by the unsigned wrap-around.
 *
 * @param hex   NUL-terminated hex string.
 * @return int  The converted value, or -1 if the string length is odd.
 */
int cli_hex2num(const char *hex)
{
    const size_t len = strlen(hex);
    unsigned int acc = 0;
    size_t pos;

    if (len % 2 != 0) {
        cli_errmsg("cli_hex2num(): Malformed hexstring: %s (length: %d)\n", hex,
                   (int)len);
        return -1;
    }

    for (pos = 0; pos < len; pos++) {
        const int digit = cli_hex2int(hex[pos]);

        if (digit < 0) {
            break;
        }
        acc = (acc << 4) | (unsigned int)digit;
    }

    return (int)acc;
}
|
|
|
|
/**
 * @brief Convert a hex string of any length to an integer.
 *
 * cli_hex2num() only accepts strings of even length. Even-length input is
 * passed straight through; odd-length input is first left-padded with a
 * single '0' digit in a temporary buffer.
 *
 * @param hex   NUL-terminated hex string.
 * @return int  The value reported by cli_hex2num(), or -1 if the temporary
 *              buffer cannot be allocated.
 */
int cli_xtoi(const char *hex)
{
    const size_t len = strlen(hex);
    char *padded;
    int val;

    if (len % 2 == 0) {
        return cli_hex2num(hex);
    }

    /* One byte for the pad digit, len for the input, one for the NUL
     * (already present because the buffer is zero-initialised). */
    padded = cli_calloc(len + 2, sizeof(char));
    if (padded == NULL) {
        cli_errmsg("cli_xtoi(): cli_malloc fails.\n");
        return -1;
    }

    /* The pad must be the digit '0'. Leaving the zero byte from calloc in
     * place makes the buffer an empty string, so every odd-length input
     * would convert to 0. */
    padded[0] = '0';
    memcpy(padded + 1, hex, len);

    val = cli_hex2num(padded);
    free(padded);

    return val;
}
|
|
|
|
/**
 * @brief Encode a byte buffer as a lowercase hex string.
 *
 * @param string  Input bytes.
 * @param len     Number of input bytes.
 * @return char*  Newly allocated NUL-terminated string of 2 * len hex
 *                characters, or NULL if allocation fails.
 */
char *cli_str2hex(const char *string, unsigned int len)
{
    char digits[] = "0123456789abcdef";
    char *encoded;
    char *out;
    unsigned int k;

    encoded = (char *)cli_calloc(2 * len + 1, sizeof(char));
    if (encoded == NULL) {
        return NULL;
    }

    out = encoded;
    for (k = 0; k < len; k++) {
        const char byte = string[k];

        /* High nibble first, then low nibble. */
        *out++ = digits[(byte >> 4) & 0xf];
        *out++ = digits[byte & 0xf];
    }

    return encoded;
}
|
|
|
|
/**
 * @brief Case-insensitive "ends with" test.
 *
 * @param haystack  NUL-terminated string to inspect.
 * @param needle    NUL-terminated suffix to look for.
 * @return int      1 if haystack ends with needle (ignoring case), else 0.
 */
int cli_strbcasestr(const char *haystack, const char *needle)
{
    const int hay_len    = strlen(haystack);
    const int needle_len = strlen(needle);
    const char *tail;

    /* A suffix cannot be longer than the string itself. */
    if (needle_len > hay_len) {
        return 0;
    }

    tail = haystack + (hay_len - needle_len);

    return strcasecmp(tail, needle) == 0;
}
|
|
|
|
/**
 * @brief Strip trailing line-feed and carriage-return characters in place.
 *
 * Every '\n' or '\r' at the end of the string is overwritten with '\0'.
 *
 * @param string  String to trim; may be NULL.
 * @return int    Length of the string after trimming (as strlen would
 *                report), or -1 if string is NULL.
 */
int cli_chomp(char *string)
{
    int len;

    if (NULL == string) {
        return -1;
    }

    len = strlen(string);

    /* Peel line terminators off the end, one at a time. */
    while (len > 0 && (string[len - 1] == '\n' || string[len - 1] == '\r')) {
        len--;
        string[len] = '\0';
    }

    return len;
}
|
|
|
|
/*
 * char *cli_strtok(const char *line, int fieldno, const char *delim)
 *
 * Return a freshly allocated copy of field number <fieldno> (counting from
 * zero) of <line>, where fields are separated by any character found in
 * <delim>. A run of adjacent delimiters counts as one separator.
 *
 * Returns NULL if <line> has no such field, if the field is empty, or if
 * memory cannot be allocated. The caller has to free() the result.
 */
char *cli_strtok(const char *line, int fieldno, const char *delim)
{
    int start        = 0;
    int fields_seen  = 0;
    size_t field_len;
    char *copy;

    /* Advance to the first character of the requested field. */
    while (line[start] != '\0' && fields_seen != fieldno) {
        if (strchr(delim, line[start]) != NULL) {
            fields_seen++;
            /* Swallow the rest of a delimiter run. */
            while (line[start + 1] != '\0' && strchr(delim, line[start + 1]) != NULL) {
                start++;
            }
        }
        start++;
    }

    if (line[start] == '\0') {
        /* Ran out of input before reaching the field. */
        return NULL;
    }

    /* The field extends up to the next delimiter or the end of the line. */
    field_len = strcspn(line + start, delim);
    if (field_len == 0) {
        return NULL;
    }

    copy = cli_malloc(field_len + 1);
    if (NULL == copy) {
        cli_errmsg("cli_strtok: Unable to allocate memory for buffer\n");
        return NULL;
    }

    memcpy(copy, line + start, field_len);
    copy[field_len] = '\0';

    return copy;
}
|
|
|
|
/*
 * Variant of cli_strtok that writes the requested field into a buffer
 * supplied by the caller instead of allocating one.
 *
 * <output> must be large enough for the field plus a terminating NUL; no
 * size check is performed here.
 *
 * Returns <output> on success, or NULL if <input> has no field number
 * <fieldno> or that field is empty.
 * njh@bandsman.co.uk
 */
char *cli_strtokbuf(const char *input, int fieldno, const char *delim,
                    char *output)
{
    int start       = 0;
    int fields_seen = 0;
    size_t field_len;

    /* Advance to the first character of the requested field. */
    while (input[start] != '\0' && fields_seen != fieldno) {
        if (strchr(delim, input[start]) != NULL) {
            fields_seen++;
            /* Swallow the rest of a delimiter run. */
            while (input[start + 1] != '\0' && strchr(delim, input[start + 1]) != NULL) {
                start++;
            }
        }
        start++;
    }

    if ('\0' == input[start]) {
        /* Ran out of input before reaching the field. */
        return NULL;
    }

    /* The field extends up to the next delimiter or the end of the input. */
    field_len = strcspn(input + start, delim);
    if (0 == field_len) {
        return NULL;
    }

    memcpy(output, input + start, field_len);
    output[field_len] = '\0';

    return output;
}
|
|
|
|
/**
 * @brief Locate a byte sequence inside a memory region.
 *
 * The search compares the second needle byte first and uses the relation
 * between the first two needle bytes to decide how far it may safely skip
 * ahead after a mismatch or a failed candidate.
 *
 * @param haystack      Region to search.
 * @param hs            Size of haystack in bytes.
 * @param needle        Byte sequence to find.
 * @param ns            Size of needle in bytes.
 * @return const char*  Pointer to the first match inside haystack, or NULL
 *                      if there is none, either size is 0, or ns > hs.
 */
const char *cli_memstr(const char *haystack, size_t hs, const char *needle, size_t ns)
{
    size_t pos;
    size_t last_start;
    size_t step_on_miss;
    size_t step_on_hit;

    if (hs == 0 || ns == 0 || ns > hs) {
        return NULL;
    }

    if (haystack == needle) {
        return haystack;
    }

    if (1 == ns) {
        return memchr(haystack, needle[0], hs);
    }

    /*
     * If the first two needle bytes are equal, a mismatch on the second
     * byte rules out both the current and the next start position.
     * Otherwise it is a hit on the second byte that rules out the next
     * start position.
     */
    step_on_miss = (needle[0] == needle[1]) ? 2 : 1;
    step_on_hit  = (needle[0] == needle[1]) ? 1 : 2;

    last_start = hs - ns;
    pos        = 0;
    while (pos <= last_start) {
        if (haystack[pos + 1] != needle[1]) {
            pos += step_on_miss;
            continue;
        }

        if (haystack[pos] == needle[0] &&
            memcmp(needle + 2, haystack + pos + 2, ns - 2) == 0) {
            return &haystack[pos];
        }

        pos += step_on_hit;
    }

    return NULL;
}
|
|
|
|
/**
 * @brief Copy a string and return a pointer to the end of the copy.
 *
 * Behaves like strcpy(), except that the return value points at the
 * terminating NUL written into dest, which makes chained appends cheap.
 * (by NJH)
 *
 * @param dest    Destination buffer.
 * @param source  NUL-terminated string to copy.
 * @return char*  Pointer to the NUL terminator in dest, or NULL if either
 *                argument is NULL.
 */
char *cli_strrcpy(char *dest, const char *source) /* by NJH */
{
    if (NULL == dest || NULL == source) {
        cli_errmsg("cli_strrcpy: NULL argument\n");
        return NULL;
    }

    for (;;) {
        *dest = *source;
        if ('\0' == *dest) {
            break;
        }
        dest++;
        source++;
    }

    return dest;
}
|
|
|
|
/**
 * @brief Case-insensitive substring search.
 *
 * Candidate positions are found with strcspn() on the upper- and lower-case
 * forms of the first needle character; each candidate is then verified with
 * strncasecmp().
 *
 * An empty needle matches at the start of haystack, which is what the BSD
 * and GNU strcasestr() implementations return.
 *
 * @param haystack      NUL-terminated string to search.
 * @param needle        NUL-terminated string to look for.
 * @return const char*  Pointer to the first match in haystack, or NULL.
 */
const char *__cli_strcasestr(const char *haystack, const char *needle)
{
    const size_t hay_len    = strlen(haystack);
    const size_t needle_len = strlen(needle);
    char first_set[3];
    size_t pos;

    if (0 == needle_len) {
        return haystack;
    }

    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behaviour. */
    first_set[0] = (char)tolower((unsigned char)*needle);
    first_set[1] = (char)toupper((unsigned char)*needle);
    first_set[2] = '\0';

    for (pos = strcspn(haystack, first_set); pos != hay_len;
         pos += strcspn(haystack + pos + 1, first_set) + 1) {
        if (strncasecmp(haystack + pos, needle, needle_len) == 0) {
            return haystack + pos;
        }
    }

    return NULL;
}
|
|
|
|
/**
 * @brief Duplicate at most n characters of a string.
 *
 * The copy length is whatever CLI_STRNLEN(s, n) reports; the result is
 * always NUL-terminated.
 *
 * @param s       Source string; may be NULL.
 * @param n       Maximum number of characters to copy.
 * @return char*  Buffer obtained from malloc(), or NULL if s is NULL or
 *                allocation fails.
 */
char *__cli_strndup(const char *s, size_t n)
{
    size_t copy_len;
    char *dup;

    if (NULL == s) {
        return NULL;
    }

    copy_len = CLI_STRNLEN(s, n);

    dup = malloc(copy_len + 1);
    if (NULL != dup) {
        memcpy(dup, s, copy_len);
        dup[copy_len] = '\0';
    }

    return dup;
}
|
|
|
|
/**
 * @brief Length of a string, inspecting at most n characters.
 *
 * @param s        String to measure.
 * @param n        Maximum number of characters to examine.
 * @return size_t  Number of characters before the first NUL, or n if no
 *                 NUL occurs within the first n characters.
 */
size_t __cli_strnlen(const char *s, size_t n)
{
    const char *cursor = s;
    size_t remaining   = n;

    while (remaining != 0 && *cursor != '\0') {
        cursor++;
        remaining--;
    }

    return n - remaining;
}
|
|
|
|
/*
 * @brief Find the first occurrence of find in s.
 *
 * The search is limited to the first slen characters of s and also stops
 * at a NUL in s. An empty find string matches at s itself.
 *
 * Copyright (c) 2001 Mike Barcroft <mike@FreeBSD.org>
 * Copyright (c) 1990, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Chris Torek.
 *
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * @param s     haystack
 * @param find  needle
 * @param slen  haystack length
 * @return char* Address of the needle, if found, else NULL.
 */
char *__cli_strnstr(const char *s, const char *find, size_t slen)
{
    const char first = *find;
    const char *rest;
    size_t rest_len;

    if ('\0' == first) {
        return (char *)s;
    }

    /* Everything after the first needle character is compared in bulk. */
    rest     = find + 1;
    rest_len = strlen(rest);

    for (;;) {
        char current;

        /* Scan forward to the next occurrence of the first character. */
        do {
            if (0 == slen) {
                return NULL;
            }
            slen--;

            current = *s++;
            if ('\0' == current) {
                return NULL;
            }
        } while (current != first);

        /* Not enough room left within the limit for the rest of the needle. */
        if (rest_len > slen) {
            return NULL;
        }

        if (strncmp(s, rest, rest_len) == 0) {
            /* s is one past the matched first character. */
            return (char *)(s - 1);
        }
    }
}
|
|
|
|
/**
 * @brief Split a string in place on a single delimiter character.
 *
 * Each delimiter that is consumed is overwritten with '\0' and the start
 * of every token is stored in tokens[]. Splitting stops once token_count
 * tokens have been recorded; any text after the last recorded token's
 * delimiter is left unreferenced. If the input runs out first, the unused
 * entries of tokens[] are set to NULL.
 *
 * @param buffer       NUL-terminated string to split; modified in place.
 * @param delim        Delimiter character.
 * @param token_count  Capacity of the tokens array.
 * @param tokens       Output array of token start pointers.
 * @return size_t      Number of tokens stored.
 */
size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count,
                       const char **tokens)
{
    size_t found = 0;
    char *cursor = buffer;

    while (found < token_count) {
        char *separator;

        tokens[found] = cursor;
        found++;

        separator = strchr(cursor, delim);
        if (NULL == separator) {
            /* Input exhausted: blank out the remaining slots. */
            size_t slot;

            for (slot = found; slot < token_count; slot++) {
                tokens[slot] = NULL;
            }
            break;
        }

        *separator = '\0';
        cursor     = separator + 1;
    }

    return found;
}
|
|
|
|
/**
 * @brief The strntol() function converts the string in str to a long value.
 * Modifications made to validate the length of the string for non-null term
 * strings.
 *
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Leading white space is skipped and an optional '+' or '-' sign is
 * accepted. If base is 0, a "0x"/"0X" prefix selects hex and a leading '0'
 * selects octal, otherwise decimal is assumed; if base is 16 a "0x"/"0X"
 * prefix is allowed. On overflow the result saturates to LONG_MIN or
 * LONG_MAX and errno is set to ERANGE.
 *
 * @param nptr          Pointer to start of string.
 * @param n             Max length of buffer in bytes.
 * @param[out] endptr   [optional] If endptr is not NULL, the address of the
 *                      first invalid character is stored in *endptr. If
 *                      there were no digits at all, nptr is stored instead.
 *                      Nota Bene: If the buffer is non-null terminated and
 *                      the number comprises the entire buffer, endptr will
 *                      point past the end of the buffer, and the caller
 *                      should check if endptr >= nptr + n.
 * @param base          The conversion is done according to the given base,
 *                      which must be between 2 and 36 inclusive, or be the
 *                      special value 0.
 * @return long         The signed long value.
 */
long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
{
    const char *s = nptr;
    const char *end;
    unsigned long acc = 0;
    unsigned long cutoff;
    int c;
    int neg = 0, any = 0, cutlim;

    if (0 == n) {
        goto done;
    }
    end = nptr + n;

    /*
     * Skip white space. Characters are widened through unsigned char
     * because the <ctype.h> functions are undefined for negative values.
     */
    do {
        c = (unsigned char)*s;
    } while (isspace(c) && (++s < end));

    if (s >= end) {
        goto done;
    }

    /*
     * Pick up a leading sign. Afterwards `c` must hold the character that
     * follows the sign (and `s` must point at it), exactly as in the
     * unsigned case; otherwise the prefix checks below would inspect the
     * sign itself and never recognise "0x" or a leading octal '0'.
     */
    if (c == '-' || c == '+') {
        neg = (c == '-');
        s++;
        if (s >= end) {
            goto done;
        }
        c = (unsigned char)*s;
    }

    if (base == 0 || base == 16) {
        if (c == '0' && (s + 1 < end) && (s[1] == 'x' || s[1] == 'X')) {
            if (s + 2 >= end) {
                goto done;
            }
            s += 2;
            base = 16;
        }
    }

    if (base == 0) {
        base = (c == '0') ? 8 : 10;
    }

    /*
     * Compute the cutoff value between legal numbers and illegal
     * numbers. That is the largest legal value, divided by the
     * base. An input number that is greater than this value, if
     * followed by a legal input character, is too big. One that
     * is equal to this value may be valid or not; the limit
     * between valid and invalid numbers is then based on the last
     * digit. For instance, if the range for longs is
     * [-2147483648..2147483647] and the input base is 10,
     * cutoff will be set to 214748364 and cutlim to either
     * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
     * a value > 214748364, or equal but the next digit is > 7 (or 8),
     * the number is too big, and we will return a range error.
     *
     * Set any if any `digits' consumed; make it negative to indicate
     * overflow.
     */
    cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
    cutlim = cutoff % (unsigned long)base;
    cutoff /= (unsigned long)base;

    for (acc = 0, any = 0; s < end; s++) {
        c = (unsigned char)*s;

        if (isdigit(c)) {
            c -= '0';
        } else if (isalpha(c)) {
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break;
        }

        if (c >= base) {
            break;
        }

        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) {
            any = -1;
        } else {
            any = 1;
            acc *= base;
            acc += c;
        }
    }

    if (any < 0) {
        acc   = neg ? LONG_MIN : LONG_MAX;
        errno = ERANGE;
    } else if (neg) {
        acc = -acc;
    }

done:
    if (endptr != 0) {
        *endptr = (char *)(any ? s : nptr);
    }
    return (acc);
}
|
|
|
|
/**
 * @brief The strntoul() function converts the string in str to an unsigned long
 * value. Modifications made to validate the length of the string for non-null
 * term strings.
 *
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Leading white space is skipped and an optional '+' or '-' sign is
 * accepted (a '-' negates the result in unsigned arithmetic). If base is 0,
 * a "0x"/"0X" prefix selects hex and a leading '0' selects octal, otherwise
 * decimal is assumed; if base is 16 a "0x"/"0X" prefix is allowed. On
 * overflow the result is ULONG_MAX and errno is set to ERANGE.
 *
 * @param nptr            Pointer to start of string.
 * @param n               Max length of buffer in bytes.
 * @param[out] endptr     [optional] If endptr is not NULL, the address of
 *                        the first invalid character is stored in *endptr.
 *                        If there were no digits at all, nptr is stored
 *                        instead. Nota Bene: If the buffer is non-null
 *                        terminated and the number comprises the entire
 *                        buffer, endptr will point past the end of the
 *                        buffer, and the caller should check if
 *                        endptr >= nptr + n.
 * @param base            The conversion is done according to the given
 *                        base, which must be between 2 and 36 inclusive, or
 *                        be the special value 0.
 * @return unsigned long  The unsigned long value.
 */
unsigned long cli_strntoul(const char *nptr, size_t n, char **endptr,
                           register int base)
{
    const char *s = nptr;
    const char *end;
    unsigned long acc = 0;
    unsigned long cutoff;
    int c;
    int neg = 0, any = 0, cutlim;

    /* Nothing may be read from an empty buffer. */
    if (0 == n) {
        goto done;
    }
    end = nptr + n;

    /*
     * Skip white space. Characters are widened through unsigned char
     * because the <ctype.h> functions are undefined for negative values.
     */
    do {
        c = (unsigned char)*s;
    } while (isspace(c) && (++s < end));

    if (s >= end) {
        goto done;
    }

    /*
     * Pick up a leading sign. Afterwards `c` must hold the character that
     * follows the sign (and `s` must point at it); otherwise the prefix
     * checks below would inspect the sign itself and never recognise "0x"
     * or a leading octal '0'.
     */
    if (c == '-' || c == '+') {
        neg = (c == '-');
        s++;
        if (s >= end) {
            goto done;
        }
        c = (unsigned char)*s;
    }

    if (base == 0 || base == 16) {
        if (c == '0' && (s + 1 < end) && (s[1] == 'x' || s[1] == 'X')) {
            if (s + 2 >= end) {
                goto done;
            }
            s += 2;
            base = 16;
        }
    }

    if (base == 0) {
        base = (c == '0') ? 8 : 10;
    }

    /*
     * cutoff is the largest value that can still be multiplied by base
     * without overflowing; cutlim is the largest digit that may follow it.
     */
    cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;
    cutlim = (unsigned long)ULONG_MAX % (unsigned long)base;

    for (acc = 0, any = 0; s < end; s++) {
        c = (unsigned char)*s;

        if (isdigit(c)) {
            c -= '0';
        } else if (isalpha(c)) {
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break;
        }

        if (c >= base) {
            break;
        }

        /* any < 0 records that an overflow has already happened. */
        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) {
            any = -1;
        } else {
            any = 1;
            acc *= base;
            acc += c;
        }
    }

    if (any < 0) {
        acc   = ULONG_MAX;
        errno = ERANGE;
    } else if (neg) {
        acc = -acc;
    }

done:
    if (endptr != 0) {
        *endptr = (char *)(any ? s : nptr);
    }
    return (acc);
}
|
|
|
|
/**
|
|
* @brief cli_strntol_wrap() converts the string in str to a long value.
|
|
*
|
|
* Wrapper for cli_strntol() that provides incentive to check for failure.
|
|
*
|
|
* @param buf Pointer to start of string.
|
|
* @param buf_size Max length of buffer to convert to
|
|
* integer.
|
|
* @param fail_at_nondigit If 1, fail out if the a non-digit character is found
|
|
* before the end of the buffer. If 0, non-digit character represents end of
|
|
* number and is not a failure.
|
|
* @param base The conversion is done according to the given base,
|
|
* which must be between 2 and 36 inclusive, or be the special value 0.
|
|
* @param[out] result Long integer value of ascii number.
|
|
* @return CL_SUCCESS Success
|
|
* @return CL_EPARSE Failure
|
|
*/
|
|
cl_error_t cli_strntol_wrap(const char *buf, size_t buf_size,
|
|
int fail_at_nondigit, int base, long *result)
|
|
{
|
|
char *endptr = NULL;
|
|
long num;
|
|
|
|
if (buf_size == 0 || !buf || !result) {
|
|
/* invalid parameter */
|
|
return CL_EPARSE;
|
|
}
|
|
errno = 0;
|
|
num = cli_strntol(buf, buf_size, &endptr, base);
|
|
if ((num == LONG_MIN || num == LONG_MAX) && errno == ERANGE) {
|
|
/* under- or overflow */
|
|
return CL_EPARSE;
|
|
}
|
|
if (endptr == buf) {
|
|
/* no digits */
|
|
return CL_EPARSE;
|
|
}
|
|
if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
|
|
/* non-digit encountered */
|
|
return CL_EPARSE;
|
|
}
|
|
/* success */
|
|
*result = num;
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
/**
|
|
* @brief cli_strntoul_wrap() converts the string in str to a long value.
|
|
*
|
|
* Wrapper for cli_strntoul() that provides incentive to check for failure.
|
|
*
|
|
* @param buf Pointer to start of string.
|
|
* @param buf_size Max length of buffer to convert to
|
|
* integer.
|
|
* @param fail_at_nondigit If 1, fail out if the a non-digit character is found
|
|
* before the end of the buffer. If 0, non-digit character represents end of
|
|
* number and is not a failure.
|
|
* @param base The conversion is done according to the given base,
|
|
* which must be between 2 and 36 inclusive, or be the special value 0.
|
|
* @param[out] result Unsigned long integer value of ascii number.
|
|
* @return CL_SUCCESS Success
|
|
* @return CL_EPARSE Failure
|
|
*/
|
|
cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size,
|
|
int fail_at_nondigit, int base,
|
|
unsigned long *result)
|
|
{
|
|
char *endptr = NULL;
|
|
unsigned long num;
|
|
|
|
if (buf_size == 0 || !buf || !result) {
|
|
/* invalid parameter */
|
|
return CL_EPARSE;
|
|
}
|
|
errno = 0;
|
|
num = cli_strntoul(buf, buf_size, &endptr, base);
|
|
if ((num == ULONG_MAX) && (errno == ERANGE)) {
|
|
/* under- or overflow */
|
|
return CL_EPARSE;
|
|
}
|
|
if (endptr == buf) {
|
|
/* no digits */
|
|
return CL_EPARSE;
|
|
}
|
|
if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
|
|
/* non-digit encountered */
|
|
return CL_EPARSE;
|
|
}
|
|
/* success */
|
|
*result = num;
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
/**
 * @brief Split a logical-signature line in place, keeping PCRE bodies whole.
 *
 * Works like cli_strtokenize(), except that once more than token_skip
 * tokens have been started, an unescaped '/' toggles a "within PCRE" state
 * and delimiters seen in that state do not end the token. A '/' counts as
 * escaped when the preceding byte is a backslash; a '/' at the very first
 * byte of the buffer never toggles the state.
 *
 * Each delimiter that ends a token is overwritten with '\0'. If the input
 * runs out before token_count tokens are found, the unused entries of
 * tokens[] are set to NULL.
 *
 * @param buffer       NUL-terminated string to split; modified in place.
 * @param delim        Delimiter character.
 * @param token_count  Capacity of the tokens array.
 * @param tokens       Output array of token start pointers.
 * @param token_skip   Number of leading tokens in which '/' has no special
 *                     meaning.
 * @return size_t      Number of tokens stored.
 */
size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count,
                       const char **tokens, size_t token_skip)
{
    size_t found  = 0;
    char *cursor  = buffer;
    bool in_regex = false;

    while (found < token_count) {
        tokens[found] = cursor;
        found++;

        for (; *cursor != '\0'; cursor++) {
            if (!in_regex && (*cursor == delim)) {
                break;
            }

            // LDB PCRE rules must escape the '/' character with a '\'.
            // A "\/" sequence therefore does not open or close the PCRE string.
            if ((*cursor == '/') && (found > token_skip) &&
                (cursor > buffer) && (cursor[-1] != '\\')) {
                in_regex = !in_regex;
            }
        }

        if ('\0' == *cursor) {
            /* Input exhausted: blank out the remaining slots. */
            size_t slot;

            for (slot = found; slot < token_count; slot++) {
                tokens[slot] = NULL;
            }
            return found;
        }

        /* Terminate this token and step over the delimiter. */
        *cursor = '\0';
        cursor++;
    }

    return found;
}
|
|
|
|
/**
 * @brief Check whether a string consists solely of decimal digits.
 *
 * @param str   NUL-terminated string; may be NULL.
 * @return int  1 if every character is '0'..'9' (an empty string
 *              qualifies), 0 if str is NULL or holds any other character.
 */
int cli_isnumber(const char *str)
{
    const char *cursor;

    if (NULL == str) {
        return 0;
    }

    for (cursor = str; *cursor != '\0'; cursor++) {
        if (*cursor < '0' || *cursor > '9') {
            return 0;
        }
    }

    return 1;
}
|
|
|
|
/**
 * @brief Encode one 16-bit code unit as UTF-8.
 *
 * A zero code unit is written as the single byte 0x01 so that the output
 * never contains an embedded NUL. Values in the surrogate range
 * 0xd800 - 0xdfff are not valid on their own but are encoded like any
 * other value; no check is made.
 *
 * @param u        Code unit to encode.
 * @param dst      Output buffer with room for up to 3 bytes.
 * @return size_t  Number of bytes written (1, 2 or 3).
 */
static inline size_t output_utf8(uint16_t u, unsigned char *dst)
{
    if (u >= 0x800) {
        /* Three bytes: 1110xxxx 10yyyyyy 10zzzzzz */
        dst[0] = 0xe0 | (u >> 12);
        dst[1] = 0x80 | ((u >> 6) & 0x3f);
        dst[2] = 0x80 | (u & 0x3f);
        return 3;
    }

    if (u >= 0x80) {
        /* Two bytes: 110yyyyy 10zzzzzz */
        dst[0] = 0xc0 | (u >> 6);
        dst[1] = 0x80 | (u & 0x3f);
        return 2;
    }

    /* Single byte; don't add \0, add \1 instead. */
    dst[0] = (0 == u) ? 0x1 : (u & 0xff);
    return 1;
}
|
|
|
|
/**
 * @brief javascript-like unescape() function.
 *
 * Decodes "%uXXXX" sequences to UTF-8 (via output_utf8()) and "%XX"
 * sequences to a single byte; everything else is copied through. A decoded
 * or literal zero byte is stored as 0x01 so the result has no embedded NUL.
 *
 * @param str     NUL-terminated input string.
 * @return char*  Newly allocated NUL-terminated result (resized to fit with
 *                cli_realloc2()), or NULL if allocation fails.
 */
char *cli_unescape(const char *str)
{
    const size_t len = strlen(str);
    size_t in;
    size_t out = 0;
    char *R;

    /* Every escape sequence is at least as long as what it decodes to, so
     * the result never needs more room than the input. */
    R = cli_malloc(len + 1);
    if (NULL == R) {
        cli_errmsg("cli_unescape: Unable to allocate memory for string\n");
        return NULL;
    }

    for (in = 0; in < len; in++) {
        unsigned char c = str[in];

        if (str[in] == '%') {
            const int is_unicode = (in + 5 < len) && (str[in + 1] == 'u') &&
                                   isxdigit(str[in + 2]) && isxdigit(str[in + 3]) &&
                                   isxdigit(str[in + 4]) && isxdigit(str[in + 5]);

            if (is_unicode) {
                /* %uXXXX: four hex digits forming one 16-bit code unit. */
                int d0 = cli_hex2int(str[in + 2]);
                int d1 = cli_hex2int(str[in + 3]);
                int d2 = cli_hex2int(str[in + 4]);
                uint16_t u;

                if (d0 < 0) {
                    d0 = 0;
                }
                if (d1 < 0) {
                    d1 = 0;
                }
                if (d2 < 0) {
                    d2 = 0;
                }

                u = (d0 << 12) | (d1 << 8) | (d2 << 4) | cli_hex2int(str[in + 5]);

                out += output_utf8(u, (unsigned char *)&R[out]);
                in += 5;
                continue;
            }

            if ((in + 2 < len) && isxdigit(str[in + 1]) && isxdigit(str[in + 2])) {
                /* %XX: two hex digits forming one byte. */
                int hi = cli_hex2int(str[in + 1]);

                if (hi < 0) {
                    hi = 0;
                }

                c = (hi << 4) | cli_hex2int(str[in + 2]);
                in += 2;
            }
            /* Otherwise the '%' is copied through literally. */
        }

        if (0 == c) {
            c = 1; /* don't add \0 */
        }
        R[out++] = c;
    }

    R[out++] = '\0';

    R = cli_realloc2(R, out);
    return R;
}
|
|
|
|
/* handle javascript's escape sequences inside strings */
|
|
int cli_textbuffer_append_normalize(struct text_buffer *buf, const char *str,
|
|
size_t len)
|
|
{
|
|
size_t i;
|
|
for (i = 0; i < len; i++) {
|
|
char c = str[i];
|
|
if (c == '\\' && i + 1 < len) {
|
|
i++;
|
|
switch (str[i]) {
|
|
case '0':
|
|
c = 0;
|
|
break;
|
|
case 'b':
|
|
c = 8;
|
|
break;
|
|
case 't':
|
|
c = 9;
|
|
break;
|
|
case 'n':
|
|
c = 10;
|
|
break;
|
|
case 'v':
|
|
c = 11;
|
|
break;
|
|
case 'f':
|
|
c = 12;
|
|
break;
|
|
case 'r':
|
|
c = 13;
|
|
break;
|
|
case 'x':
|
|
if (i + 2 < len)
|
|
c = ((cli_hex2int(str[i + 1]) < 0 ? 0 : cli_hex2int(str[i + 1]))
|
|
<< 4) |
|
|
cli_hex2int(str[i + 2]);
|
|
i += 2;
|
|
break;
|
|
case 'u':
|
|
if (i + 4 < len) {
|
|
uint16_t u =
|
|
((cli_hex2int(str[i + 1]) < 0 ? 0 : cli_hex2int(str[i + 1]))
|
|
<< 12) |
|
|
((cli_hex2int(str[i + 2]) < 0 ? 0 : cli_hex2int(str[i + 2]))
|
|
<< 8) |
|
|
((cli_hex2int(str[i + 3]) < 0 ? 0 : cli_hex2int(str[i + 3]))
|
|
<< 4) |
|
|
cli_hex2int(str[i + 4]);
|
|
if (textbuffer_ensure_capacity(buf, 4) == -1)
|
|
return -1;
|
|
buf->pos += output_utf8(u, (unsigned char *)&buf->data[buf->pos]);
|
|
i += 4;
|
|
continue;
|
|
}
|
|
break;
|
|
default:
|
|
c = str[i];
|
|
break;
|
|
}
|
|
}
|
|
if (!c)
|
|
c = 1; /* we don't insert \0 */
|
|
if (textbuffer_putc(buf, c) == -1)
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
 * @brief Replace hex digit characters with their numeric values, in place.
 *
 * Each of the first len characters is overwritten with the value 0..15 of
 * the hex digit it held. Processing stops at the first non-hex character;
 * characters before it have already been converted.
 *
 * @param str   Buffer of hex digit characters; modified in place.
 * @param len   Number of characters to convert.
 * @return int  0 on success, 1 if a non-hex character was found.
 */
int cli_hexnibbles(char *str, int len)
{
    int idx = 0;

    while (idx < len) {
        const int nibble = cli_hex2int(str[idx]);

        if (nibble < 0) {
            return 1;
        }

        str[idx] = nibble;
        idx++;
    }

    return 0;
}
|
|
|
|
cl_error_t cli_basename(const char *filepath, size_t filepath_len,
|
|
char **filebase)
|
|
{
|
|
cl_error_t status = CL_EARG;
|
|
const char *index = NULL;
|
|
|
|
if (NULL == filepath || NULL == filebase || filepath_len == 0) {
|
|
cli_dbgmsg("cli_basename: Invalid arguments.\n");
|
|
goto done;
|
|
}
|
|
|
|
index = filepath + filepath_len - 1;
|
|
|
|
while (index > filepath) {
|
|
if (index[0] == PATHSEP[0])
|
|
break;
|
|
index--;
|
|
}
|
|
if ((index != filepath) || (index[0] == PATHSEP[0]))
|
|
index++;
|
|
|
|
if (0 == CLI_STRNLEN(index, filepath_len - (index - filepath))) {
|
|
cli_dbgmsg("cli_basename: Provided path does not include a file name.\n");
|
|
status = CL_EFORMAT;
|
|
goto done;
|
|
}
|
|
|
|
*filebase = CLI_STRNDUP(index, filepath_len - (index - filepath));
|
|
if (NULL == *filebase) {
|
|
cli_errmsg("cli_basename: Failed to allocate memory for file basename.\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
status = CL_SUCCESS;
|
|
|
|
done:
|
|
return status;
|
|
}
|