/*
 *  HTML Entity & Encoding normalization.
 *
 *  Copyright (C) 2006-2007 Török Edvin <edwin@clamav.net>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 *
 */
# include <clamav-config.h>
# include <stdlib.h>
# include <stdio.h>
# include <string.h>
2007-02-01 11:50:29 +00:00
# include "cltypes.h"
2006-12-26 16:17:02 +00:00
# include "clamav.h"
# include "others.h"
# include "hashtab.h"
2007-03-11 11:14:35 +00:00
2007-02-01 11:50:29 +00:00
/*
 * Candidate table capacities: primes in increasing order, each roughly
 * twice the previous one.
 */
static const size_t prime_list[] =
{
	53ul,         97ul,         193ul,       389ul,       769ul,
	1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
	49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
	1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
	50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
	1610612741ul, 3221225473ul
};
/* Number of entries in prime_list. */
static const size_t prime_n = sizeof(prime_list) / sizeof(prime_list[0]);
2008-01-21 15:52:21 +00:00
/* Sentinel stored in an element's key pointer once its entry has been removed.
 * Code in this file recognises it by comparing the pointer against this
 * array's address; the array's contents are never inspected. */
static const char DELETED_KEY [ ] = " " ;
2006-12-26 16:17:02 +00:00
/*
 * Return the first entry of prime_list that is strictly greater than
 * `capacity`.  If no entry is large enough, log an error and return the
 * last (largest) entry instead.
 */
static size_t get_nearest_capacity(const size_t capacity)
{
	size_t i;

	for (i = 0; i < prime_n; i++) {
		if (prime_list[i] > capacity)
			return prime_list[i];
	}
	/* Terminate the message with a newline like the other log calls do. */
	cli_errmsg("Requested hashtable size is too big!\n");
	return prime_list[prime_n - 1];
}
# ifdef PROFILE_HASHTABLE
/* I know, this is ugly: most of these functions get a const s whose const-ness is discarded,
 * and then they modify something the compiler assumes is read-only.
 * Please never use PROFILE_HASHTABLE in production code or in releases. Use it for development only! */
static inline void PROFILE_INIT ( struct hashtable * s )
{
memset ( & s - > PROFILE_STRUCT , 0 , sizeof ( s - > PROFILE_STRUCT ) ) ;
}
static inline void PROFILE_CALC_HASH ( struct hashtable * s )
{
s - > PROFILE_STRUCT . calc_hash + + ;
}
static inline void PROFILE_FIND_ELEMENT ( struct hashtable * s )
{
s - > PROFILE_STRUCT . find_req + + ;
}
static inline void PROFILE_FIND_NOTFOUND ( struct hashtable * s , size_t tries )
{
s - > PROFILE_STRUCT . not_found + + ;
2007-03-11 11:14:35 +00:00
s - > PROFILE_STRUCT . not_found_tries + = tries ;
2006-12-26 16:17:02 +00:00
}
static inline void PROFILE_FIND_FOUND ( struct hashtable * s , size_t tries )
{
s - > PROFILE_STRUCT . found + + ;
2007-03-11 11:14:35 +00:00
s - > PROFILE_STRUCT . found_tries + = tries ;
2006-12-26 16:17:02 +00:00
}
static inline void PROFILE_HASH_EXHAUSTED ( struct hashtable * s )
{
s - > PROFILE_STRUCT . hash_exhausted + + ;
}
static inline void PROFILE_GROW_START ( struct hashtable * s )
{
s - > PROFILE_STRUCT . grow + + ;
}
static inline void PROFILE_GROW_FOUND ( struct hashtable * s , size_t tries )
{
s - > PROFILE_STRUCT . grow_found + + ;
2007-03-11 11:14:35 +00:00
s - > PROFILE_STRUCT . grow_found_tries + = tries ;
2006-12-26 16:17:02 +00:00
}
/* Hook invoked when a grow has finished; it records nothing. */
static inline void PROFILE_GROW_DONE(struct hashtable *s)
{
	(void)s;
}
static inline void PROFILE_DELETED_REUSE ( struct hashtable * s , size_t tries )
{
s - > PROFILE_STRUCT . deleted_reuse + + ;
2007-03-11 11:14:35 +00:00
s - > PROFILE_STRUCT . deleted_tries + = tries ;
2006-12-26 16:17:02 +00:00
}
static inline void PROFILE_INSERT ( struct hashtable * s , size_t tries )
{
s - > PROFILE_STRUCT . inserts + + ;
2007-03-11 11:14:35 +00:00
s - > PROFILE_STRUCT . insert_tries + = tries ;
2006-12-26 16:17:02 +00:00
}
static inline void PROFILE_DATA_UPDATE ( struct hashtable * s , size_t tries )
{
s - > PROFILE_STRUCT . update + + ;
2007-03-11 11:14:35 +00:00
s - > PROFILE_STRUCT . update_tries + = tries ;
2006-12-26 16:17:02 +00:00
}
static inline void PROFILE_HASH_DELETE ( struct hashtable * s )
{
s - > PROFILE_STRUCT . deletes + + ;
}
static inline void PROFILE_HASH_CLEAR ( struct hashtable * s )
{
s - > PROFILE_STRUCT . clear + + ;
}
static inline void PROFILE_REPORT ( const struct hashtable * s )
{
size_t lookups , queries , insert_tries , inserts ;
2007-03-11 11:14:35 +00:00
cli_dbgmsg ( " --------Hashtable usage report for %p-------------- \n " , ( const void * ) s ) ;
2006-12-26 16:17:02 +00:00
cli_dbgmsg ( " hash function calculations:%ld \n " , s - > PROFILE_STRUCT . calc_hash ) ;
cli_dbgmsg ( " successfull finds/total searches: %ld/%ld; lookups: %ld \n " , s - > PROFILE_STRUCT . found , s - > PROFILE_STRUCT . find_req , s - > PROFILE_STRUCT . found_tries ) ;
cli_dbgmsg ( " unsuccessfull finds/total searches: %ld/%ld; lookups: %ld \n " , s - > PROFILE_STRUCT . not_found , s - > PROFILE_STRUCT . find_req , s - > PROFILE_STRUCT . not_found_tries ) ;
cli_dbgmsg ( " successfull finds during grow:%ld; lookups: %ld \n " , s - > PROFILE_STRUCT . grow_found , s - > PROFILE_STRUCT . grow_found_tries ) ;
lookups = s - > PROFILE_STRUCT . found_tries + s - > PROFILE_STRUCT . not_found_tries + s - > PROFILE_STRUCT . grow_found_tries ;
queries = s - > PROFILE_STRUCT . find_req + s - > PROFILE_STRUCT . grow_found ;
cli_dbgmsg ( " Find Lookups/total queries: %ld/%ld = %3f \n " , lookups , queries , lookups * 1.0 / queries ) ;
insert_tries = s - > PROFILE_STRUCT . insert_tries + s - > PROFILE_STRUCT . update_tries + s - > PROFILE_STRUCT . deleted_tries ;
cli_dbgmsg ( " new item insert tries/new items: %ld/%ld \n " , s - > PROFILE_STRUCT . insert_tries , s - > PROFILE_STRUCT . inserts ) ;
cli_dbgmsg ( " update tries/updates: %ld/%ld \n " , s - > PROFILE_STRUCT . update_tries , s - > PROFILE_STRUCT . update ) ;
cli_dbgmsg ( " deleted item reuse tries/deleted&reused items: %ld/%ld \n " , s - > PROFILE_STRUCT . deleted_tries , s - > PROFILE_STRUCT . deleted_reuse ) ;
inserts = s - > PROFILE_STRUCT . inserts + s - > PROFILE_STRUCT . update + s - > PROFILE_STRUCT . deleted_reuse ;
cli_dbgmsg ( " Insert tries/total inserts: %ld/%ld = %3f \n " , insert_tries , inserts , insert_tries * 1.0 / inserts ) ;
cli_dbgmsg ( " Grows: %ld, Deletes : %ld, hashtable clears: %ld \n " , s - > PROFILE_STRUCT . grow , s - > PROFILE_STRUCT . deletes , s - > PROFILE_STRUCT . clear ) ;
cli_dbgmsg ( " --------Report end------------- \n " ) ;
}
# else
/* Profiling disabled: each hook is defined with an empty replacement, so the
 * calls sprinkled through the table code expand to nothing. */
# define PROFILE_INIT(s)
# define PROFILE_CALC_HASH(s)
# define PROFILE_FIND_ELEMENT(s)
# define PROFILE_FIND_NOTFOUND(s, tries)
# define PROFILE_FIND_FOUND(s, tries)
# define PROFILE_HASH_EXHAUSTED(s)
# define PROFILE_GROW_START(s)
# define PROFILE_GROW_FOUND(s, tries)
# define PROFILE_GROW_DONE(s)
# define PROFILE_DELETED_REUSE(s, tries)
# define PROFILE_INSERT(s, tries)
# define PROFILE_DATA_UPDATE(s, tries)
# define PROFILE_HASH_DELETE(s)
# define PROFILE_HASH_CLEAR(s)
# define PROFILE_REPORT(s)
# endif
int hashtab_init ( struct hashtable * s , size_t capacity )
{
if ( ! s )
return CL_ENULLARG ;
PROFILE_INIT ( s ) ;
capacity = get_nearest_capacity ( capacity ) ;
s - > htable = cli_calloc ( capacity , sizeof ( * s - > htable ) ) ;
if ( ! s - > htable )
return CL_EMEM ;
s - > capacity = capacity ;
s - > used = 0 ;
s - > maxfill = 8 * capacity / 10 ;
return 0 ;
}
2007-03-11 11:14:35 +00:00
/*
 * Map the `len` bytes at `k` to an index in the range [0, SIZE).
 *
 * The bytes are folded in from the last to the first: the running value is
 * shifted left by eight bits, the next byte is added, and the result is
 * reduced modulo SIZE after every step.
 *
 * Returns 0 for an empty key, and also for SIZE == 0 so that the modulo is
 * never evaluated with a zero divisor.
 */
static size_t hash(const unsigned char *k, const size_t len, const size_t SIZE)
{
	size_t Hash = 0;
	size_t i;

	if (SIZE == 0)
		return 0;
	for (i = len; i > 0; i--)
		Hash = ((Hash << 8) + k[i - 1]) % SIZE;
	return Hash;
}
/* if returned element has key==NULL, then key was not found in table */
2008-01-21 15:52:21 +00:00
struct element * hashtab_find ( const struct hashtable * s , const char * key , const size_t len )
2006-12-26 16:17:02 +00:00
{
struct element * element ;
2008-01-21 15:52:21 +00:00
size_t tries = 1 ;
2006-12-26 16:17:02 +00:00
size_t idx ;
if ( ! s )
2008-01-21 15:52:21 +00:00
return NULL ;
2006-12-26 16:17:02 +00:00
PROFILE_CALC_HASH ( s ) ;
PROFILE_FIND_ELEMENT ( s ) ;
2008-01-21 15:52:21 +00:00
idx = hash ( ( const unsigned char * ) key , len , s - > capacity ) ;
2006-12-26 16:17:02 +00:00
element = & s - > htable [ idx ] ;
do {
if ( ! element - > key ) {
PROFILE_FIND_NOTFOUND ( s , tries ) ;
return NULL ; /* element not found, place is empty*/
}
2008-01-23 20:19:14 +00:00
else if ( element - > key ! = DELETED_KEY & & len = = element - > len & & strncmp ( key , element - > key , len ) = = 0 ) {
2006-12-26 16:17:02 +00:00
PROFILE_FIND_FOUND ( s , tries ) ;
return element ; /* found */
}
else {
idx = ( idx + tries + + ) % s - > capacity ;
element = & s - > htable [ idx ] ;
}
} while ( tries < = s - > capacity ) ;
PROFILE_HASH_EXHAUSTED ( s ) ;
return NULL ; /* not found */
}
static int hashtab_grow ( struct hashtable * s )
{
const size_t new_capacity = get_nearest_capacity ( s - > capacity ) ;
struct element * htable = cli_calloc ( new_capacity , sizeof ( * s - > htable ) ) ;
size_t i , idx , used = 0 ;
if ( new_capacity = = s - > capacity | | ! htable )
return CL_EMEM ;
PROFILE_GROW_START ( s ) ;
cli_dbgmsg ( " hashtab.c: Warning: growing open-addressing hashtables is slow. Either allocate more storage when initializing, or use other hashtable types! \n " ) ;
for ( i = 0 ; i < s - > capacity ; i + + ) {
if ( s - > htable [ i ] . key & & s - > htable [ i ] . key ! = DELETED_KEY ) {
struct element * element ;
2008-01-21 15:52:21 +00:00
size_t tries = 1 ;
2006-12-26 16:17:02 +00:00
PROFILE_CALC_HASH ( s ) ;
2008-01-23 20:19:14 +00:00
idx = hash ( ( const unsigned char * ) s - > htable [ i ] . key , s - > htable [ i ] . len , new_capacity ) ;
2006-12-26 16:17:02 +00:00
element = & htable [ idx ] ;
while ( element - > key & & tries < = new_capacity ) {
idx = ( idx + tries + + ) % new_capacity ;
element = & htable [ idx ] ;
}
if ( ! element - > key ) {
/* copy element from old hashtable to new */
PROFILE_GROW_FOUND ( s , tries ) ;
* element = s - > htable [ i ] ;
used + + ;
}
else {
cli_errmsg ( " hashtab.c: Impossible - unable to rehash table " ) ;
return CL_EMEM ; /* this means we didn't find enough room for all elements in the new table, should never happen */
}
}
}
free ( s - > htable ) ;
s - > htable = htable ;
s - > used = used ;
s - > capacity = new_capacity ;
s - > maxfill = new_capacity * 8 / 10 ;
2007-03-11 11:14:35 +00:00
cli_dbgmsg ( " Table %p size after grow:%ld \n " , ( void * ) s , s - > capacity ) ;
2006-12-26 16:17:02 +00:00
PROFILE_GROW_DONE ( s ) ;
return CL_SUCCESS ;
}
2008-01-21 15:52:21 +00:00
int hashtab_insert ( struct hashtable * s , const char * key , const size_t len , const element_data data )
2006-12-26 16:17:02 +00:00
{
struct element * element ;
struct element * deleted_element = NULL ;
2008-01-21 15:52:21 +00:00
size_t tries = 1 ;
2006-12-26 16:17:02 +00:00
size_t idx ;
if ( ! s )
2008-01-21 15:52:21 +00:00
return CL_ENULLARG ;
2006-12-26 16:17:02 +00:00
do {
PROFILE_CALC_HASH ( s ) ;
2008-01-21 15:52:21 +00:00
idx = hash ( ( const unsigned char * ) key , len , s - > capacity ) ;
2006-12-26 16:17:02 +00:00
element = & s - > htable [ idx ] ;
do {
if ( ! element - > key ) {
2008-01-21 15:52:21 +00:00
char * thekey ;
2006-12-26 16:17:02 +00:00
/* element not found, place is empty, insert*/
if ( deleted_element ) {
/* reuse deleted elements*/
element = deleted_element ;
PROFILE_DELETED_REUSE ( s , tries ) ;
}
else {
PROFILE_INSERT ( s , tries ) ;
}
thekey = cli_malloc ( len + 1 ) ;
if ( ! thekey )
return CL_EMEM ;
2008-01-23 20:19:14 +00:00
strncpy ( thekey , key , len + 1 ) ;
2006-12-26 16:17:02 +00:00
element - > key = thekey ;
element - > data = data ;
2008-01-23 20:19:14 +00:00
element - > len = len ;
2008-01-21 15:52:21 +00:00
s - > used + + ;
2006-12-26 16:17:02 +00:00
if ( s - > used > s - > maxfill ) {
2007-03-11 11:14:35 +00:00
cli_dbgmsg ( " hashtab.c:Growing hashtable %p, because it has exceeded maxfill, old size:%ld \n " , ( void * ) s , s - > capacity ) ;
2006-12-26 16:17:02 +00:00
hashtab_grow ( s ) ;
}
return 0 ;
}
else if ( element - > key = = DELETED_KEY ) {
deleted_element = element ;
}
2008-01-23 20:19:14 +00:00
else if ( len = = element - > len & & strncmp ( key , element - > key , len ) = = 0 ) {
2006-12-26 16:17:02 +00:00
PROFILE_DATA_UPDATE ( s , tries ) ;
element - > data = data ; /* key found, update */
2008-01-23 20:19:14 +00:00
return 0 ;
2006-12-26 16:17:02 +00:00
}
else {
idx = ( idx + tries + + ) % s - > capacity ;
element = & s - > htable [ idx ] ;
}
} while ( tries < = s - > capacity ) ;
/* no free place found*/
PROFILE_HASH_EXHAUSTED ( s ) ;
2007-03-11 11:14:35 +00:00
cli_dbgmsg ( " hashtab.c: Growing hashtable %p, because its full, old size:%ld. \n " , ( void * ) s , s - > capacity ) ;
2006-12-26 16:17:02 +00:00
} while ( hashtab_grow ( s ) > = 0 ) ;
cli_warnmsg ( " hashtab.c: Unable to grow hashtable \n " ) ;
return CL_EMEM ;
}
2008-01-21 15:52:21 +00:00
void hashtab_delete ( struct hashtable * s , const char * key , const size_t len )
2006-12-26 16:17:02 +00:00
{
struct element * e = hashtab_find ( s , key , len ) ;
2008-01-21 15:52:21 +00:00
if ( e & & e - > key ) {
2006-12-26 16:17:02 +00:00
PROFILE_HASH_DELETE ( s ) ;
2008-01-21 14:29:49 +00:00
free ( ( void * ) e - > key ) ;
2006-12-26 16:17:02 +00:00
e - > key = DELETED_KEY ;
s - > used - - ;
}
}
void hashtab_clear ( struct hashtable * s )
{
size_t i ;
PROFILE_HASH_CLEAR ( s ) ;
for ( i = 0 ; i < s - > capacity ; i + + ) {
if ( s - > htable [ i ] . key & & s - > htable [ i ] . key ! = DELETED_KEY )
2008-01-21 14:29:49 +00:00
free ( ( void * ) s - > htable [ i ] . key ) ;
2006-12-26 16:17:02 +00:00
}
memset ( s - > htable , 0 , s - > capacity ) ;
s - > used = 0 ;
}
int hashtab_store ( const struct hashtable * s , FILE * out )
{
size_t i ;
for ( i = 0 ; i < s - > capacity ; i + + ) {
const struct element * e = & s - > htable [ i ] ;
if ( e - > key & & e - > key ! = DELETED_KEY ) {
fprintf ( out , " %ld %s \n " , e - > data , e - > key ) ;
}
}
return CL_SUCCESS ;
}
int hashtab_generate_c ( const struct hashtable * s , const char * name )
{
size_t i ;
printf ( " /* TODO: include GPL headers */ \n " ) ;
printf ( " #include <hashtab.h> \n " ) ;
printf ( " static struct element %s_elements[] = { \n " , name ) ;
for ( i = 0 ; i < s - > capacity ; i + + ) {
const struct element * e = & s - > htable [ i ] ;
if ( ! e - > key )
2008-01-23 20:19:14 +00:00
printf ( " \t {NULL,0,0}, \n " ) ;
2006-12-26 16:17:02 +00:00
else if ( e - > key = = DELETED_KEY )
2008-01-23 20:19:14 +00:00
printf ( " \t {DELETED_KEY,0,0}, \n " ) ;
2006-12-26 16:17:02 +00:00
else
2008-01-23 20:19:14 +00:00
printf ( " \t { \" %s \" , %ld, %ld}, \n " , e - > key , e - > data , e - > len ) ;
2006-12-26 16:17:02 +00:00
}
printf ( " }; \n " ) ;
printf ( " const struct hashtable %s = { \n " , name ) ;
printf ( " \t %s_elements, %ld, %ld, %ld " , name , s - > capacity , s - > used , s - > maxfill ) ;
printf ( " \n }; \n " ) ;
PROFILE_REPORT ( s ) ;
return 0 ;
}
int hashtab_load ( FILE * in , struct hashtable * s )
{
char line [ 1024 ] ;
while ( fgets ( line , sizeof ( line ) , in ) ) {
2008-01-21 15:52:21 +00:00
char l [ 1024 ] ;
2006-12-26 16:17:02 +00:00
int val ;
sscanf ( line , " %d %1023s " , & val , l ) ;
2008-01-21 15:52:21 +00:00
hashtab_insert ( s , l , strlen ( l ) , val ) ;
2006-12-26 16:17:02 +00:00
}
return CL_SUCCESS ;
}