| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  *  Javascript normalizer. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  Copyright (C) 2008 Sourcefire, Inc. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  Authors: Török Edvin | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  This program is free software; you can redistribute it and/or modify | 
					
						
							|  |  |  |  *  it under the terms of the GNU General Public License version 2 as | 
					
						
							|  |  |  |  *  published by the Free Software Foundation. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  |  *  GNU General Public License for more details. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  You should have received a copy of the GNU General Public License | 
					
						
							|  |  |  |  *  along with this program; if not, write to the Free Software | 
					
						
							|  |  |  |  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | 
					
						
							|  |  |  |  *  MA 02110-1301, USA. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | #ifdef HAVE_CONFIG_H
 | 
					
						
							|  |  |  | #include "clamav-config.h"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | #include <stdio.h>
 | 
					
						
							| 
									
										
										
										
											2008-07-10 11:10:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | #ifdef HAVE_UNISTD_H
 | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | #include <unistd.h>
 | 
					
						
							| 
									
										
										
										
											2008-07-10 11:10:54 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | #include <fcntl.h>
 | 
					
						
							|  |  |  | #include <stdlib.h>
 | 
					
						
							|  |  |  | #include <string.h>
 | 
					
						
							|  |  |  | #include <ctype.h>
 | 
					
						
							|  |  |  | #include <assert.h>
 | 
					
						
							| 
									
										
										
										
											2008-07-10 11:10:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | #include "cltypes.h"
 | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | #include "lexglobal.h"
 | 
					
						
							|  |  |  | #include "hashtab.h"
 | 
					
						
							|  |  |  | #include "others.h"
 | 
					
						
							| 
									
										
										
										
											2008-08-24 21:25:42 +00:00
										 |  |  | #include "str.h"
 | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | #include "js-norm.h"
 | 
					
						
							|  |  |  | #include "jsparse/generated/operators.h"
 | 
					
						
							|  |  |  | #include "jsparse/generated/keywords.h"
 | 
					
						
							| 
									
										
										
										
											2008-07-08 19:02:15 +00:00
										 |  |  | #include "jsparse/textbuf.h"
 | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* ----------- tokenizer ---------------- */ | 
					
						
							|  |  |  | enum tokenizer_state { | 
					
						
							|  |  |  | 	Initial, | 
					
						
							|  |  |  | 	MultilineComment, | 
					
						
							|  |  |  | 	SinglelineComment, | 
					
						
							|  |  |  | 	Number, | 
					
						
							|  |  |  | 	DoubleQString, | 
					
						
							|  |  |  | 	SingleQString, | 
					
						
							| 
									
										
										
										
											2008-08-25 12:39:09 +00:00
										 |  |  | 	Identifier, | 
					
						
							|  |  |  | 	Dummy | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct scanner { | 
					
						
							|  |  |  | 	struct text_buffer buf; | 
					
						
							|  |  |  | 	const char *yytext; | 
					
						
							|  |  |  | 	size_t yylen; | 
					
						
							|  |  |  | 	const char *in; | 
					
						
							|  |  |  | 	size_t insize; | 
					
						
							|  |  |  | 	size_t pos; | 
					
						
							| 
									
										
										
										
											2008-08-25 12:39:09 +00:00
										 |  |  | 	size_t lastpos; | 
					
						
							|  |  |  | 	enum tokenizer_state state; | 
					
						
							|  |  |  | 	enum tokenizer_state last_state; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | } *yyscan_t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef int YY_BUFFER_STATE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int yylex( YYSTYPE *lvalp, yyscan_t  ); | 
					
						
							|  |  |  | static void yy_delete_buffer( YY_BUFFER_STATE, yyscan_t); | 
					
						
							|  |  |  | static YY_BUFFER_STATE yy_scan_bytes( const char *, size_t, yyscan_t scanner ); | 
					
						
							|  |  |  | static const char *yyget_text ( yyscan_t scanner ); | 
					
						
							|  |  |  | static int yyget_leng ( yyscan_t scanner ); | 
					
						
							|  |  |  | static int yylex_init ( yyscan_t * ptr_yy_globals ) ; | 
					
						
							|  |  |  | static void yyset_debug (int debug_flag ,yyscan_t yyscanner ); | 
					
						
							|  |  |  | static int yylex_destroy ( yyscan_t yyscanner ) ; | 
					
						
							|  |  |  | /* ----------- tokenizer end ---------------- */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum fsm_state { | 
					
						
							|  |  |  | 	Base, | 
					
						
							|  |  |  | 	InsideVar, | 
					
						
							|  |  |  | 	InsideInitializer, | 
					
						
							|  |  |  | 	WaitFunctionName, | 
					
						
							|  |  |  | 	WaitParameterList, | 
					
						
							|  |  |  | 	InsideFunctionDecl | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct scope { | 
					
						
							|  |  |  | 	struct hashtable id_map; | 
					
						
							|  |  |  | 	struct scope *parent;/* hierarchy */ | 
					
						
							|  |  |  | 	struct scope *nxt;/* all scopes kept in a list so we can easily free all of them */ | 
					
						
							|  |  |  | 	enum fsm_state fsm_state; | 
					
						
							|  |  |  | 	int  last_token; | 
					
						
							|  |  |  | 	unsigned int brackets; | 
					
						
							|  |  |  | 	unsigned int blocks; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct tokens { | 
					
						
							|  |  |  | 	yystype *data; | 
					
						
							|  |  |  | 	size_t   cnt; | 
					
						
							|  |  |  | 	size_t   capacity; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* state for the current JS file being parsed */ | 
					
						
							|  |  |  | struct parser_state { | 
					
						
							|  |  |  | 	unsigned long     var_uniq; | 
					
						
							|  |  |  | 	unsigned long     syntax_errors; | 
					
						
							| 
									
										
										
										
											2008-07-29 16:52:48 +00:00
										 |  |  | 	unsigned int      rec; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	struct scope *global; | 
					
						
							|  |  |  | 	struct scope *current; | 
					
						
							|  |  |  | 	struct scope *list; | 
					
						
							|  |  |  | 	yyscan_t scanner; | 
					
						
							|  |  |  | 	struct tokens tokens; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static struct scope* scope_new(struct parser_state *state) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct scope *parent = state->current; | 
					
						
							|  |  |  | 	struct scope *s = cli_calloc(1, sizeof(*s)); | 
					
						
							|  |  |  | 	if(!s) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 	if(hashtab_init(&s->id_map, 10) < 0) { | 
					
						
							|  |  |  | 		free(s); | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	s->parent = parent; | 
					
						
							|  |  |  | 	s->fsm_state = Base; | 
					
						
							|  |  |  | 	s->nxt = state->list; | 
					
						
							|  |  |  | 	state->list = s; | 
					
						
							|  |  |  | 	state->current = s; | 
					
						
							|  |  |  | 	return s; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static struct scope* scope_done(struct scope *s) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct scope* parent = s->parent; | 
					
						
							|  |  |  | 	/* TODO: have a hashtab_destroy */ | 
					
						
							|  |  |  | 	hashtab_clear(&s->id_map); | 
					
						
							|  |  |  | 	free(s->id_map.htable); | 
					
						
							|  |  |  | 	free(s); | 
					
						
							|  |  |  | 	return parent; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* transitions:
 | 
					
						
							|  |  |  |  *   Base --(VAR)--> InsideVar | 
					
						
							|  |  |  |  *   InsideVar --(Identifier)-->InsideInitializer | 
					
						
							|  |  |  |  *   InsideVar --(anything_else) --> POP (to Base) | 
					
						
							|  |  |  |  *   InsideInitializer --(COMMA)--> POP (to InsideVar) | 
					
						
							|  |  |  |  *   InsideInitializer | InsideVar --(SEMICOLON) --> POP (to Base) | 
					
						
							|  |  |  |  *   InsideInitializer --(BRACKET_OPEN) --> WaitBrClose | 
					
						
							|  |  |  |  *   InsideInitializer --(PAR_OPEN) --> WaitParClose | 
					
						
							|  |  |  |  *   WaitBrClose --(BRACKET_OPEN) --> increase depth | 
					
						
							|  |  |  |  *   WaitBrClose --(BRACKET_CLOSE) --> POP | 
					
						
							|  |  |  |  *   WaitParClose --(PAR_CLOSE) --> POP | 
					
						
							|  |  |  |  *   WaitParClose --(PAR_OPEN) --> increase depth | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Base --(VAR)--> PUSH, to InsideVar
 | 
					
						
							|  |  |  |  * InsideVar --(Identifier)--> InsideInitializer | 
					
						
							|  |  |  |  * InsideVar --(ELSE)--> POP, inc. syntax_errors | 
					
						
							|  |  |  |  * InsideInitializer --(COMMA)--> POP (to InsideVar) | 
					
						
							|  |  |  |  * --(BRACKET_OPEN)--> inc bracket_counter | 
					
						
							|  |  |  |  * --(PAR_OPEN)--> inc par_counter | 
					
						
							|  |  |  |  * --(BRACKET_CLOSE) --> dec bracket_counter | 
					
						
							|  |  |  |  * --(PAR_CLOSE)--> dec par_counter | 
					
						
							|  |  |  |  * --(VAR)--> PUSH, to InsideVar (if bracket_counter != 0 || par_counter != 0) | 
					
						
							|  |  |  |  *        --> POP, to InsideVar, inc. syntax_errors (if bracket_counter == 0  && par_counter == 0) | 
					
						
							|  |  |  |  *  POP only allowed if bracket_counter == 0 && par_counter == 0  | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * InsideInitializer acts differently, make it only a flag | 
					
						
							|  |  |  |  * .................... | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Pushing and popping are done when entering / exiting function scopes, | 
					
						
							|  |  |  |  * tracking { and function ( is done by the function scope tracker too. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * we only need to track brackets. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * var x = document; | 
					
						
							|  |  |  |  * x.writeln(...); | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * ^we must not normalize member method names | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Variables are declared at function scope, and their initial value is | 
					
						
							|  |  |  |  * undefined. At the point where the initializer is, and from there on the value | 
					
						
							|  |  |  |  * is defined. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * { doesn't introduce a new variable scope, they are in function's scope too | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * function foo() { | 
					
						
							|  |  |  |  *  alert(x); -> x exists, undefined | 
					
						
							|  |  |  |  *  var x=5;  | 
					
						
							|  |  |  |  *  alert(x); -> x exists, =5 | 
					
						
							|  |  |  |  * } | 
					
						
							|  |  |  |  *  | 
					
						
							|  |  |  |  * vs. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * function bar() { | 
					
						
							|  |  |  |  *   alert(x);//error, x not declared
 | 
					
						
							|  |  |  |  *   x=5; | 
					
						
							|  |  |  |  *   } | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * vs. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * but we can declare variables without var, only valid if we use them after | 
					
						
							|  |  |  |  * assigning. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * function foobar() { | 
					
						
							|  |  |  |  *   x=5; | 
					
						
							|  |  |  |  *   alert(x);//x is defined, value is 5
 | 
					
						
							|  |  |  |  *   } | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * other examples: | 
					
						
							|  |  |  |  * function foo2() { | 
					
						
							|  |  |  |  *   alert(x); -> x exists, undefined | 
					
						
							|  |  |  |  *   { | 
					
						
							|  |  |  |  *       var x=5; -> x equals to 5 | 
					
						
							|  |  |  |  *   } | 
					
						
							|  |  |  |  *   alert(x); -> x is 5 | 
					
						
							|  |  |  |  * } | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * function foo3() { | 
					
						
							|  |  |  |  *   var x=4; -> x exists, equals to 4 | 
					
						
							|  |  |  |  *   alert(x); -> x exists, equals to 4 | 
					
						
							|  |  |  |  *   { | 
					
						
							|  |  |  |  *       var x=5; -> x equals to 5 | 
					
						
							|  |  |  |  *   } | 
					
						
							|  |  |  |  *   alert(x); -> x is 5 | 
					
						
							|  |  |  |  * } | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * function bar3() { | 
					
						
							|  |  |  |  *   //same as foo3
 | 
					
						
							|  |  |  |  *   var x=4; | 
					
						
							|  |  |  |  *   alert(x); | 
					
						
							|  |  |  |  *   {  | 
					
						
							|  |  |  |  *        x=5; | 
					
						
							|  |  |  |  *   } | 
					
						
							|  |  |  |  *   alert(x); | 
					
						
							|  |  |  |  * } | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const char* scope_declare(struct scope *s, const char *token, const size_t len, struct parser_state *state) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const struct element *el = hashtab_insert(&s->id_map, token, len, state->var_uniq++); | 
					
						
							|  |  |  | 	/* hashtab_insert either finds an already existing entry, or allocates a
 | 
					
						
							|  |  |  | 	 * new one, we return the allocated string */ | 
					
						
							|  |  |  | 	return el ? el->key : NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const char* scope_use(struct scope *s, const char *token, const size_t len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const struct element *el = hashtab_find(&s->id_map, token, len); | 
					
						
							|  |  |  | 	if(el) { | 
					
						
							|  |  |  | 		/* identifier already found in current scope,
 | 
					
						
							|  |  |  | 		 * return here to avoid overwriting uniq id */ | 
					
						
							|  |  |  | 		return el->key; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	/* identifier not yet in current scope's hashtab, add with ID -1.
 | 
					
						
							|  |  |  | 	 * Later if we find a declaration it will automatically assign a uniq ID | 
					
						
							|  |  |  | 	 * to it. If not, we'll know that we have to push ID == -1 tokens to an | 
					
						
							|  |  |  | 	 * outer scope.*/ | 
					
						
							|  |  |  | 	el = hashtab_insert(&s->id_map, token, len, -1); | 
					
						
							|  |  |  | 	return el ? el->key : NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static long scope_lookup(struct scope *s, const char *token, const size_t len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	while(s) { | 
					
						
							|  |  |  | 		const struct element *el = hashtab_find(&s->id_map, token, len); | 
					
						
							|  |  |  | 		if(el && el->data != -1) { | 
					
						
							|  |  |  | 			return el->data; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		/* not found in current scope, try in outer scope */ | 
					
						
							|  |  |  | 		s = s->parent; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int tokens_ensure_capacity(struct tokens *tokens, size_t cap) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if(tokens->capacity < cap) { | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 		cap += 1024; | 
					
						
							|  |  |  | 		tokens->data = cli_realloc(tokens->data, cap * sizeof(*tokens->data)); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 		if(!tokens->data) | 
					
						
							|  |  |  | 			return CL_EMEM; | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 		tokens->capacity = cap; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int add_token(struct parser_state *state, const yystype *token) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 	if(tokens_ensure_capacity(&state->tokens, state->tokens.cnt + 1) < 0) | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 		return -1; | 
					
						
							|  |  |  | 	state->tokens.data[state->tokens.cnt++] = *token; | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct buf { | 
					
						
							|  |  |  | 	size_t pos; | 
					
						
							|  |  |  | 	int outfd; | 
					
						
							|  |  |  | 	char buf[65536]; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int buf_outc(char c, struct buf *buf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if(buf->pos >= sizeof(buf->buf)) { | 
					
						
							|  |  |  | 		if(write(buf->outfd, buf->buf, sizeof(buf->buf)) != sizeof(buf->buf)) | 
					
						
							|  |  |  | 			return CL_EIO; | 
					
						
							|  |  |  | 		buf->pos = 0; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	buf->buf[buf->pos++] = c; | 
					
						
							|  |  |  | 	return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int buf_outs(const char *s, struct buf *buf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const size_t buf_len = sizeof(buf->buf); | 
					
						
							| 
									
										
										
										
											2008-08-01 14:50:45 +00:00
										 |  |  | 	size_t i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	i = buf->pos; | 
					
						
							|  |  |  | 	while(*s) { | 
					
						
							|  |  |  | 		while(i < buf_len && *s) { | 
					
						
							| 
									
										
										
										
											2008-09-23 20:52:22 +00:00
										 |  |  | 			if(isspace(*s)) | 
					
						
							|  |  |  | 				buf->buf[i++] = ' '; | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 				buf->buf[i++] = tolower((unsigned char)(*s)); | 
					
						
							|  |  |  | 			++s; | 
					
						
							| 
									
										
										
										
											2008-08-01 14:50:45 +00:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		if(i == buf_len) { | 
					
						
							|  |  |  | 			if(write(buf->outfd, buf->buf, buf_len) < 0) | 
					
						
							|  |  |  | 				return CL_EIO; | 
					
						
							|  |  |  | 		       i = 0; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-08-01 14:50:45 +00:00
										 |  |  | 	buf->pos = i; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void output_space(char last, char current, struct buf *out) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if(isalnum(last) && isalnum(current)) | 
					
						
							|  |  |  | 		buf_outc(' ', out); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* return class of last character */ | 
					
						
							|  |  |  | static char output_token(const yystype *token, struct scope *scope, struct buf *out, char lastchar) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	char sbuf[128]; | 
					
						
							|  |  |  | 	const char *s = TOKEN_GET(token, cstring); | 
					
						
							|  |  |  | 	/* TODO: use a local buffer, instead of FILE* */ | 
					
						
							|  |  |  | 	switch(token->type) { | 
					
						
							|  |  |  | 		case TOK_StringLiteral: | 
					
						
							|  |  |  | 			output_space(lastchar,'"', out); | 
					
						
							|  |  |  | 			buf_outc('"', out); | 
					
						
							|  |  |  | 			if(s) { | 
					
						
							|  |  |  | 				buf_outs(s, out); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			buf_outc('"', out); | 
					
						
							|  |  |  | 			return '\"'; | 
					
						
							|  |  |  | 		case TOK_NumericInt: | 
					
						
							|  |  |  | 			output_space(lastchar,'0', out); | 
					
						
							|  |  |  | 			snprintf(sbuf, sizeof(sbuf), "%ld", TOKEN_GET(token, ival)); | 
					
						
							|  |  |  | 			buf_outs(sbuf, out); | 
					
						
							|  |  |  | 			return '0'; | 
					
						
							|  |  |  | 		case TOK_NumericFloat: | 
					
						
							|  |  |  | 			output_space(lastchar,'0', out); | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 			snprintf(sbuf, sizeof(sbuf), "%g", TOKEN_GET(token, dval)); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 			buf_outs(sbuf, out); | 
					
						
							|  |  |  | 			return '0'; | 
					
						
							|  |  |  | 		case TOK_IDENTIFIER_NAME: | 
					
						
							|  |  |  | 			output_space(lastchar,'a', out); | 
					
						
							|  |  |  | 			if(s) { | 
					
						
							|  |  |  | 				long id = scope_lookup(scope, s, strlen(s)); | 
					
						
							|  |  |  | 				if(id == -1) { | 
					
						
							|  |  |  | 					/* identifier not normalized */ | 
					
						
							|  |  |  | 					buf_outs(s, out); | 
					
						
							|  |  |  | 				} else { | 
					
						
							|  |  |  | 					snprintf(sbuf, sizeof(sbuf), "n%03ld",id); | 
					
						
							|  |  |  | 					buf_outs(sbuf, out); | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			return 'a'; | 
					
						
							|  |  |  | 		case TOK_FUNCTION: | 
					
						
							|  |  |  | 			output_space(lastchar,'a', out); | 
					
						
							|  |  |  | 			buf_outs("function",out); | 
					
						
							|  |  |  | 			return 'a'; | 
					
						
							|  |  |  | 		default: | 
					
						
							|  |  |  | 			if(s) { | 
					
						
							|  |  |  | 				const size_t len = strlen(s); | 
					
						
							|  |  |  | 				output_space(lastchar,s[0], out); | 
					
						
							|  |  |  | 				buf_outs(s, out); | 
					
						
							|  |  |  | 				return len ? s[len-1] : '\0'; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			return '\0'; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * We can't delete the scope as soon as we see a }, because | 
					
						
							|  |  |  |  * we still need the hashmap from it. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * If we would normalize all the identifiers, and output when a scope is closed, | 
					
						
							|  |  |  |  * then it would be impossible to normalize calls to other functions. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * So we need to keep all scopes in memory, to do this instead of scope_done, we | 
					
						
							|  |  |  |  * simply just set current = current->parent when a scope is closed. | 
					
						
							|  |  |  |  * We keep a list of all scopes created in parser_state->list. When we have parsed | 
					
						
							|  |  |  |  * everything, we output everything, and then delete all scopes. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * We also need to know where to switch scopes on the second pass, so for | 
					
						
							|  |  |  |  * TOK_FUNCTION types we will use another pointer, that points to the scope | 
					
						
							|  |  |  |  * (added to yystype's union). | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * We lookup the identifier in the scope (using scope_lookup, it looks in parent | 
					
						
							|  |  |  |  * scopes too), if ID is found then output (n%3d, Id), | 
					
						
							|  |  |  |  * otherwise output the identifier as is. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * To make  it easier to match sigs, we do a xfrm :  | 
					
						
							|  |  |  |  * 'function ID1 (..'. => 'n%3d = function (...' | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * we'll add all identifiers to the scope's map | 
					
						
							|  |  |  |  * those that are not decl. will have initial ID -1 | 
					
						
							|  |  |  |  * if we later see a decl for it in same scope, it'll automatically get a | 
					
						
							|  |  |  |  * correct ID. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * When parsing of local scope is done, we take any ID -1 identifiers, | 
					
						
							|  |  |  |  * and push them up one level (careful not to overwrite existing IDs). | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * it would be nice if the tokens would contain a link to the entry in the | 
					
						
							|  |  |  |  * hashtab, a link that automatically gets updated when the element is moved | 
					
						
							|  |  |  |  * (pushed up). This would prevent subsequent lookups in the map, | 
					
						
							|  |  |  |  * when we want to output the tokens. | 
					
						
							|  |  |  |  * There is no easy way to do that, so we just do another lookup | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * This actually works, redefining foo: | 
					
						
							|  |  |  |  * function foo() { | 
					
						
							|  |  |  |  *   var foo=5; alert(foo); | 
					
						
							|  |  |  |  * } | 
					
						
							|  |  |  |  * So we can't treat function names just as any other identifier? | 
					
						
							|  |  |  |  * We can, because you can no longer call foo, if you redefined it as a var. | 
					
						
							|  |  |  |  * So if we rename both foo-s with same name, it will have same behaviour. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This means that a new scope should begin after function, and not after | 
					
						
							|  |  |  |  * function ... (. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void scope_free_all(struct scope *p) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct scope *nxt; | 
					
						
							|  |  |  | 	do { | 
					
						
							|  |  |  | 		nxt = p->nxt; | 
					
						
							|  |  |  | 		scope_done(p); | 
					
						
							|  |  |  | 		p = nxt; | 
					
						
							|  |  |  | 	} while(p); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens); | 
					
						
							|  |  |  | static int match_parameters(const yystype *tokens, const char ** param_names, size_t count) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	size_t i,j=0; | 
					
						
							|  |  |  | 	if(tokens[0].type != TOK_PAR_OPEN) | 
					
						
							|  |  |  | 		return -1; | 
					
						
							|  |  |  | 	i=1; | 
					
						
							|  |  |  | 	while(count--) { | 
					
						
							|  |  |  | 		const char *token_val = TOKEN_GET(&tokens[i], cstring); | 
					
						
							|  |  |  | 		if(tokens[i].type != TOK_IDENTIFIER_NAME || | 
					
						
							|  |  |  | 		   !token_val || | 
					
						
							|  |  |  | 		   strcmp(token_val, param_names[j++])) | 
					
						
							|  |  |  | 			return -1; | 
					
						
							|  |  |  | 		++i; | 
					
						
							|  |  |  | 		if((count && tokens[i].type != TOK_COMMA) | 
					
						
							|  |  |  | 		   || (!count && tokens[i].type != TOK_PAR_CLOSE)) | 
					
						
							|  |  |  | 			return -1; | 
					
						
							|  |  |  | 		++i; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							/*
 * Parameter names of the two unpacker function signatures that the decoder
 * looks for: function(p,a,c,k,e,r) and function(p,a,c,k,e,d).
 */
static const char *de_packer_3[] = { "p", "a", "c", "k", "e", "r" };
static const char *de_packer_2[] = { "p", "a", "c", "k", "e", "d" };

#ifndef MAX
#define MAX(a, b) ((a)>(b) ? (a) : (b))
#endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline char *textbuffer_done(yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/* free unusued memory */ | 
					
						
							|  |  |  | 	char *str = cli_realloc(scanner->buf.data, scanner->buf.pos); | 
					
						
							|  |  |  | 	if(!str) { | 
					
						
							|  |  |  | 		str = scanner->buf.data; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-07-16 08:44:37 +00:00
										 |  |  | 	scanner->yytext = str; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	scanner->yylen = scanner->buf.pos - 1; | 
					
						
							|  |  |  | 	memset(&scanner->buf, 0, sizeof(scanner->buf)); | 
					
						
							|  |  |  | 	return str; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define MODULE "JS-Norm: "
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void free_token(yystype *token) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if(token->vtype == vtype_string) { | 
					
						
							|  |  |  | 		free(token->val.string); | 
					
						
							|  |  |  | 		token->val.string = NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int replace_token_range(struct tokens *dst, size_t start, size_t end, const struct tokens *with) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const size_t len = with ? with->cnt : 0; | 
					
						
							|  |  |  | 	size_t i; | 
					
						
							|  |  |  | 	cli_dbgmsg(MODULE "Replacing tokens %lu - %lu with %lu tokens\n",start, end, len); | 
					
						
							|  |  |  | 	if(start >= dst->cnt || end > dst->cnt) | 
					
						
							|  |  |  | 		return -1; | 
					
						
							|  |  |  | 	for(i=start;i<end;i++) { | 
					
						
							|  |  |  | 		free_token(&dst->data[i]); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if(tokens_ensure_capacity(dst, dst->cnt - (end-start) + len) < 0) | 
					
						
							|  |  |  | 		return CL_EMEM; | 
					
						
							|  |  |  | 	memmove(&dst->data[start+len], &dst->data[end], (dst->cnt - end) * sizeof(dst->data[0])); | 
					
						
							|  |  |  | 	if(with && len > 0) { | 
					
						
							|  |  |  | 		memcpy(&dst->data[start], with->data, len * sizeof(dst->data[0])); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	dst->cnt = dst->cnt - (end-start) + len; | 
					
						
							|  |  |  | 	return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int append_tokens(struct tokens *dst, const struct tokens *src) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if(!dst || !src) | 
					
						
							|  |  |  | 		return CL_ENULLARG; | 
					
						
							|  |  |  | 	if(tokens_ensure_capacity(dst, dst->cnt + src->cnt) == -1) | 
					
						
							|  |  |  | 		return CL_EMEM; | 
					
						
							|  |  |  | 	cli_dbgmsg(MODULE "Appending %lu tokens\n", src->cnt); | 
					
						
							|  |  |  | 	memcpy(&dst->data[dst->cnt], src->data, src->cnt * sizeof(dst->data[0])); | 
					
						
							|  |  |  | 	dst->cnt += src->cnt; | 
					
						
							|  |  |  | 	return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void decode_de(yystype *params[], struct text_buffer *txtbuf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const char *p = TOKEN_GET(params[0], cstring); | 
					
						
							|  |  |  | 	const long a = TOKEN_GET(params[1], ival); | 
					
						
							|  |  |  | 	/*const char *c = params[2];*/ | 
					
						
							|  |  |  | 	char *k = TOKEN_GET(params[3], string); | 
					
						
							|  |  |  | 	/*const char *r = params[5];*/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	unsigned val=0; | 
					
						
							|  |  |  | 	unsigned nsplit = 0; | 
					
						
							|  |  |  | 	const char* o; | 
					
						
							|  |  |  | 	const char **tokens; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	memset(txtbuf, 0, sizeof(*txtbuf)); | 
					
						
							|  |  |  | 	if(!p || !k ) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	for(o = k; *o; o++) if(*o == '|') nsplit++; | 
					
						
							|  |  |  | 	nsplit++; | 
					
						
							|  |  |  | 	tokens = malloc(sizeof(char*)*nsplit); | 
					
						
							|  |  |  | 	if(!tokens) { | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	cli_strtokenize(k,'|',nsplit, tokens); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	do { | 
					
						
							|  |  |  | 		while(*p && !isalnum(*p)) { | 
					
						
							|  |  |  | 			if(*p=='\\' && (p[1] == '\'' || p[1] == '\"')) | 
					
						
							|  |  |  | 				p++; | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 				textbuffer_putc(txtbuf, *p++); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if(!*p) break; | 
					
						
							|  |  |  | 		val = 0; | 
					
						
							|  |  |  | 		o = p; | 
					
						
							|  |  |  | 		while(*p && isalnum(*p)) { | 
					
						
							|  |  |  | 			unsigned x; | 
					
						
							|  |  |  | 			unsigned char v = *p++; | 
					
						
							|  |  |  | 			/* TODO: use a table here */ | 
					
						
							|  |  |  | 			if(v >= 'a') x = 10+v-'a'; | 
					
						
							|  |  |  | 			else if(v >= 'A') x = 36+v-'A'; | 
					
						
							|  |  |  | 			else x = v-'0'; | 
					
						
							|  |  |  | 			val = val*a+x; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if(val >= nsplit || !tokens[val] || !tokens[val][0]) | 
					
						
							|  |  |  | 			while(o!=p) | 
					
						
							|  |  |  | 				textbuffer_putc(txtbuf, *o++); | 
					
						
							|  |  |  | 		else	textbuffer_append(txtbuf, tokens[val]); | 
					
						
							|  |  |  | 	} while (*p); | 
					
						
							|  |  |  | 	free(tokens); | 
					
						
							|  |  |  | 	textbuffer_append(txtbuf, "\0"); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct decode_result { | 
					
						
							|  |  |  | 	struct text_buffer txtbuf; | 
					
						
							|  |  |  | 	size_t pos_begin; | 
					
						
							|  |  |  | 	size_t pos_end; | 
					
						
							|  |  |  |         unsigned append:1; /* 0: tokens are replaced with new token(s),
 | 
					
						
							|  |  |  |                             1: old tokens are deleted, new ones appended at the end */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void handle_de(yystype *tokens, size_t start, const size_t cnt, const char *name, struct decode_result *res) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/* find function decl. end */ | 
					
						
							|  |  |  | 	size_t i, nesting = 1, j; | 
					
						
							|  |  |  | 	yystype* parameters [6]; | 
					
						
							|  |  |  | 	const size_t parameters_cnt = 6; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for(i=start;i < cnt; i++) { | 
					
						
							|  |  |  | 		if(tokens[i].type == TOK_FUNCTION) { | 
					
						
							|  |  |  | 			if(TOKEN_GET(&tokens[i], scope)) | 
					
						
							|  |  |  | 				nesting++; | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 				nesting--; | 
					
						
							|  |  |  | 			if(!nesting) | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if(nesting) | 
					
						
							|  |  |  | 		return; | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 	memset(parameters, 0, sizeof(parameters)); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	if(name) { | 
					
						
							|  |  |  | 		/* find call to function */ | 
					
						
							|  |  |  | 		for(;i+2 < cnt; i++) { | 
					
						
							|  |  |  | 			const char* token_val = TOKEN_GET(&tokens[i], cstring); | 
					
						
							|  |  |  | 			if(tokens[i].type == TOK_IDENTIFIER_NAME && | 
					
						
							|  |  |  | 			   token_val && | 
					
						
							|  |  |  | 			   !strcmp(name, token_val) && | 
					
						
							|  |  |  | 			   tokens[i+1].type == TOK_PAR_OPEN) { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				i += 2; | 
					
						
							|  |  |  | 				for(j = 0;j < parameters_cnt && i < cnt;j++) { | 
					
						
							|  |  |  | 					parameters[j] = &tokens[i++]; | 
					
						
							|  |  |  | 					if(j != parameters_cnt-1) | 
					
						
							|  |  |  | 						while (tokens[i].type != TOK_COMMA && i < cnt) i++; | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 						while (tokens[i].type != TOK_PAR_CLOSE && i < cnt) i++; | 
					
						
							|  |  |  | 					i++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				if(j == parameters_cnt) | 
					
						
							|  |  |  | 					decode_de(parameters, &res->txtbuf); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		while(i<cnt && tokens[i].type != TOK_PAR_OPEN) i++; | 
					
						
							|  |  |  | 		++i; | 
					
						
							|  |  |  | 		if(i >= cnt) return; | 
					
						
							|  |  |  | 		/* TODO: move this v to another func */ | 
					
						
							|  |  |  | 				for(j = 0;j < parameters_cnt && i < cnt;j++) { | 
					
						
							|  |  |  | 					parameters[j] = &tokens[i++]; | 
					
						
							|  |  |  | 					if(j != parameters_cnt-1) | 
					
						
							|  |  |  | 						while (tokens[i].type != TOK_COMMA && i < cnt) i++; | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 						while (tokens[i].type != TOK_PAR_CLOSE && i < cnt) i++; | 
					
						
							|  |  |  | 					i++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				if(j == parameters_cnt) | 
					
						
							|  |  |  | 					decode_de(parameters, &res->txtbuf); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 	if(parameters[0] && parameters[parameters_cnt-1]) { | 
					
						
							|  |  |  | 		res->pos_begin = parameters[0] - tokens; | 
					
						
							|  |  |  | 		res->pos_end = parameters[parameters_cnt-1] - tokens + 1; | 
					
						
							|  |  |  | 		if(tokens[res->pos_end].type == TOK_BRACKET_OPEN && | 
					
						
							|  |  |  | 				tokens[res->pos_end+1].type == TOK_BRACKET_CLOSE && | 
					
						
							|  |  |  | 				tokens[res->pos_end+2].type == TOK_PAR_CLOSE) | 
					
						
							|  |  |  | 			res->pos_end += 3; /* {}) */ | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 			res->pos_end++; /* ) */ | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int handle_unescape(struct tokens *tokens, size_t start, const size_t cnt) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if(tokens->data[start].type == TOK_StringLiteral) { | 
					
						
							|  |  |  | 		char *R; | 
					
						
							|  |  |  | 		struct tokens new_tokens; | 
					
						
							|  |  |  | 		yystype tok; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		R = cli_unescape(TOKEN_GET(&tokens->data[start], cstring)); | 
					
						
							|  |  |  | 		tok.type = TOK_StringLiteral; | 
					
						
							|  |  |  | 		TOKEN_SET(&tok, string, R); | 
					
						
							|  |  |  | 		new_tokens.capacity = new_tokens.cnt = 1; | 
					
						
							|  |  |  | 		new_tokens.data = &tok; | 
					
						
							|  |  |  | 		if(replace_token_range(tokens, start-2, start+2, &new_tokens) < 0) | 
					
						
							|  |  |  | 			return CL_EMEM; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* scriptasylum dot com's JS encoder */ | 
					
						
							|  |  |  | static void handle_df(const yystype *tokens, size_t start, const size_t cnt, struct decode_result *res) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	char *str, *s1; | 
					
						
							|  |  |  | 	size_t len, s1_len, i; | 
					
						
							|  |  |  | 	unsigned char clast; | 
					
						
							|  |  |  | 	char *R; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if(tokens[start].type != TOK_StringLiteral) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	str = TOKEN_GET(&tokens[start], string); | 
					
						
							|  |  |  | 	if(!str) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	len = strlen(str); | 
					
						
							|  |  |  | 	clast = str[len-1] - '0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	str[len-1] = '\0'; | 
					
						
							|  |  |  | 	s1 = cli_unescape(str); | 
					
						
							|  |  |  | 	s1_len = strlen(s1); | 
					
						
							|  |  |  | 	for(i=0;i<s1_len;i++) { | 
					
						
							|  |  |  | 		s1[i] -= clast; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	R = cli_unescape(s1); | 
					
						
							|  |  |  | 	free(s1); | 
					
						
							|  |  |  | 	res->pos_begin = start-2; | 
					
						
							|  |  |  | 	res->pos_end = start+2; | 
					
						
							|  |  |  | 	res->txtbuf.data = R; | 
					
						
							|  |  |  | 	res->txtbuf.pos = strlen(R); | 
					
						
							|  |  |  | 	res->append = 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void handle_eval(struct tokens *tokens, size_t start, struct decode_result *res) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	res->txtbuf.data = TOKEN_GET(&tokens->data[start], string); | 
					
						
							|  |  |  | 	if(res->txtbuf.data && tokens->data[start+1].type == TOK_PAR_CLOSE) { | 
					
						
							|  |  |  | 		TOKEN_SET(&tokens->data[start], string, NULL); | 
					
						
							|  |  |  | 		res->txtbuf.pos = strlen(res->txtbuf.data); | 
					
						
							|  |  |  | 		res->pos_begin = start-2; | 
					
						
							|  |  |  | 		res->pos_end = start+2; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void run_folders(struct tokens *tokens) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   size_t i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(i = 0; i < tokens->cnt; i++) { | 
					
						
							|  |  |  | 	  const char *cstring = TOKEN_GET(&tokens->data[i], cstring); | 
					
						
							|  |  |  | 	  if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME && | 
					
						
							|  |  |  | 		    cstring && | 
					
						
							|  |  |  | 		    !strcmp("unescape", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		  handle_unescape(tokens, i+2, tokens->cnt); | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int state_update_scope(struct parser_state *state, const yystype *token) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if(token->type == TOK_FUNCTION) { | 
					
						
							|  |  |  | 		struct scope *scope = TOKEN_GET(token, scope); | 
					
						
							|  |  |  | 		if(scope) { | 
					
						
							|  |  |  | 			state->current = scope; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else { | 
					
						
							|  |  |  | 			/* dummy token marking function end */ | 
					
						
							|  |  |  | 			if(state->current->parent) | 
					
						
							|  |  |  | 				state->current = state->current->parent; | 
					
						
							|  |  |  | 			/* don't output this token, it is just a dummy marker */ | 
					
						
							|  |  |  | 			return 0; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void run_decoders(struct parser_state *state) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   size_t i; | 
					
						
							|  |  |  |   const char* name; | 
					
						
							|  |  |  |   struct tokens *tokens = &state->tokens; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(i = 0; i < tokens->cnt; i++) { | 
					
						
							|  |  |  | 	  const char *cstring = TOKEN_GET(&tokens->data[i], cstring); | 
					
						
							|  |  |  | 	  struct decode_result res; | 
					
						
							|  |  |  | 	  res.pos_begin = res.pos_end = 0; | 
					
						
							|  |  |  | 	  res.append = 0; | 
					
						
							|  |  |  | 	  if(tokens->data[i].type == TOK_FUNCTION && i+13 < tokens->cnt) { | 
					
						
							|  |  |  | 		  name = NULL; | 
					
						
							|  |  |  | 		  ++i; | 
					
						
							|  |  |  | 		  if(tokens->data[i].type == TOK_IDENTIFIER_NAME) { | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 			  cstring = TOKEN_GET(&tokens->data[i], cstring); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 			  name = cstring; | 
					
						
							|  |  |  | 			  ++i; | 
					
						
							|  |  |  | 		  } | 
					
						
							|  |  |  | 		  if(match_parameters(&tokens->data[i], de_packer_3, sizeof(de_packer_3)/sizeof(de_packer_3[0])) != -1 | 
					
						
							|  |  |  | 		     || match_parameters(&tokens->data[i], de_packer_2, sizeof(de_packer_2)/sizeof(de_packer_2[0])) != -1)  { | 
					
						
							|  |  |  | 			  /* find function decl. end */ | 
					
						
							|  |  |  | 			  handle_de(tokens->data, i, tokens->cnt, name, &res); | 
					
						
							|  |  |  | 		  } | 
					
						
							|  |  |  | 	  } else if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME && | 
					
						
							|  |  |  | 		    cstring && | 
					
						
							|  |  |  | 		    !strcmp("dF", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) { | 
					
						
							|  |  |  | 		  /* TODO: also match signature of dF function (possibly
 | 
					
						
							|  |  |  | 		   * declared using unescape */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		  handle_df(tokens->data, i+2, tokens->cnt, &res); | 
					
						
							|  |  |  | 	  } else if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME && | 
					
						
							|  |  |  | 			  cstring && | 
					
						
							|  |  |  | 			  !strcmp("eval", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) { | 
					
						
							|  |  |  | 		  handle_eval(tokens, i+2, &res); | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	if(res.pos_end > res.pos_begin) { | 
					
						
							|  |  |  | 		struct tokens parent_tokens; | 
					
						
							|  |  |  | 		if(res.pos_end < tokens->cnt && tokens->data[res.pos_end].type == TOK_SEMICOLON) | 
					
						
							|  |  |  | 			res.pos_end++; | 
					
						
							|  |  |  | 		parent_tokens = state->tokens;/* save current tokens */ | 
					
						
							|  |  |  | 		/* initialize embedded context */ | 
					
						
							|  |  |  | 		memset(&state->tokens, 0, sizeof(state->tokens)); | 
					
						
							| 
									
										
										
										
											2008-07-29 16:52:48 +00:00
										 |  |  | 		if(++state->rec > 16) | 
					
						
							| 
									
										
										
										
											2008-10-07 13:35:21 +00:00
										 |  |  | 			cli_dbgmsg(MODULE "recursion limit reached\n"); | 
					
						
							| 
									
										
										
										
											2008-07-29 16:52:48 +00:00
										 |  |  | 		else { | 
					
						
							|  |  |  | 			cli_js_process_buffer(state, res.txtbuf.data, res.txtbuf.pos); | 
					
						
							|  |  |  | 			--state->rec; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 		free(res.txtbuf.data); | 
					
						
							|  |  |  | 		/* state->tokens still refers to the embedded/nested context
 | 
					
						
							|  |  |  | 		 * here */ | 
					
						
							|  |  |  | 		if(!res.append) { | 
					
						
							|  |  |  | 			replace_token_range(&parent_tokens, res.pos_begin, res.pos_end, &state->tokens); | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			/* delete tokens */ | 
					
						
							|  |  |  | 			replace_token_range(&parent_tokens, res.pos_begin, res.pos_end, NULL); | 
					
						
							|  |  |  | 			append_tokens(&parent_tokens, &state->tokens); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		/* end of embedded context, restore tokens state */ | 
					
						
							|  |  |  | 		free(state->tokens.data); | 
					
						
							|  |  |  | 		state->tokens = parent_tokens; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	  state_update_scope(state, &state->tokens.data[i]); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void cli_js_parse_done(struct parser_state* state) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2008-10-11 10:27:27 +00:00
										 |  |  | 	struct tokens * tokens = &state->tokens; | 
					
						
							|  |  |  | 	size_t par_balance = 0, i; | 
					
						
							|  |  |  | 	char end = '\0'; | 
					
						
							|  |  |  | 	YYSTYPE val; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 	cli_dbgmsg(MODULE "in cli_js_parse_done()\n"); | 
					
						
							| 
									
										
										
										
											2008-10-11 10:27:27 +00:00
										 |  |  | 	/* close unfinished token */ | 
					
						
							|  |  |  | 	switch (state->scanner->state) { | 
					
						
							|  |  |  | 		case DoubleQString: | 
					
						
							|  |  |  | 			end = '"'; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		case SingleQString: | 
					
						
							|  |  |  | 			end = '\''; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if (end != '\0') | 
					
						
							|  |  |  | 		cli_js_process_buffer(state, &end, 1); | 
					
						
							|  |  |  | 	/* close remaining paranthesis */ | 
					
						
							|  |  |  | 	for (i=0;i<tokens->cnt;i++) { | 
					
						
							|  |  |  | 		if (tokens->data[i].type == TOK_PAR_OPEN) | 
					
						
							|  |  |  | 			par_balance++; | 
					
						
							|  |  |  | 		else if (tokens->data[i].type == TOK_PAR_CLOSE && par_balance > 0) | 
					
						
							|  |  |  | 			par_balance--; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if (par_balance > 0) { | 
					
						
							|  |  |  | 		memset(&val, 0, sizeof(val)); | 
					
						
							|  |  |  | 		val.type = TOK_PAR_CLOSE; | 
					
						
							|  |  |  | 		TOKEN_SET(&val, cstring, ")"); | 
					
						
							|  |  |  | 		while (par_balance-- > 0) { | 
					
						
							|  |  |  | 			add_token(state, &val); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* we had to close unfinished strings, paranthesis,
 | 
					
						
							|  |  |  | 	 * so that the folders/decoders can run properly */ | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	run_folders(&state->tokens); | 
					
						
							|  |  |  | 	run_decoders(state); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	yylex_destroy(state->scanner); | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 	state->scanner = NULL; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | void cli_js_output(struct parser_state *state, const char *tempdir) | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	unsigned i; | 
					
						
							|  |  |  | 	struct buf buf; | 
					
						
							|  |  |  | 	char lastchar = '\0'; | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 	char filename[1024]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	snprintf(filename, 1024, "%s/javascript", tempdir); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	buf.pos = 0; | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 	buf.outfd = open(filename, O_CREAT | O_WRONLY, 0600); | 
					
						
							|  |  |  | 	if(buf.outfd < 0) { | 
					
						
							|  |  |  | 		cli_errmsg(MODULE "cannot open output file for writing: %s\n", filename); | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	/* append to file */ | 
					
						
							|  |  |  | 	if(lseek(buf.outfd, 0, SEEK_END) != 0) { | 
					
						
							|  |  |  | 		/* separate multiple scripts with \n */ | 
					
						
							|  |  |  | 		buf_outc('\n', &buf); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-09-23 20:52:22 +00:00
										 |  |  | 	buf_outs("<script>", &buf); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	state->current = state->global; | 
					
						
							|  |  |  | 	for(i = 0; i < state->tokens.cnt; i++) { | 
					
						
							|  |  |  | 		if(state_update_scope(state, &state->tokens.data[i])) | 
					
						
							|  |  |  | 			lastchar = output_token(&state->tokens.data[i], state->current, &buf, lastchar); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-09-23 20:52:22 +00:00
										 |  |  | 	/* add /script if not already there */ | 
					
						
							|  |  |  | 	if(buf.pos < 9 || memcmp(buf.buf + buf.pos - 9, "</script>", 9)) | 
					
						
							|  |  |  | 		buf_outs("</script>", &buf); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	if(write(buf.outfd, buf.buf, buf.pos) < 0) { | 
					
						
							| 
									
										
										
										
											2008-10-07 13:35:21 +00:00
										 |  |  | 		cli_dbgmsg(MODULE "I/O error\n"); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 	close(buf.outfd); | 
					
						
							|  |  |  | 	cli_dbgmsg(MODULE "dumped/appended normalized script to: %s\n",filename); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void cli_js_destroy(struct parser_state *state) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	size_t i; | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 	if(!state) | 
					
						
							|  |  |  | 		return; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	scope_free_all(state->list); | 
					
						
							|  |  |  | 	for(i=0;i<state->tokens.cnt;i++) { | 
					
						
							|  |  |  | 		free_token(&state->tokens.data[i]); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	free(state->tokens.data); | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 	/* detect use after free */ | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 	if(state->scanner) | 
					
						
							|  |  |  | 		yylex_destroy(state->scanner); | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 	memset(state, 0x55, sizeof(*state)); | 
					
						
							|  |  |  | 	free(state); | 
					
						
							|  |  |  | 	cli_dbgmsg(MODULE "cli_js_destroy() done\n"); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* buffer is html-normlike "chunk", if original file is bigger than buffer,
 | 
					
						
							|  |  |  |  * we rewind to a space, so we'll know that tokens won't be broken in half at | 
					
						
							|  |  |  |  * the end of a buffer. All tokens except string-literals of course. | 
					
						
							|  |  |  |  * So we can assume that after the buffer there is either a space, EOF, or a | 
					
						
							|  |  |  |  * chunk of text not containing whitespace at all (for which we care only if its | 
					
						
							|  |  |  |  * a stringliteral)*/ | 
					
						
							|  |  |  | void cli_js_process_buffer(struct parser_state *state, const char *buf, size_t n) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct scope* current = state->current; | 
					
						
							|  |  |  | 	YYSTYPE val; | 
					
						
							|  |  |  | 	int yv; | 
					
						
							|  |  |  | 	YY_BUFFER_STATE yyb; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if(!state->global) { | 
					
						
							|  |  |  | 		/* this state has either not been initialized,
 | 
					
						
							|  |  |  | 		 * or cli_js_parse_done() was already called on it */ | 
					
						
							| 
									
										
										
										
											2008-10-07 13:35:21 +00:00
										 |  |  | 		cli_warnmsg(MODULE "invalid state\n"); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 		return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	yyb = yy_scan_bytes(buf, n, state->scanner); | 
					
						
							|  |  |  | 	memset(&val, 0, sizeof(val)); | 
					
						
							|  |  |  | 	val.vtype = vtype_undefined; | 
					
						
							|  |  |  | 	/* on EOF yylex will return 0 */ | 
					
						
							|  |  |  | 	while( (yv=yylex(&val, state->scanner)) != 0) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		const char *text; | 
					
						
							|  |  |  | 		size_t leng; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		val.type = yv; | 
					
						
							|  |  |  | 		switch(yv) { | 
					
						
							|  |  |  | 			case TOK_VAR: | 
					
						
							|  |  |  | 				current->fsm_state = InsideVar; | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_IDENTIFIER_NAME: | 
					
						
							|  |  |  | 				text = yyget_text(state->scanner); | 
					
						
							|  |  |  | 				leng = yyget_leng(state->scanner); | 
					
						
							|  |  |  | 				if(current->last_token == TOK_DOT) { | 
					
						
							|  |  |  | 					/* this is a member name, don't normalize
 | 
					
						
							|  |  |  | 					*/ | 
					
						
							|  |  |  | 					TOKEN_SET(&val, string, cli_strdup(text)); | 
					
						
							|  |  |  | 					val.type = TOK_UNNORM_IDENTIFIER; | 
					
						
							|  |  |  | 				} else { | 
					
						
							|  |  |  | 					switch(current->fsm_state) { | 
					
						
							|  |  |  | 						case WaitParameterList: | 
					
						
							|  |  |  | 							state->syntax_errors++; | 
					
						
							|  |  |  | 							/* fall through */ | 
					
						
							|  |  |  | 						case Base: | 
					
						
							|  |  |  | 						case InsideInitializer: | 
					
						
							|  |  |  | 							TOKEN_SET(&val, cstring, scope_use(current, text, leng)); | 
					
						
							|  |  |  | 							break; | 
					
						
							|  |  |  | 						case InsideVar: | 
					
						
							|  |  |  | 						case InsideFunctionDecl: | 
					
						
							|  |  |  | 							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state)); | 
					
						
							|  |  |  | 							current->fsm_state = InsideInitializer; | 
					
						
							|  |  |  | 							current->brackets = 0; | 
					
						
							|  |  |  | 							break; | 
					
						
							|  |  |  | 						case WaitFunctionName: | 
					
						
							|  |  |  | 							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state)); | 
					
						
							|  |  |  | 							current->fsm_state = WaitParameterList; | 
					
						
							|  |  |  | 							break; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_PAR_OPEN: | 
					
						
							|  |  |  | 				switch(current->fsm_state) { | 
					
						
							|  |  |  | 					case WaitFunctionName: | 
					
						
							|  |  |  | 						/* fallthrough */ | 
					
						
							|  |  |  | 					case WaitParameterList: | 
					
						
							|  |  |  | 						current->fsm_state = InsideFunctionDecl; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					default: | 
					
						
							|  |  |  | 						/* noop */ | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_PAR_CLOSE: | 
					
						
							|  |  |  | 				switch(current->fsm_state) { | 
					
						
							|  |  |  | 					case WaitFunctionName: | 
					
						
							|  |  |  | 						state->syntax_errors++; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					case WaitParameterList: | 
					
						
							|  |  |  | 						current->fsm_state = Base; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					default: | 
					
						
							|  |  |  | 						/* noop */ | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_CURLY_BRACE_OPEN: | 
					
						
							|  |  |  | 				switch(current->fsm_state) { | 
					
						
							|  |  |  | 					case WaitFunctionName: | 
					
						
							|  |  |  | 						/* fallthrough */ | 
					
						
							|  |  |  | 					case WaitParameterList: | 
					
						
							|  |  |  | 					case InsideFunctionDecl: | 
					
						
							|  |  |  | 						/* in a syntactically correct
 | 
					
						
							|  |  |  | 						 * file, we would already be in | 
					
						
							|  |  |  | 						 * the Base state when we see a { | 
					
						
							|  |  |  | 						 */ | 
					
						
							|  |  |  | 						current->fsm_state = Base; | 
					
						
							|  |  |  | 						/* fall-through */ | 
					
						
							|  |  |  | 					case InsideVar: | 
					
						
							|  |  |  | 					case InsideInitializer: | 
					
						
							|  |  |  | 						state->syntax_errors++; | 
					
						
							|  |  |  | 						/* fall-through */ | 
					
						
							|  |  |  | 					case Base: | 
					
						
							|  |  |  | 					default: | 
					
						
							|  |  |  | 						current->blocks++; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 					case TOK_CURLY_BRACE_CLOSE: | 
					
						
							|  |  |  | 				if(current->blocks > 0) | 
					
						
							|  |  |  | 					current->blocks--; | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					state->syntax_errors++; | 
					
						
							|  |  |  | 				if(!current->blocks) { | 
					
						
							|  |  |  | 					if(current->parent) { | 
					
						
							|  |  |  | 						/* add dummy FUNCTION token to
 | 
					
						
							|  |  |  | 						 * mark function end */ | 
					
						
							|  |  |  | 						TOKEN_SET(&val, cstring, "}"); | 
					
						
							|  |  |  | 						add_token(state, &val); | 
					
						
							|  |  |  | 						TOKEN_SET(&val, scope, NULL); | 
					
						
							|  |  |  | 						val.type = TOK_FUNCTION; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 						state->current = current = current->parent; | 
					
						
							|  |  |  | 					} else{ | 
					
						
							|  |  |  | 						/* extra } */ | 
					
						
							|  |  |  | 						state->syntax_errors++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_BRACKET_OPEN: | 
					
						
							|  |  |  | 				current->brackets++; | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_BRACKET_CLOSE: | 
					
						
							|  |  |  | 				if(current->brackets > 0) | 
					
						
							|  |  |  | 					current->brackets--; | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					state->syntax_errors++; | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_COMMA: | 
					
						
							|  |  |  | 				if (current->fsm_state == InsideInitializer && current->brackets == 0 && current->blocks == 0) { | 
					
						
							|  |  |  | 					/* initializer ended only if we
 | 
					
						
							|  |  |  | 					 * encountered a comma, and [] are | 
					
						
							|  |  |  | 					 * balanced. | 
					
						
							|  |  |  | 					 * This avoids switching state on: | 
					
						
							|  |  |  | 					 * var x = [4,y,u];*/ | 
					
						
							|  |  |  | 					current->fsm_state = InsideVar; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_SEMICOLON: | 
					
						
							|  |  |  | 				if (current->brackets == 0 && current->blocks == 0) { | 
					
						
							|  |  |  | 					/* avoid switching state on unbalanced []:
 | 
					
						
							|  |  |  | 					 * var x = [test;testi]; */ | 
					
						
							|  |  |  | 					current->fsm_state = Base; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_FUNCTION: | 
					
						
							|  |  |  | 				current = scope_new(state); | 
					
						
							|  |  |  | 				current->fsm_state = WaitFunctionName; | 
					
						
							|  |  |  | 				TOKEN_SET(&val, scope, state->current); | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case TOK_StringLiteral: | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 				if(state->tokens.cnt > 0 && state->tokens.data[state->tokens.cnt-1].type == TOK_PLUS) { | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 					/* see if can fold */ | 
					
						
							|  |  |  | 					yystype *prev_string = &state->tokens.data[state->tokens.cnt-2]; | 
					
						
							|  |  |  | 					if(prev_string->type == TOK_StringLiteral) { | 
					
						
							|  |  |  | 						char *str = TOKEN_GET(prev_string, string); | 
					
						
							|  |  |  | 						size_t str_len = strlen(str); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 						text = yyget_text(state->scanner); | 
					
						
							|  |  |  | 						leng = yyget_leng(state->scanner); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 						/* delete TOK_PLUS */ | 
					
						
							|  |  |  | 						free_token(&state->tokens.data[--state->tokens.cnt]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 						str = cli_realloc(str, str_len + leng + 1); | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 						strncpy(str+str_len, text, leng); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 						str[str_len + leng] = '\0'; | 
					
						
							|  |  |  | 						TOKEN_SET(prev_string, string, str); | 
					
						
							|  |  |  | 						free(val.val.string); | 
					
						
							|  |  |  | 						memset(&val, 0, sizeof(val)); | 
					
						
							|  |  |  | 						val.vtype = vtype_undefined; | 
					
						
							|  |  |  | 						continue; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if(val.vtype == vtype_undefined) { | 
					
						
							|  |  |  | 			text = yyget_text(state->scanner); | 
					
						
							|  |  |  | 			TOKEN_SET(&val, string, cli_strdup(text)); | 
					
						
							|  |  |  | 			abort(); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		add_token(state, &val); | 
					
						
							|  |  |  | 		current->last_token = yv; | 
					
						
							|  |  |  | 		memset(&val, 0, sizeof(val)); | 
					
						
							|  |  |  | 		val.vtype = vtype_undefined; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	yy_delete_buffer(yyb, state->scanner); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | struct parser_state *cli_js_init(void) | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 	struct parser_state *state = cli_calloc(1, sizeof(*state)); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	if(!state) | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 		return NULL; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	if(!scope_new(state)) { | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 		free(state); | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	state->global = state->current; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if(yylex_init(&state->scanner)) { | 
					
						
							|  |  |  | 		scope_done(state->global); | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 		free(state); | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	yyset_debug(1, state->scanner); | 
					
						
							| 
									
										
										
										
											2008-07-08 11:33:32 +00:00
										 |  |  | 	cli_dbgmsg(MODULE "cli_js_init() done\n"); | 
					
						
							|  |  |  | 	return state; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*-------------- tokenizer ---------------------*/ | 
					
						
							|  |  |  | enum char_class { | 
					
						
							|  |  |  | 	Whitespace, | 
					
						
							|  |  |  | 	Slash, | 
					
						
							|  |  |  | 	Operator, | 
					
						
							|  |  |  | 	DQuote, | 
					
						
							|  |  |  | 	SQuote, | 
					
						
							|  |  |  | 	Digit, | 
					
						
							|  |  |  | 	IdStart, | 
					
						
							|  |  |  | 	BracketOpen = TOK_BRACKET_OPEN, | 
					
						
							|  |  |  | 	BracketClose = TOK_BRACKET_CLOSE, | 
					
						
							|  |  |  | 	Comma = TOK_COMMA, | 
					
						
							|  |  |  | 	CurlyOpen = TOK_CURLY_BRACE_OPEN, | 
					
						
							|  |  |  | 	CurlyClose = TOK_CURLY_BRACE_CLOSE, | 
					
						
							|  |  |  | 	ParOpen = TOK_PAR_OPEN, | 
					
						
							|  |  |  | 	ParClose = TOK_PAR_CLOSE, | 
					
						
							|  |  |  | 	Dot = TOK_DOT, | 
					
						
							|  |  |  | 	SemiColon = TOK_SEMICOLON, | 
					
						
							|  |  |  | 	Nop | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define SL Slash
 | 
					
						
							|  |  |  | #define DG Digit
 | 
					
						
							|  |  |  | #define DQ DQuote
 | 
					
						
							|  |  |  | #define SQ SQuote
 | 
					
						
							|  |  |  | #define ID IdStart
 | 
					
						
							|  |  |  | #define OP Operator
 | 
					
						
							|  |  |  | #define WS Whitespace
 | 
					
						
							|  |  |  | #define BO BracketOpen
 | 
					
						
							|  |  |  | #define BC BracketClose
 | 
					
						
							|  |  |  | #define CM Comma
 | 
					
						
							|  |  |  | #define CO CurlyOpen
 | 
					
						
							|  |  |  | #define CC CurlyClose
 | 
					
						
							|  |  |  | #define PO ParOpen
 | 
					
						
							|  |  |  | #define PC ParClose
 | 
					
						
							|  |  |  | #define DT Dot
 | 
					
						
							|  |  |  | #define SC SemiColon
 | 
					
						
							|  |  |  | #define NA Nop
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const enum char_class ctype[256] = { | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, WS, WS, WS, NA, WS, NA, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | 	WS, OP, DQ, NA, ID, OP, OP, SQ, PO, PC, OP, OP, CM, OP, DT, SL, | 
					
						
							|  |  |  | 	DG, DG, DG, DG, DG, DG, DG, DG, DG, DG, OP, SC, OP, OP, OP, OP, | 
					
						
							|  |  |  | 	NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, | 
					
						
							|  |  |  | 	ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, BO, ID, BC, OP, ID, | 
					
						
							|  |  |  | 	NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, | 
					
						
							|  |  |  | 	ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, CO, OP, CC, OP, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const enum char_class id_ctype[256] = { | 
					
						
							|  |  |  | 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, ID, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, | 
					
						
							|  |  |  |         ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, OP, NA, NA, ID, | 
					
						
							|  |  |  |         NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, | 
					
						
							|  |  |  |         ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  |         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define CASE_SPECIAL_CHAR(C, S) case C: TOKEN_SET(lvalp, cstring, (S)); return cClass;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define BUF_KEEP_SIZE 32768
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void textbuf_clean(struct text_buffer *buf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if(buf->capacity > BUF_KEEP_SIZE) { | 
					
						
							|  |  |  | 		buf->data = cli_realloc(buf->data, BUF_KEEP_SIZE); | 
					
						
							|  |  |  | 		buf->capacity = BUF_KEEP_SIZE; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	buf->pos = 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int parseString(YYSTYPE *lvalp, yyscan_t scanner, const char q, | 
					
						
							|  |  |  | 		enum tokenizer_state tostate) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	size_t len; | 
					
						
							|  |  |  | 	/* look for " terminating the string */ | 
					
						
							|  |  |  | 	const char *start = &scanner->in[scanner->pos], *end = start; | 
					
						
							|  |  |  | 	do { | 
					
						
							|  |  |  | 		const size_t siz = &scanner->in[scanner->insize] - end; | 
					
						
							|  |  |  | 		end = memchr(end, q, siz); | 
					
						
							| 
									
										
										
										
											2008-08-12 10:09:15 +00:00
										 |  |  | 		if(end && end > start && end[-1] == '\\') { | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 			++end; | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		break; | 
					
						
							|  |  |  | 	} while (1); | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 	if(end && end >= start) | 
					
						
							| 
									
										
										
										
											2008-07-10 10:29:29 +00:00
										 |  |  | 		len = end - start; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		len = scanner->insize - scanner->pos; | 
					
						
							| 
									
										
										
										
											2008-07-08 19:37:18 +00:00
										 |  |  | 	cli_textbuffer_append_normalize(&scanner->buf, start, len); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	if(end) { | 
					
						
							|  |  |  | 		/* skip over end quote */ | 
					
						
							|  |  |  | 		scanner->pos += len + 1; | 
					
						
							|  |  |  | 		textbuffer_putc(&scanner->buf, '\0'); | 
					
						
							|  |  |  | 		TOKEN_SET(lvalp, string, textbuffer_done(scanner)); | 
					
						
							|  |  |  | 		scanner->state = Initial; | 
					
						
							|  |  |  | 		assert(lvalp->val.string); | 
					
						
							|  |  |  | 		return TOK_StringLiteral; | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		scanner->pos += len; | 
					
						
							|  |  |  | 		/* unfinished string */ | 
					
						
							|  |  |  | 		scanner->state = tostate; | 
					
						
							|  |  |  | 		return 0; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int parseDQString(YYSTYPE *lvalp, yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return parseString(lvalp, scanner, '"', DoubleQString); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int parseSQString(YYSTYPE *lvalp, yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return parseString(lvalp, scanner, '\'', SingleQString); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int parseNumber(YYSTYPE *lvalp, yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const unsigned char *in = (const unsigned char*)scanner->in; | 
					
						
							|  |  |  | 	int is_float = 0; | 
					
						
							|  |  |  | 	while(scanner->pos < scanner->insize) { | 
					
						
							|  |  |  | 		unsigned char c = in[scanner->pos++]; | 
					
						
							|  |  |  | 		if(isdigit(c)) { | 
					
						
							|  |  |  | 			textbuffer_putc(&scanner->buf, c); | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if(c =='.' && !is_float) { | 
					
						
							|  |  |  | 			is_float = 1; | 
					
						
							|  |  |  | 			textbuffer_putc(&scanner->buf, '.'); | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if((c=='e' || c=='E') && is_float) { | 
					
						
							|  |  |  | 			textbuffer_putc(&scanner->buf, c); | 
					
						
							|  |  |  | 			if(scanner->pos < scanner->insize) { | 
					
						
							|  |  |  | 				c = in[scanner->pos++]; | 
					
						
							|  |  |  | 				if(c == '+' || c == '-' || isdigit(c)) { | 
					
						
							|  |  |  | 					textbuffer_putc(&scanner->buf, c); | 
					
						
							|  |  |  | 					continue; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		scanner->pos--; | 
					
						
							|  |  |  | 		textbuffer_putc(&scanner->buf, '\0'); | 
					
						
							|  |  |  | 		scanner->state = Initial; | 
					
						
							|  |  |  | 		if(is_float) { | 
					
						
							|  |  |  | 			TOKEN_SET(lvalp, dval, atof(scanner->buf.data)); | 
					
						
							|  |  |  | 			return TOK_NumericFloat; | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			TOKEN_SET(lvalp, ival, atoi(scanner->buf.data)); | 
					
						
							|  |  |  | 			return TOK_NumericInt; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	scanner->state = Number; | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int parseId(YYSTYPE *lvalp, yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const struct keyword *kw; | 
					
						
							|  |  |  | 	const unsigned char *in = (const unsigned char*)scanner->in; | 
					
						
							|  |  |  | 	scanner->state = Initial; | 
					
						
							|  |  |  | 	while(scanner->pos < scanner->insize) { | 
					
						
							|  |  |  | 		unsigned char c = in[scanner->pos++]; | 
					
						
							|  |  |  | 		enum char_class cClass = id_ctype[c]; | 
					
						
							|  |  |  | 		switch(cClass) { | 
					
						
							|  |  |  | 			case IdStart: | 
					
						
							|  |  |  | 				textbuffer_putc(&scanner->buf, c); | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case Operator: | 
					
						
							|  |  |  | 				/* the table contains OP only for \ */ | 
					
						
							|  |  |  | 				assert(c == '\\'); | 
					
						
							|  |  |  | 				if(scanner->pos < scanner->insize && | 
					
						
							|  |  |  | 						in[scanner->pos++] == 'u') { | 
					
						
							|  |  |  | 					textbuffer_putc(&scanner->buf, c); | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2008-08-25 12:39:09 +00:00
										 |  |  | 				if(scanner->pos == scanner->insize) { | 
					
						
							|  |  |  | 					scanner->pos++; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 				/* else fallthrough */ | 
					
						
							|  |  |  | 			default: | 
					
						
							|  |  |  | 				/* character is no longer part of identifier */ | 
					
						
							| 
									
										
										
										
											2008-08-25 12:39:09 +00:00
										 |  |  | 				scanner->state = Initial; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 				textbuffer_putc(&scanner->buf, '\0'); | 
					
						
							|  |  |  | 				scanner->pos--; | 
					
						
							|  |  |  | 				kw = in_word_set(scanner->buf.data, scanner->buf.pos-1); | 
					
						
							|  |  |  | 				if(kw) { | 
					
						
							|  |  |  | 					/* we got a keyword */ | 
					
						
							|  |  |  | 					TOKEN_SET(lvalp, cstring, kw->name); | 
					
						
							|  |  |  | 					return kw->val; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				/* it is not a keyword, just an identifier */ | 
					
						
							|  |  |  | 				TOKEN_SET(lvalp, cstring, NULL); | 
					
						
							|  |  |  | 				return TOK_IDENTIFIER_NAME; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	scanner->state = Identifier; | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifndef MIN
 | 
					
						
							|  |  |  | #define MIN(a,b) ((a)<(b) ? (a):(b))
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int parseOperator(YYSTYPE *lvalp, yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	size_t len = MIN(5, scanner->insize - scanner->pos); | 
					
						
							|  |  |  | 	while(len) { | 
					
						
							|  |  |  | 		const struct operator *kw = in_op_set(&scanner->in[scanner->pos], len); | 
					
						
							|  |  |  | 		if(kw) { | 
					
						
							|  |  |  | 			TOKEN_SET(lvalp, cstring, kw->name); | 
					
						
							|  |  |  | 			scanner->pos += len; | 
					
						
							|  |  |  | 			return kw->val; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		len--; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 	/* never reached */ | 
					
						
							|  |  |  | 	assert(0); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	scanner->pos++; | 
					
						
							|  |  |  | 	TOKEN_SET(lvalp, cstring, NULL); | 
					
						
							|  |  |  | 	return TOK_ERROR; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int yylex_init(yyscan_t *scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	*scanner = cli_calloc(1, sizeof(**scanner)); | 
					
						
							|  |  |  | 	return *scanner ? 0 : -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int yylex_destroy(yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	free(scanner->buf.data); | 
					
						
							|  |  |  | 	free(scanner); | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int yy_scan_bytes(const char *p, size_t len, yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	scanner->in = p; | 
					
						
							|  |  |  | 	scanner->insize = len; | 
					
						
							|  |  |  | 	scanner->pos = 0; | 
					
						
							| 
									
										
										
										
											2008-08-25 12:39:09 +00:00
										 |  |  | 	scanner->lastpos = -1; | 
					
						
							|  |  |  | 	scanner->last_state = Dummy; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void yyset_debug (int debug_flag ,yyscan_t yyscanner ) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void yy_delete_buffer( YY_BUFFER_STATE yyb, yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const char *yyget_text(yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	assert(scanner->buf.data || scanner->yytext); | 
					
						
							|  |  |  | 	return scanner->yytext ? scanner->yytext : scanner->buf.data; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int yyget_leng(yyscan_t scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/* we have a \0 too */ | 
					
						
							|  |  |  | 	return scanner->yylen ? scanner->yylen : (scanner->buf.pos > 0 ? scanner->buf.pos - 1 : 0); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int yylex(YYSTYPE *lvalp, yyscan_t  scanner) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const size_t len = scanner->insize; | 
					
						
							|  |  |  | 	const unsigned char *in = (const unsigned char*)scanner->in; | 
					
						
							|  |  |  | 	unsigned char lookahead; | 
					
						
							|  |  |  | 	enum char_class cClass; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	scanner->yytext = NULL; | 
					
						
							|  |  |  | 	scanner->yylen = 0; | 
					
						
							| 
									
										
										
										
											2008-08-25 12:39:09 +00:00
										 |  |  | 	if(scanner->pos == scanner->lastpos) { | 
					
						
							|  |  |  | 		if(scanner->last_state == scanner->state) { | 
					
						
							|  |  |  | 			cli_dbgmsg(MODULE "infloop detected, skipping character\n"); | 
					
						
							|  |  |  | 			scanner->pos++; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		/* its not necesarely an infloop if it changed
 | 
					
						
							|  |  |  | 		 * state, and it shouldn't infloop between states */ | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	scanner->lastpos = scanner->pos; | 
					
						
							|  |  |  | 	scanner->last_state = scanner->state; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 	while(scanner->pos < scanner->insize) { | 
					
						
							|  |  |  | 		switch(scanner->state) { | 
					
						
							|  |  |  | 			case Initial: | 
					
						
							|  |  |  | 				textbuf_clean(&scanner->buf); | 
					
						
							|  |  |  | 				cClass = ctype[in[scanner->pos++]]; | 
					
						
							|  |  |  | 				switch(cClass) { | 
					
						
							|  |  |  | 					case Whitespace: | 
					
						
							|  |  |  | 						/* eat whitespace */ | 
					
						
							|  |  |  | 						continue; | 
					
						
							|  |  |  | 					case Slash: | 
					
						
							|  |  |  | 						if(scanner->pos < len) { | 
					
						
							|  |  |  | 							lookahead = in[scanner->pos]; | 
					
						
							|  |  |  | 							switch(lookahead) { | 
					
						
							|  |  |  | 								case '*': | 
					
						
							|  |  |  | 									scanner->state = MultilineComment; | 
					
						
							|  |  |  | 									scanner->pos++; | 
					
						
							|  |  |  | 									continue; | 
					
						
							|  |  |  | 								case '/': | 
					
						
							|  |  |  | 									scanner->state = SinglelineComment; | 
					
						
							|  |  |  | 									scanner->pos++; | 
					
						
							|  |  |  | 									continue; | 
					
						
							|  |  |  | 							} | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						--scanner->pos; | 
					
						
							|  |  |  | 						return parseOperator(lvalp, scanner); | 
					
						
							|  |  |  | 					case Operator: | 
					
						
							|  |  |  | 						--scanner->pos; | 
					
						
							|  |  |  | 						return parseOperator(lvalp, scanner); | 
					
						
							|  |  |  | 					case DQuote: | 
					
						
							|  |  |  | 						return parseDQString(lvalp, scanner); | 
					
						
							|  |  |  | 					case SQuote: | 
					
						
							|  |  |  | 						return parseSQString(lvalp, scanner); | 
					
						
							|  |  |  | 					case Digit: | 
					
						
							|  |  |  | 						--scanner->pos; | 
					
						
							|  |  |  | 						return parseNumber(lvalp, scanner); | 
					
						
							|  |  |  | 					case IdStart: | 
					
						
							|  |  |  | 						--scanner->pos; | 
					
						
							|  |  |  | 						return parseId(lvalp,scanner); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(BracketOpen, "["); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(BracketClose, "]"); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(Comma, ","); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(CurlyOpen, "{"); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(CurlyClose, "}"); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(ParOpen, "("); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(ParClose, ")"); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(Dot, "."); | 
					
						
							|  |  |  | 					CASE_SPECIAL_CHAR(SemiColon, ";"); | 
					
						
							|  |  |  | 					case Nop: | 
					
						
							|  |  |  | 					       continue; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case DoubleQString: | 
					
						
							|  |  |  | 				return parseString(lvalp, scanner, '"', DoubleQString); | 
					
						
							|  |  |  | 			case SingleQString: | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 				return parseString(lvalp, scanner, '\'', SingleQString); | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 			case Identifier: | 
					
						
							|  |  |  | 				return parseId(lvalp, scanner); | 
					
						
							|  |  |  | 			case MultilineComment: | 
					
						
							|  |  |  | 				while(scanner->pos+1 < scanner->insize) { | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 					if(in[scanner->pos] == '*' && in[scanner->pos+1] == '/') { | 
					
						
							|  |  |  | 						scanner->state = Initial; | 
					
						
							|  |  |  | 						scanner->pos++; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 						break; | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 					} | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 					scanner->pos++; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2008-07-15 14:31:33 +00:00
										 |  |  | 				scanner->pos++; | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 				break; | 
					
						
							|  |  |  | 			case Number: | 
					
						
							|  |  |  | 				return parseNumber(lvalp, scanner); | 
					
						
							|  |  |  | 			case SinglelineComment: | 
					
						
							|  |  |  | 				while(scanner->pos < scanner->insize) { | 
					
						
							| 
									
										
										
										
											2008-09-23 20:52:22 +00:00
										 |  |  | 					/* htmlnorm converts \n to space, so
 | 
					
						
							|  |  |  | 					 * stop on space too */ | 
					
						
							|  |  |  | 					if(in[scanner->pos] == '\n' || in[scanner->pos] == ' ') | 
					
						
							| 
									
										
										
										
											2008-07-07 19:10:50 +00:00
										 |  |  | 						break; | 
					
						
							|  |  |  | 					scanner->pos++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				scanner->state = Initial; | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } |