Mirror of https://github.com/python/cpython.git, synced 2025-10-31 13:41:24 +00:00

Commit 01481f2dc1
* The lexer, which includes the actual lexeme-producing logic, goes into the `lexer` directory.
* The wrappers, one per input mode (file, string, UTF-8, and readline), go into the `tokenizer` directory and include the logic for creating a lexer instance and managing the buffer for the different modes.

Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
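The hand-off between the two directories is the `tok->underflow` hook visible in the file below: each wrapper installs a mode-specific refill function on the shared `tok_state`, and the lexer calls that hook whenever its buffer runs dry. What follows is a minimal, self-contained sketch of that callback pattern; the `toy_*` names and the two-field struct are invented for illustration, and the real `struct tok_state` in `lexer/state.h` carries many more fields.

#include <stdio.h>
#include <string.h>

/* Toy stand-in for the real tok_state (illustration only). */
struct toy_tok_state {
    const char *inp;                             /* next unread input */
    int (*underflow)(struct toy_tok_state *);    /* refill hook installed by the wrapper */
};

/* tokenizer/ side: hand the lexer one line at a time from an in-memory string. */
static int
toy_underflow_string(struct toy_tok_state *tok)
{
    if (*tok->inp == '\0') {
        return 0;                                /* nothing left to hand out */
    }
    const char *nl = strchr(tok->inp, '\n');
    tok->inp = (nl != NULL) ? nl + 1 : tok->inp + strlen(tok->inp);
    return 1;
}

/* lexer/ side: asks for more data without caring which input mode provides it. */
static void
toy_lex(struct toy_tok_state *tok)
{
    int refills = 0;
    while (tok->underflow(tok)) {
        refills++;                               /* a real lexer would emit lexemes here */
    }
    printf("buffer refilled %d times\n", refills);
}

int
main(void)
{
    struct toy_tok_state tok = { "x = 1\ny = 2\n", toy_underflow_string };
    toy_lex(&tok);
    return 0;
}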
55 lines · 1.3 KiB · C
#include "Python.h"
#include "errcode.h"

#include "helpers.h"
#include "../lexer/state.h"

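/* Refill hook for in-memory input: advance tok->inp past the next newline
   (or to the terminating NUL for a final, unterminated line) so the lexer
   can consume one more line; returns 0 once the whole string is exhausted. */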
static int
tok_underflow_string(struct tok_state *tok) {
    char *end = strchr(tok->inp, '\n');
    if (end != NULL) {
        end++;
    }
    else {
        end = strchr(tok->inp, '\0');
        if (end == tok->inp) {
            tok->done = E_EOF;
            return 0;
        }
    }
    if (tok->start == NULL) {
        tok->buf = tok->cur;
    }
    tok->line_start = tok->cur;
    ADVANCE_LINENO();
    tok->inp = end;
    return 1;
}

/* Set up tokenizer for UTF-8 string */
struct tok_state *
_PyTokenizer_FromUTF8(const char *str, int exec_input, int preserve_crlf)
{
    struct tok_state *tok = _PyTokenizer_tok_new();
    char *translated;
    if (tok == NULL)
        return NULL;
    tok->input = translated = _PyTokenizer_translate_newlines(str, exec_input, preserve_crlf, tok);
    if (translated == NULL) {
        _PyTokenizer_Free(tok);
        return NULL;
    }
    tok->decoding_state = STATE_NORMAL;
    tok->enc = NULL;
    tok->str = translated;
    tok->encoding = _PyTokenizer_new_string("utf-8", 5, tok);
    if (!tok->encoding) {
        _PyTokenizer_Free(tok);
        return NULL;
    }

    tok->buf = tok->cur = tok->inp = translated;
    tok->end = translated;
    tok->underflow = &tok_underflow_string;
    return tok;
}
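For orientation, here is a hedged sketch of how this constructor can be driven from code built against CPython's internal headers. Only `_PyTokenizer_FromUTF8` and `_PyTokenizer_Free` appear in the file above; `_PyTokenizer_Get`, `_PyToken_Init`, `_PyToken_Free`, the token constants, and the header paths are assumptions about the internal lexer API and may differ between versions.

/* Illustrative only: internal, unstable API; header locations are assumed. */
#include "Python.h"
#include "pycore_token.h"          /* ENDMARKER, ERRORTOKEN, _PyToken_Init/_PyToken_Free (assumed) */
#include "lexer/state.h"           /* struct tok_state, struct token (assumed) */
#include "lexer/lexer.h"           /* _PyTokenizer_Get (assumed) */
#include "tokenizer/tokenizer.h"   /* _PyTokenizer_FromUTF8, _PyTokenizer_Free (assumed) */

/* Count the tokens the lexer produces for a UTF-8 source string. */
static int
count_tokens(const char *source)
{
    struct tok_state *tok = _PyTokenizer_FromUTF8(source, /*exec_input=*/1,
                                                  /*preserve_crlf=*/0);
    if (tok == NULL) {
        return -1;
    }
    int count = 0;
    int type;
    do {
        struct token t;
        _PyToken_Init(&t);
        type = _PyTokenizer_Get(tok, &t);    /* one lexeme per call */
        _PyToken_Free(&t);
        count++;
    } while (type != ENDMARKER && type != ERRORTOKEN);
    _PyTokenizer_Free(tok);
    return count;
}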