| 
									
										
										
										
											2015-05-04 11:04:17 -06:00
										 |  |  | package parse | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bufio" | 
					
						
							|  |  |  | 	"io" | 
					
						
							|  |  |  | 	"unicode" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type ( | 
					
						
							|  |  |  | 	// lexer is a utility which can get values, token by | 
					
						
							|  |  |  | 	// token, from a Reader. A token is a word, and tokens | 
					
						
							|  |  |  | 	// are separated by whitespace. A word can be enclosed | 
					
						
							|  |  |  | 	// in quotes if it contains whitespace. | 
					
						
							|  |  |  | 	lexer struct { | 
					
						
							|  |  |  | 		reader *bufio.Reader | 
					
						
							|  |  |  | 		token  token | 
					
						
							|  |  |  | 		line   int | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// token represents a single parsable unit. | 
					
						
							|  |  |  | 	token struct { | 
					
						
							|  |  |  | 		line int | 
					
						
							|  |  |  | 		text string | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // load prepares the lexer to scan an input for tokens. | 
					
						
							|  |  |  | func (l *lexer) load(input io.Reader) error { | 
					
						
							|  |  |  | 	l.reader = bufio.NewReader(input) | 
					
						
							|  |  |  | 	l.line = 1 | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // next loads the next token into the lexer. | 
					
						
							|  |  |  | // A token is delimited by whitespace, unless | 
					
						
							|  |  |  | // the token starts with a quotes character (") | 
					
						
							|  |  |  | // in which case the token goes until the closing | 
					
						
							|  |  |  | // quotes (the enclosing quotes are not included). | 
					
						
							| 
									
										
										
										
											2015-05-08 10:32:57 -06:00
										 |  |  | // Inside quoted strings, quotes may be escaped | 
					
						
							|  |  |  | // with a preceding \ character. No other chars | 
					
						
							|  |  |  | // may be escaped. The rest of the line is skipped | 
					
						
							|  |  |  | // if a "#" character is read in. Returns true if | 
					
						
							|  |  |  | // a token was loaded; false otherwise. | 
					
						
							| 
									
										
										
										
											2015-05-04 11:04:17 -06:00
										 |  |  | func (l *lexer) next() bool { | 
					
						
							|  |  |  | 	var val []rune | 
					
						
							|  |  |  | 	var comment, quoted, escaped bool | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	makeToken := func() bool { | 
					
						
							|  |  |  | 		l.token.text = string(val) | 
					
						
							|  |  |  | 		return true | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for { | 
					
						
							|  |  |  | 		ch, _, err := l.reader.ReadRune() | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			if len(val) > 0 { | 
					
						
							|  |  |  | 				return makeToken() | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if err == io.EOF { | 
					
						
							|  |  |  | 				return false | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-05-24 22:52:34 -04:00
										 |  |  | 			panic(err) | 
					
						
							| 
									
										
										
										
											2015-05-04 11:04:17 -06:00
										 |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if quoted { | 
					
						
							|  |  |  | 			if !escaped { | 
					
						
							|  |  |  | 				if ch == '\\' { | 
					
						
							|  |  |  | 					escaped = true | 
					
						
							|  |  |  | 					continue | 
					
						
							|  |  |  | 				} else if ch == '"' { | 
					
						
							|  |  |  | 					quoted = false | 
					
						
							|  |  |  | 					return makeToken() | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if ch == '\n' { | 
					
						
							|  |  |  | 				l.line++ | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-05-08 10:32:57 -06:00
										 |  |  | 			if escaped { | 
					
						
							|  |  |  | 				// only escape quotes | 
					
						
							|  |  |  | 				if ch != '"' { | 
					
						
							|  |  |  | 					val = append(val, '\\') | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-05-04 11:04:17 -06:00
										 |  |  | 			val = append(val, ch) | 
					
						
							|  |  |  | 			escaped = false | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if unicode.IsSpace(ch) { | 
					
						
							|  |  |  | 			if ch == '\r' { | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if ch == '\n' { | 
					
						
							|  |  |  | 				l.line++ | 
					
						
							|  |  |  | 				comment = false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if len(val) > 0 { | 
					
						
							|  |  |  | 				return makeToken() | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if ch == '#' { | 
					
						
							|  |  |  | 			comment = true | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if comment { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if len(val) == 0 { | 
					
						
							|  |  |  | 			l.token = token{line: l.line} | 
					
						
							|  |  |  | 			if ch == '"' { | 
					
						
							|  |  |  | 				quoted = true | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		val = append(val, ch) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } |