1use crate::lexer::scanners::LexerCore;
2use crate::lexer::utils::PositionManager;
3use crate::lexer::{LexerError, Token, TokenKind};
4use crate::vm::types::{ColumnNumber, LineNumber};
5
6#[derive(Debug)]
7pub struct Lexer {
8 source: Vec<char>,
9 pos: usize,
10 line: LineNumber,
11 column: ColumnNumber,
12}
13
14impl Lexer {
15 pub fn new(source: &str) -> Self {
16 Self {
17 source: source.chars().collect(),
18 pos: 0,
19 line: LineNumber::new(1),
20 column: ColumnNumber::new(1),
21 }
22 }
23
24 pub fn tokenize(&mut self) -> Result<Vec<Token>, LexerError> {
25 let mut tokens = Vec::new();
26
27 while self.pos < self.source.len() {
28 let start_line = self.line;
29 let start_col = self.column;
30
31 let token = self.next_token()?;
32
33 if matches!(token.kind, TokenKind::Eof) {
34 tokens.push(token);
35 break;
36 }
37
38 if !matches!(token.kind, TokenKind::Comment(_)) {
40 tokens.push(token);
41 }
42
43 <Self as PositionManager>::update_position(self, start_line, start_col);
44 }
45
46 if tokens.is_empty() || !matches!(tokens.last().unwrap().kind, TokenKind::Eof) {
47 tokens.push(Token::with_positions(
48 TokenKind::Eof,
49 self.line.as_usize(),
50 self.column.as_usize(),
51 self.line.as_usize(),
52 self.column.as_usize(),
53 ));
54 }
55
56 Ok(tokens)
57 }
58
59 pub fn next_token(&mut self) -> Result<Token, LexerError> {
60 <Self as PositionManager>::skip_whitespace(self);
61
62 if self.pos >= self.source.len() {
63 return Ok(Token::with_positions(
64 TokenKind::Eof,
65 self.line.as_usize(),
66 self.column.as_usize(),
67 self.line.as_usize(),
68 self.column.as_usize(),
69 ));
70 }
71
72 let start_line = self.line;
73 let start_col = self.column;
74 let c = self.source[self.pos];
75
76 let token_kind = if c.is_ascii_alphabetic() || c == '_' || c == '$' || !c.is_ascii() {
77 <Self as crate::lexer::scanners::IdentifierReader>::read_identifier_or_keyword(self)?
78 } else if c.is_ascii_digit() {
79 <Self as crate::lexer::scanners::NumberReader>::read_number(self)?
80 } else if c == '"' || c == '\'' {
81 <Self as crate::lexer::scanners::StringReader>::read_string(self)?
82 } else if c == '`' {
83 <Self as crate::lexer::scanners::StringReader>::read_template_string(self)?
84 } else if c == '#' {
85 self.advance(); let mut field_name = String::new();
88
89 while self.pos < self.source.len() {
90 let next_c = self.source[self.pos];
91 if next_c.is_ascii_alphanumeric() || next_c == '_' || next_c == '$' {
92 field_name.push(next_c);
93 self.advance();
94 } else {
95 break;
96 }
97 }
98
99 TokenKind::PrivateField
100 } else if c == '/' {
101 if <Self as PositionManager>::peek_char(self, 1) == Some('/') {
102 <Self as crate::lexer::scanners::CommentReader>::read_line_comment(self)?
103 } else if <Self as PositionManager>::peek_char(self, 1) == Some('*') {
104 <Self as crate::lexer::scanners::CommentReader>::read_block_comment(self)?
105 } else {
106 <Self as crate::lexer::scanners::OperatorReader>::read_operator(self)?
107 }
108 } else {
109 <Self as crate::lexer::scanners::OperatorReader>::read_operator(self)?
110 };
111
112 let end_line = self.line;
113 let end_col = self.column;
114
115 Ok(Token::with_positions(
116 token_kind,
117 start_line.as_usize(),
118 start_col.as_usize(),
119 end_line.as_usize(),
120 end_col.as_usize(),
121 ))
122 }
123}
124
125impl LexerCore for Lexer {
126 fn source(&self) -> &[char] {
127 &self.source
128 }
129
130 fn pos(&self) -> usize {
131 self.pos
132 }
133
134 fn advance_pos(&mut self) {
135 self.advance();
136 }
137}
138
139impl crate::lexer::utils::PositionCore for Lexer {
140 fn source(&self) -> &[char] {
141 &self.source
142 }
143
144 fn pos(&self) -> usize {
145 self.pos
146 }
147
148 fn line(&self) -> LineNumber {
149 self.line
150 }
151
152 fn column(&self) -> ColumnNumber {
153 self.column
154 }
155
156 fn set_pos(&mut self, pos: usize) {
157 self.pos = pos;
158 }
159
160 fn set_line(&mut self, line: LineNumber) {
161 self.line = line;
162 }
163
164 fn set_column(&mut self, column: ColumnNumber) {
165 self.column = column;
166 }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input`, panicking on a lexer error, and checks what every successful
    /// case has in common: the expected token count and a trailing `Eof`.
    fn lex_checked(input: &str, expected_len: usize) -> Vec<Token> {
        let tokens = Lexer::new(input).tokenize().unwrap();
        assert_eq!(tokens.len(), expected_len);
        assert!(matches!(
            tokens.last().map(|token| &token.kind),
            Some(TokenKind::Eof)
        ));
        tokens
    }

    #[test]
    fn test_tokenize_numbers() {
        let tokens = lex_checked("123", 2);
        assert!(matches!(tokens[0].kind, TokenKind::Number(123.0)));
    }

    #[test]
    fn test_tokenize_strings() {
        let tokens = lex_checked("\"hello\"", 2);
        assert!(matches!(tokens[0].kind, TokenKind::String(ref s) if s == "hello"));
    }

    #[test]
    fn test_tokenize_identifiers() {
        let tokens = lex_checked("x", 2);
        assert!(matches!(tokens[0].kind, TokenKind::Identifier(ref s) if s == "x"));
    }

    #[test]
    fn test_tokenize_keywords() {
        let tokens = lex_checked("true false null undefined", 5);
        assert!(matches!(tokens[0].kind, TokenKind::Boolean(true)));
        assert!(matches!(tokens[1].kind, TokenKind::Boolean(false)));
        assert!(matches!(tokens[2].kind, TokenKind::Null));
        assert!(matches!(tokens[3].kind, TokenKind::Undefined));
    }

    #[test]
    fn test_tokenize_operators() {
        let tokens = lex_checked("+-*/%", 6);
        assert!(matches!(tokens[0].kind, TokenKind::Plus));
        assert!(matches!(tokens[1].kind, TokenKind::Minus));
        assert!(matches!(tokens[2].kind, TokenKind::Star));
        assert!(matches!(tokens[3].kind, TokenKind::Slash));
        assert!(matches!(tokens[4].kind, TokenKind::Percent));
    }

    #[test]
    fn test_tokenize_whitespace() {
        // Leading and trailing whitespace, including a newline and a tab, is skipped.
        let tokens = lex_checked(" \n \t 123 ", 2);
        assert!(matches!(tokens[0].kind, TokenKind::Number(123.0)));
    }

    #[test]
    fn test_tokenize_only_whitespace() {
        // Nothing but whitespace: the helper's checks (one token, `Eof`) are the test.
        lex_checked(" \n \t ", 1);
    }

    #[test]
    fn test_tokenize_multiple_tokens() {
        let tokens = lex_checked("let x = 42", 5);
        assert!(matches!(tokens[0].kind, TokenKind::Keyword(ref s) if s == "let"));
        assert!(matches!(tokens[1].kind, TokenKind::Identifier(ref s) if s == "x"));
        assert!(matches!(tokens[2].kind, TokenKind::Assign));
        assert!(matches!(tokens[3].kind, TokenKind::Number(42.0)));
    }

    #[test]
    fn test_unterminated_string() {
        // `unwrap_err` panics if lexing unexpectedly succeeds.
        let error = Lexer::new("\"hello").tokenize().unwrap_err();
        assert!(matches!(error, LexerError::UnterminatedString));
    }

    #[test]
    fn test_invalid_number() {
        // The error is expected to carry the full offending literal.
        let error = Lexer::new("123.456.789").tokenize().unwrap_err();
        assert!(matches!(error, LexerError::InvalidNumber(ref s) if s == "123.456.789"));
    }
}