// jetcrab/lexer/core.rs

1use crate::lexer::scanners::LexerCore;
2use crate::lexer::utils::PositionManager;
3use crate::lexer::{LexerError, Token, TokenKind};
4use crate::vm::types::{ColumnNumber, LineNumber};
5
6#[derive(Debug)]
7pub struct Lexer {
8    source: Vec<char>,
9    pos: usize,
10    line: LineNumber,
11    column: ColumnNumber,
12}
13
14impl Lexer {
15    pub fn new(source: &str) -> Self {
16        Self {
17            source: source.chars().collect(),
18            pos: 0,
19            line: LineNumber::new(1),
20            column: ColumnNumber::new(1),
21        }
22    }
23
24    pub fn tokenize(&mut self) -> Result<Vec<Token>, LexerError> {
25        let mut tokens = Vec::new();
26
27        while self.pos < self.source.len() {
28            let start_line = self.line;
29            let start_col = self.column;
30
31            let token = self.next_token()?;
32
33            if matches!(token.kind, TokenKind::Eof) {
34                tokens.push(token);
35                break;
36            }
37
38            // Skip comment tokens - they should not be included in the output
39            if !matches!(token.kind, TokenKind::Comment(_)) {
40                tokens.push(token);
41            }
42
43            <Self as PositionManager>::update_position(self, start_line, start_col);
44        }
45
46        if tokens.is_empty() || !matches!(tokens.last().unwrap().kind, TokenKind::Eof) {
47            tokens.push(Token::with_positions(
48                TokenKind::Eof,
49                self.line.as_usize(),
50                self.column.as_usize(),
51                self.line.as_usize(),
52                self.column.as_usize(),
53            ));
54        }
55
56        Ok(tokens)
57    }
58
59    pub fn next_token(&mut self) -> Result<Token, LexerError> {
60        <Self as PositionManager>::skip_whitespace(self);
61
62        if self.pos >= self.source.len() {
63            return Ok(Token::with_positions(
64                TokenKind::Eof,
65                self.line.as_usize(),
66                self.column.as_usize(),
67                self.line.as_usize(),
68                self.column.as_usize(),
69            ));
70        }
71
72        let start_line = self.line;
73        let start_col = self.column;
74        let c = self.source[self.pos];
75
76        let token_kind = if c.is_ascii_alphabetic() || c == '_' || c == '$' || !c.is_ascii() {
77            <Self as crate::lexer::scanners::IdentifierReader>::read_identifier_or_keyword(self)?
78        } else if c.is_ascii_digit() {
79            <Self as crate::lexer::scanners::NumberReader>::read_number(self)?
80        } else if c == '"' || c == '\'' {
81            <Self as crate::lexer::scanners::StringReader>::read_string(self)?
82        } else if c == '`' {
83            <Self as crate::lexer::scanners::StringReader>::read_template_string(self)?
84        } else if c == '#' {
85            // Handle private fields
86            self.advance(); // consume '#'
87            let mut field_name = String::new();
88
89            while self.pos < self.source.len() {
90                let next_c = self.source[self.pos];
91                if next_c.is_ascii_alphanumeric() || next_c == '_' || next_c == '$' {
92                    field_name.push(next_c);
93                    self.advance();
94                } else {
95                    break;
96                }
97            }
98
99            TokenKind::PrivateField
100        } else if c == '/' {
101            if <Self as PositionManager>::peek_char(self, 1) == Some('/') {
102                <Self as crate::lexer::scanners::CommentReader>::read_line_comment(self)?
103            } else if <Self as PositionManager>::peek_char(self, 1) == Some('*') {
104                <Self as crate::lexer::scanners::CommentReader>::read_block_comment(self)?
105            } else {
106                <Self as crate::lexer::scanners::OperatorReader>::read_operator(self)?
107            }
108        } else {
109            <Self as crate::lexer::scanners::OperatorReader>::read_operator(self)?
110        };
111
112        let end_line = self.line;
113        let end_col = self.column;
114
115        Ok(Token::with_positions(
116            token_kind,
117            start_line.as_usize(),
118            start_col.as_usize(),
119            end_line.as_usize(),
120            end_col.as_usize(),
121        ))
122    }
123}
124
125impl LexerCore for Lexer {
126    fn source(&self) -> &[char] {
127        &self.source
128    }
129
130    fn pos(&self) -> usize {
131        self.pos
132    }
133
134    fn advance_pos(&mut self) {
135        self.advance();
136    }
137}
138
139impl crate::lexer::utils::PositionCore for Lexer {
140    fn source(&self) -> &[char] {
141        &self.source
142    }
143
144    fn pos(&self) -> usize {
145        self.pos
146    }
147
148    fn line(&self) -> LineNumber {
149        self.line
150    }
151
152    fn column(&self) -> ColumnNumber {
153        self.column
154    }
155
156    fn set_pos(&mut self, pos: usize) {
157        self.pos = pos;
158    }
159
160    fn set_line(&mut self, line: LineNumber) {
161        self.line = line;
162    }
163
164    fn set_column(&mut self, column: ColumnNumber) {
165        self.column = column;
166    }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `source`, panicking if the lexer reports an error.
    fn lex(source: &str) -> Vec<Token> {
        Lexer::new(source).tokenize().unwrap()
    }

    /// Checks the token count and that the final token is `Eof`.
    fn assert_len_and_trailing_eof(tokens: &[Token], expected_len: usize) {
        assert_eq!(tokens.len(), expected_len);
        assert!(matches!(tokens[expected_len - 1].kind, TokenKind::Eof));
    }

    // A bare integer literal becomes a single Number token.
    #[test]
    fn test_tokenize_numbers() {
        let tokens = lex("123");
        assert_len_and_trailing_eof(&tokens, 2);
        assert!(matches!(tokens[0].kind, TokenKind::Number(123.0)));
    }

    // A double-quoted literal yields its contents without the quotes.
    #[test]
    fn test_tokenize_strings() {
        let tokens = lex("\"hello\"");
        assert_len_and_trailing_eof(&tokens, 2);
        assert!(matches!(tokens[0].kind, TokenKind::String(ref s) if s == "hello"));
    }

    // A plain name is reported as an Identifier.
    #[test]
    fn test_tokenize_identifiers() {
        let tokens = lex("x");
        assert_len_and_trailing_eof(&tokens, 2);
        assert!(matches!(tokens[0].kind, TokenKind::Identifier(ref s) if s == "x"));
    }

    // Literal keywords map to dedicated token kinds rather than identifiers.
    #[test]
    fn test_tokenize_keywords() {
        let tokens = lex("true false null undefined");
        assert_len_and_trailing_eof(&tokens, 5);
        assert!(matches!(tokens[0].kind, TokenKind::Boolean(true)));
        assert!(matches!(tokens[1].kind, TokenKind::Boolean(false)));
        assert!(matches!(tokens[2].kind, TokenKind::Null));
        assert!(matches!(tokens[3].kind, TokenKind::Undefined));
    }

    // Adjacent single-character operators are split into separate tokens.
    #[test]
    fn test_tokenize_operators() {
        let tokens = lex("+-*/%");
        assert_len_and_trailing_eof(&tokens, 6);
        assert!(matches!(tokens[0].kind, TokenKind::Plus));
        assert!(matches!(tokens[1].kind, TokenKind::Minus));
        assert!(matches!(tokens[2].kind, TokenKind::Star));
        assert!(matches!(tokens[3].kind, TokenKind::Slash));
        assert!(matches!(tokens[4].kind, TokenKind::Percent));
    }

    // Whitespace around a token produces no tokens of its own.
    #[test]
    fn test_tokenize_whitespace() {
        let tokens = lex("  \n  \t  123  ");
        assert_len_and_trailing_eof(&tokens, 2);
        assert!(matches!(tokens[0].kind, TokenKind::Number(123.0)));
    }

    // Input made only of whitespace still yields the terminating Eof.
    #[test]
    fn test_tokenize_only_whitespace() {
        let tokens = lex("  \n  \t  ");
        assert_len_and_trailing_eof(&tokens, 1);
    }

    // A small declaration exercises keyword, identifier, operator and number together.
    #[test]
    fn test_tokenize_multiple_tokens() {
        let tokens = lex("let x = 42");
        assert_len_and_trailing_eof(&tokens, 5);
        assert!(matches!(tokens[0].kind, TokenKind::Keyword(ref s) if s == "let"));
        assert!(matches!(tokens[1].kind, TokenKind::Identifier(ref s) if s == "x"));
        assert!(matches!(tokens[2].kind, TokenKind::Assign));
        assert!(matches!(tokens[3].kind, TokenKind::Number(42.0)));
    }

    // A string with no closing quote is an error, not a token.
    #[test]
    fn test_unterminated_string() {
        let outcome = Lexer::new("\"hello").tokenize();
        assert!(matches!(outcome, Err(LexerError::UnterminatedString)));
    }

    // A literal with two decimal points is rejected and reported verbatim.
    #[test]
    fn test_invalid_number() {
        let outcome = Lexer::new("123.456.789").tokenize();
        assert!(
            matches!(outcome, Err(LexerError::InvalidNumber(ref s)) if s == "123.456.789")
        );
    }
}