Fix Token enum.

Replace the `TokenKind` + `Token` struct pair with a single `Token` enum whose variants carry their own payloads.
This commit is contained in:
mii
2021-12-19 23:22:55 +09:00
parent cb0aa27b4a
commit d8fd6dfadd
7 changed files with 67 additions and 30 deletions

3
Cargo.lock generated
View File

@ -5,6 +5,9 @@ version = 3
[[package]]
name = "catalysa"
version = "0.1.0"
dependencies = [
"catalysa-parser",
]
[[package]]
name = "catalysa-parser"

View File

@ -0,0 +1,5 @@
use crate::token::Token;
/// Holds the token stream produced by tokenization.
///
/// NOTE(review): currently just a container for `Vec<Token>`; presumably
/// lexing methods will be added later — confirm intended role vs `Tokenizer`.
pub struct Lexer {
// Tokens in source order.
pub tokens: Vec<Token>
}

View File

@ -1,2 +1,3 @@
pub mod tokenizer;
pub mod token;
pub mod token;
pub mod lexer;

View File

@ -1,14 +1,15 @@
/// Discriminant for the pre-refactor `Token` struct (removed by this change
/// in favor of a data-carrying `Token` enum).
// NOTE(review): SCREAMING_CASE variant names are unconventional in Rust;
// UpperCamelCase (`Reserved`, `Number`, …) is idiomatic — the replacement
// enum already follows that convention.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TokenKind {
// Reserved symbol or keyword (e.g. "+", "for").
RESERVED,
// Integer literal.
NUMBER,
// Identifier.
IDENT,
// Text literal.
TEXT,
}
#[derive(Clone, Debug)]
pub struct Token {
pub kind: TokenKind,
pub num: usize,
pub str: String
/// A lexical token produced by the tokenizer.
///
/// Each variant carries its own payload, replacing the earlier
/// `TokenKind` discriminant + `Token { kind, num, str }` struct, so a
/// number token no longer drags along an unused string and vice versa.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
// Reserved symbol or keyword; `text` is the matched source text (e.g. "+", "for").
Reserved {
text: String
},
// Unsigned integer literal.
Number {
number: usize
},
// Identifier.
Ident {
text: String
},
// Text literal. NOTE(review): no tokenizer branch for this variant is
// visible in this diff — confirm it is produced somewhere.
Text {
text: String
},
}

View File

@ -3,21 +3,25 @@ use nom::{IResult, branch::alt, bytes::complete::{tag, take_while, take_while1},
use crate::token::*;
pub struct Tokenizer {
pub code: str
pub code: String
}
impl Tokenizer {
pub fn tokenize(&mut self) -> Vec<Token> {
let mut tokens: Vec<Token> = vec![];
let mut code = &self.code;
let mut code = self.code.as_str();
while code != "" {
{
if Tokenizer::is_whitespace(code.chars().next().unwrap()) {
code = &code[1..];
}
}
match Tokenizer::reserved(code) {
Ok((input, reserved)) => {
code = input;
tokens.push(Token {
kind: TokenKind::RESERVED,
num: 0,
str: reserved.to_string()
tokens.push(Token::Reserved {
text: reserved.to_string()
});
continue;
@ -28,10 +32,8 @@ impl Tokenizer {
match Tokenizer::number(code) {
Ok((input, number)) => {
code = input;
tokens.push(Token {
kind: TokenKind::NUMBER,
num: number,
str: String::default()
tokens.push(Token::Number {
number: number
});
continue;
},
@ -41,10 +43,8 @@ impl Tokenizer {
match Tokenizer::ident(code) {
Ok((input, ident)) => {
code = input;
tokens.push(Token {
kind: TokenKind::IDENT,
num: 0,
str: ident.to_string()
tokens.push(Token::Ident {
text: ident.to_string()
});
continue;
@ -57,6 +57,10 @@ impl Tokenizer {
tokens
}
/// Returns true for the whitespace characters the tokenizer skips:
/// tab, line feed, space, and carriage return (exactly these four —
/// form feed and Unicode whitespace are deliberately not included).
fn is_whitespace(ch: char) -> bool {
matches!(ch, '\t' | '\n' | ' ' | '\r')
}
/// Returns true if `ch` may appear in an identifier (ASCII letters only).
///
/// Bug fix: the previous `is_alphabetic(ch as u8)` truncated the char to
/// its low byte, so non-ASCII characters whose low byte happens to fall in
/// the A–Z/a–z range (e.g. 'Ł', U+0141 → 0x41 = 'A') were wrongly accepted.
/// `char::is_ascii_alphabetic` performs the same A–Z/a–z test without
/// truncation, so behavior is unchanged for all ASCII input.
fn is_ident(ch: char) -> bool {
ch.is_ascii_alphabetic()
}
@ -119,6 +123,22 @@ impl Tokenizer {
mod tests {
use super::*;
#[test]
fn tokenize_test() {
let mut tokenizer = Tokenizer {
code: "1 + 2 abc".to_string()
};
assert_eq!(
tokenizer.tokenize(),
vec![
Token::Number { number: 1 },
Token::Reserved { text: "+".to_string() },
Token::Number { number: 2 },
Token::Ident { text: "abc".to_string() }
]
)
}
#[test]
fn reserved_test() {
assert_eq!(Tokenizer::reserved("+"), Ok(("", "+")));
@ -126,4 +146,4 @@ mod tests {
assert_eq!(Tokenizer::reserved("="), Ok(("", "=")));
assert_eq!(Tokenizer::reserved("for"), Ok(("", "for")));
}
}
}

View File

@ -6,3 +6,4 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
catalysa-parser = { path = "../catalysa-parser" }

View File

@ -1,3 +1,9 @@
use catalysa_parser::tokenizer::Tokenizer;
fn main() {
println!("Hello, world!");
let mut tokenizer = Tokenizer {
code: "1 + 2;".to_string()
};
println!("{:?}", tokenizer.tokenize());
}