Skip to content

Commit

Permalink
Add (word) command
Browse files Browse the repository at this point in the history
  • Loading branch information
Rexagon committed Aug 3, 2023
1 parent a8109a8 commit 9ddbd22
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 1 deletion.
104 changes: 103 additions & 1 deletion src/core/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use anyhow::Result;
use anyhow::{Context, Result};

use super::env::SourceBlock;
use crate::error::UnexpectedEof;
Expand Down Expand Up @@ -56,6 +56,14 @@ impl Lexer {
}
}

/// Scans the next word from the innermost source block, using `delims` and
/// `space_class` to build the character classifier that decides where the
/// word ends.
///
/// Returns an empty token when there is no active source block; in that
/// case `delims` is not validated at all.
///
/// # Errors
///
/// Fails if the classifier cannot be built from `delims`, or if the
/// underlying block scan fails.
pub fn scan_classify(&mut self, delims: &str, space_class: u8) -> Result<Token<'_>> {
    match self.blocks.last_mut() {
        Some(block) => {
            let classifier = AsciiCharClassifier::with_delims(delims, space_class)?;
            block.scan_classify(&classifier)
        }
        // Nothing to read from: report an empty word rather than an error.
        None => Ok(Token { data: "" }),
    }
}

pub fn scan_until<P: Delimiter>(&mut self, p: P) -> Result<Token<'_>> {
if let Some(token) = self.use_last_block()?.scan_until(p)? {
Ok(token)
Expand Down Expand Up @@ -230,6 +238,41 @@ impl SourceBlockState {
})
}

/// Scans one word from the current line, ending it according to the 2-bit
/// class that `classifier` assigns to each character:
///
/// * bit `0b01` set: the character ends the word *before* itself, unless it
///   is the very first character examined (then it simply starts the word);
/// * bit `0b10` set: the character ends the word and is additionally passed
///   over with `skip_symbol`, so it becomes the last character of the token.
///
/// The first rule is checked first, so a class-`0b11` character that is not
/// at the start of the word stops the scan without being consumed.
///
/// Returns an empty token when the current line is used up and `read_line`
/// reports that no further input is available.
fn scan_classify(&mut self, classifier: &AsciiCharClassifier) -> Result<Token<'_>> {
    self.prev_line_offset = self.line_offset;

    // Only try to fetch another line when the current one is used up.
    let line_exhausted = self.line.is_empty() || self.line_offset >= self.line.len();
    if line_exhausted && !self.read_line()? {
        return Ok(Token { data: "" });
    }

    self.skip_whitespace()?;

    let begin = self.line_offset;

    let mut consume_stopper = false;
    let mut seen_any = false;
    // NOTE(review): `skip_until` presumably advances up to, but not past,
    // the first character for which the predicate returns `true` — confirm.
    self.skip_until(|ch| {
        let class = classifier.classify(ch);
        let ends_before = class & 0b01 != 0;
        let ends_after = class & 0b10 != 0;

        if ends_before && seen_any {
            true
        } else if ends_after {
            consume_stopper = true;
            true
        } else {
            seen_any = true;
            false
        }
    });

    if consume_stopper {
        self.skip_symbol();
    }

    let data = &self.line[begin..self.line_offset];
    Ok(Token { data })
}

/// Moves the cursor within the current line back by `offset` bytes.
///
/// The caller must guarantee `offset <= self.line_offset`; the subtraction
/// is unchecked beyond Rust's usual integer-overflow behaviour.
fn rewind(&mut self, offset: usize) {
    let target = self.line_offset - offset;
    self.line_offset = target;
}
Expand Down Expand Up @@ -288,3 +331,62 @@ impl SourceBlockState {
Ok(n > 0)
}
}

/// Lookup table that assigns a 2-bit class (`0..=3`) to every byte value.
struct AsciiCharClassifier {
    /// Packed equivalent of `[u2; 256]`: each byte holds the classes of four
    /// consecutive characters, two bits per character, lowest bits first.
    data: [u8; 64],
}

impl AsciiCharClassifier {
    /// Builds a classifier from a delimiter specification.
    ///
    /// Space and tab are first assigned `space_class`. Then `delims` is read
    /// left to right: every non-space character receives the current class,
    /// which starts at `3`, and every space lowers the current class by one.
    /// Characters never mentioned keep class `0`.
    ///
    /// # Errors
    ///
    /// Fails if `delims` contains non-ASCII characters, or if it contains
    /// more than three spaces (the class would drop below zero).
    fn with_delims(delims: &str, space_class: u8) -> Result<Self> {
        anyhow::ensure!(
            delims.is_ascii(),
            "Non-ascii symbols are not supported by character classifier"
        );

        let mut classifier = Self { data: [0u8; 64] };
        classifier.set_char_class(b' ', space_class);
        classifier.set_char_class(b'\t', space_class);

        let mut current = 0b11u8;
        for byte in delims.bytes() {
            match byte {
                // A space is a group separator, not a delimiter of its own.
                b' ' => current = current.checked_sub(1).context("Too many classes")?,
                _ => classifier.set_char_class(byte, current),
            }
        }

        Ok(classifier)
    }

    /// Stores `class` (only its low two bits are used) for the byte `c`.
    fn set_char_class(&mut self, c: u8, class: u8) {
        // Four characters share one byte; pick the 2-bit lane for this one:
        //   c % 4 == 0 -> bits 0..2
        //   c % 4 == 1 -> bits 2..4
        //   c % 4 == 2 -> bits 4..6
        //   c % 4 == 3 -> bits 6..8
        let shift = (c % 4) * 2;
        let slot = &mut self.data[usize::from(c / 4)];

        // Clear the lane, then write the new class into it.
        *slot &= !(0b11u8 << shift);
        *slot |= (class & 0b11) << shift;
    }

    /// Returns the class stored for `c`; non-ASCII characters are always `0`.
    fn classify(&self, c: char) -> u8 {
        if !c.is_ascii() {
            return 0;
        }

        let byte = c as u8;
        let shift = (byte % 4) * 2;
        (self.data[usize::from(byte / 4)] >> shift) & 0b11
    }
}
21 changes: 21 additions & 0 deletions src/modules/control.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,27 @@ impl Control {
ctx.stack.push(token.data.to_owned())
}

#[cmd(name = "(word)")]
/// Implements `(word)`: pops a mode integer and a delimiter string, scans a
/// word from the input with a classifier built from them, and pushes the
/// word as a string.
///
/// Mode bits:
/// * `0b0011` — class assigned to space and tab;
/// * `0b1000` — skip whitespace before scanning;
/// * `0b0100` — only consulted together with `0b1000`: selects
///   `scan_skip_whitespace` instead of `skip_line_whitespace`.
fn interpret_word_ext(ctx: &mut Context) -> Result<()> {
    const SPACE_CLASS_MASK: u8 = 0b0011;
    const FLAG_SKIP_SPACE_EOL: u8 = 0b0100;
    const FLAG_SKIP_SPACE: u8 = 0b1000;

    let mode = ctx.stack.pop_smallint_range(0, 11)? as u8;
    let delims = ctx.stack.pop_string()?;

    let skip_space = mode & FLAG_SKIP_SPACE != 0;
    let across_eol = mode & FLAG_SKIP_SPACE_EOL != 0;

    // TODO: these flags might be ignored?
    match (skip_space, across_eol) {
        (true, true) => {
            ctx.input.scan_skip_whitespace()?;
        }
        (true, false) => {
            ctx.input.skip_line_whitespace();
        }
        (false, _) => {}
    }

    let word = ctx.input.scan_classify(&delims, mode & SPACE_CLASS_MASK)?;
    let owned = word.data.to_owned();
    ctx.stack.push(owned)
}

#[cmd(name = "skipspc")]
fn interpret_skipspc(ctx: &mut Context) -> Result<()> {
ctx.input.scan_skip_whitespace()
Expand Down
1 change: 1 addition & 0 deletions src/modules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ impl FiftModule for BaseModule {
#[cmd(name = "integer?", stack, args(ty = StackValueType::Int))]
#[cmd(name = "string?", stack, args(ty = StackValueType::String))]
#[cmd(name = "tuple?", stack, args(ty = StackValueType::Tuple))]
#[cmd(name = "box?", stack, args(ty = StackValueType::SharedBox))]
#[cmd(name = "atom?", stack, args(ty = StackValueType::Atom))]
fn interpret_is_type(stack: &mut Stack, ty: StackValueType) -> Result<()> {
let is_ty = stack.pop()?.ty() == ty;
Expand Down

0 comments on commit 9ddbd22

Please sign in to comment.