initial commit

main
idylls 2 years ago
commit e58a701b40
Signed by: idylls
GPG Key ID: 8A7167CBC2CC9F0F

2
.gitignore vendored

@ -0,0 +1,2 @@
/target
/Cargo.lock

@ -0,0 +1,8 @@
[package]
name = "mup"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

@ -0,0 +1,9 @@
unstable_features = true
hard_tabs = true
max_width = 80
imports_granularity = 'Crate'
group_imports = 'StdExternalCrate'
format_strings = true
wrap_comments = true
blank_lines_lower_bound = 0
blank_lines_upper_bound = 2

@ -0,0 +1 @@
pub mod parse;

@ -0,0 +1,396 @@
/// Error returned when a byte offset cannot be used to slice a string:
/// it either falls inside a multi-byte character or lies past the end.
#[derive(Debug, Copy, Clone)]
struct InvalidCharBoundary;

/// Extension methods for looking up characters by byte offset.
trait StrExt {
	/// Returns the character that starts at byte `offset`.
	///
	/// Yields `Ok(None)` when `offset` is exactly the end of the string and
	/// `Err(InvalidCharBoundary)` when the string cannot be sliced there.
	fn char_at_byte_offset<T>(
		&self,
		offset: T,
	) -> core::result::Result<Option<char>, InvalidCharBoundary>
	where
		T: Into<usize>;
}

impl StrExt for str {
	fn char_at_byte_offset<T>(
		&self,
		offset: T,
	) -> core::result::Result<Option<char>, InvalidCharBoundary>
	where
		T: Into<usize>,
	{
		// `str::get` refuses ranges that start off a char boundary or beyond
		// the end, which is exactly the failure this method reports.
		match self.get(offset.into()..) {
			Some(tail) => Ok(tail.chars().next()),
			None => Err(InvalidCharBoundary),
		}
	}
}
/// A position expressed as a number of bytes, wrapped in a newtype so it is
/// not confused with other `usize` quantities.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct ByteOffset(pub usize);

impl From<usize> for ByteOffset {
	/// Wraps a raw byte index.
	fn from(raw: usize) -> Self {
		ByteOffset(raw)
	}
}

impl From<ByteOffset> for usize {
	/// Extracts the raw byte index.
	fn from(offset: ByteOffset) -> Self {
		let ByteOffset(raw) = offset;
		raw
	}
}
/// A half-open byte range `[start, end)`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Span {
/// Byte offset at which the range begins.
start: ByteOffset,
/// Byte offset at which the range stops (exclusive).
end: ByteOffset,
}
/// Builds a [`Span`] from anything convertible into a [`ByteOffset`].
///
/// `end` is exclusive.
fn span<A, B>(start: A, end: B) -> Span
where
	A: Into<ByteOffset>,
	B: Into<ByteOffset>,
{
	let (start, end) = (start.into(), end.into());
	Span { start, end }
}
/// A value paired with the [`Span`] it is associated with.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Spanned<T> {
/// Byte range attached to `value`.
span: Span,
/// The wrapped value.
value: T,
}
/// Wraps `value` in a [`Spanned`] covering the byte range `[start, end)`.
fn spanned<A, B, T>(start: A, end: B, value: T) -> Spanned<T>
where
	A: Into<ByteOffset>,
	B: Into<ByteOffset>,
{
	Spanned {
		span: span(start, end),
		value,
	}
}
/// Kinds of parse failure.
///
/// The enum has no variants, so no value of this type can be constructed.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Error {}
/// A header block, generic over the string type `S` used for its content.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Header<S> {
/// Header depth; the parser sets it to the number of leading `#` characters.
level: u8,
/// Header text following the `#` markers, without the line's newline.
content: S,
}
/// An inline element of a paragraph, generic over the string type `S`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Inline<S> {
/// A run of plain text.
Text(S),
}
/// A paragraph block: an ordered sequence of inline elements.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Paragraph<S> {
/// The paragraph's inline elements, each tagged with its span.
pieces: Vec<Spanned<Inline<S>>>,
}
/// A top-level element of a parsed document.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Block<S> {
/// A header line introduced by one or more `#` characters.
Header(Header<S>),
/// A paragraph of inline content.
Paragraph(Paragraph<S>),
}
/// Result type used by the parser: failures are an [`Error`] tagged with a
/// span.
pub type Result<T> = core::result::Result<T, Spanned<Error>>;
/// Parser cursor over the input text.
struct State<'a> {
/// The complete input being parsed.
corpus: &'a str,
/// Byte offset into `corpus` of the next character to read.
current_offset: ByteOffset,
}
impl<'a> State<'a> {
	/// Reports whether the cursor has reached (or passed) the end of the
	/// corpus.
	fn is_eof(&self) -> bool {
		self.current_offset.0 >= self.corpus.len()
	}

	/// Returns the character under the cursor together with its byte offset,
	/// without consuming it. Yields `None` at end of input.
	///
	/// # Panics
	///
	/// Panics if the cursor does not rest on a UTF-8 character boundary.
	fn peek(&self) -> Option<(char, ByteOffset)> {
		if self.is_eof() {
			return None;
		}
		let ch = self
			.corpus
			.char_at_byte_offset(self.current_offset)
			.expect("cursor must rest on a char boundary")
			.expect("a cursor before the end always has a char under it");
		Some((ch, self.current_offset))
	}

	/// Consumes the character under the cursor and returns it together with
	/// the byte offset it started at. Yields `None` at end of input.
	fn next(&mut self) -> Option<(char, ByteOffset)> {
		let (ch, at) = self.peek()?;
		// Step over the whole character. Advancing by a single byte would
		// leave the cursor inside any multi-byte character and make the next
		// lookup panic.
		self.current_offset.0 += ch.len_utf8();
		Some((ch, at))
	}

	/// Consumes one character, discarding it. Does nothing at end of input.
	fn forward(&mut self) {
		self.next();
	}

	/// Advances the cursor past any run of whitespace, newlines included.
	fn skip_whitespace(&mut self) {
		while let Some((ch, _)) = self.peek() {
			if !ch.is_whitespace() {
				break;
			}
			self.forward();
		}
	}
}
/// Parses a header starting at the cursor, which must be on its first `#`.
///
/// The level is the number of consecutive leading `#` characters. The
/// content runs from the first non-blank character after them up to, but not
/// including, the end of the line. The terminating newline is consumed.
fn header<'a, S>(state: &mut State<'a>) -> Result<Header<S>>
where
	S: From<&'a str>,
{
	// The caller dispatches here on a leading '#', which counts as level 1.
	let mut level: u8 = 1;
	state.forward();
	while let Some(('#', _)) = state.peek() {
		// Saturate so an absurdly long run of '#' cannot overflow the u8.
		level = level.saturating_add(1);
		state.forward();
	}

	// Skip the padding between the markers and the title, but never cross
	// the newline: an empty header must not swallow the following line.
	while let Some((ch, _)) = state.peek() {
		if ch == '\n' || !ch.is_whitespace() {
			break;
		}
		state.forward();
	}

	let content_start = state.current_offset;
	let mut content_end = None;
	while let Some((ch, bo)) = state.next() {
		if ch == '\n' {
			content_end = Some(bo);
			break;
		}
	}
	// Without a trailing newline the content runs to the end of the input.
	let content_end = content_end.unwrap_or(state.current_offset);

	Ok(Header {
		level,
		content: state.corpus[content_start.0..content_end.0].into(),
	})
}
/// Consumes a run of plain text starting at the cursor.
///
/// The run ends at two consecutive newlines (both are consumed) or at end of
/// input. A single trailing newline right before the end of input is left
/// out of the returned text. The boolean is always `true`.
fn inline_text<'a, S>(state: &mut State<'a>) -> Result<(S, bool)>
where
	S: From<&'a str>,
{
	let begin = state.current_offset.0;
	// Offset of the previous character when that character was a newline.
	let mut pending_newline: Option<usize> = None;

	while let Some((ch, at)) = state.next() {
		if ch != '\n' {
			pending_newline = None;
			continue;
		}
		if let Some(stop) = pending_newline {
			// Second newline in a row: the text ends before the first one.
			return Ok((state.corpus[begin..stop].into(), true));
		}
		pending_newline = Some(at.0);
	}

	let stop = pending_newline.unwrap_or(state.corpus.len());
	Ok((state.corpus[begin..stop].into(), true))
}
/// Parses one inline element at the cursor.
///
/// Returns the element together with the flag reported by the underlying
/// parser, which the paragraph loop uses to decide when to stop.
fn inline<'a, S>(state: &mut State<'a>) -> Result<(Inline<S>, bool)>
where
	S: From<&'a str>,
{
	// Every lookahead currently parses as plain text.
	let (text, done) = match state.peek() {
		_ => inline_text(state)?,
	};
	Ok((Inline::Text(text), done))
}
/// Parses a paragraph: inline elements are collected, each tagged with the
/// byte range it consumed, until one of them reports that it was the last.
fn paragraph<'a, S>(state: &mut State<'a>) -> Result<Paragraph<S>>
where
	S: From<&'a str>,
{
	let mut pieces = Vec::new();
	let mut finished = false;
	while !finished {
		let from = state.current_offset;
		let (piece, done) = inline::<S>(state)?;
		pieces.push(spanned(from, state.current_offset, piece));
		finished = done;
	}
	Ok(Paragraph { pieces })
}
/// Parses every block from the cursor to the end of the input.
///
/// Whitespace between blocks is skipped. A block starting with `#` is parsed
/// as a header, anything else as a paragraph. Each block is tagged with the
/// byte range it consumed.
fn blocks<'a, S>(state: &mut State<'a>) -> Result<Vec<Spanned<Block<S>>>>
where
	S: From<&'a str>,
{
	let mut out = Vec::new();
	loop {
		// Skip separators first, then look at the character the block
		// really starts with. Dispatching on a character peeked before the
		// skip would treat leading blank lines as the end of the document.
		state.skip_whitespace();
		let ch = match state.peek() {
			Some((ch, _)) => ch,
			None => break,
		};

		let start = state.current_offset;
		let block = match ch {
			'#' => Block::Header(header(state)?),
			_ => Block::Paragraph(paragraph(state)?),
		};
		out.push(spanned(start, state.current_offset, block));
	}
	Ok(out)
}
pub fn parse<'a, S>(corpus: &'a str) -> Result<Vec<Spanned<Block<S>>>>
where
S: From<&'a str>,
{
let mut state = State {
corpus,
current_offset: 0.into(),
};
blocks(&mut state)
}
#[cfg(test)]
mod test {
	use super::*;

	/// A lone header without a trailing newline spans the whole input.
	#[test]
	fn header_1() {
		let corpus = "# Header";
		let output = parse(corpus).unwrap();
		assert_eq!(
			&output,
			&[spanned(
				0,
				corpus.len(),
				Block::Header(Header {
					level: 1,
					content: "Header",
				})
			)]
		)
	}

	/// Two consecutive headers; each span includes its trailing newline.
	#[test]
	fn header_2() {
		let corpus = "# Header\n## Header 2\n";
		let output = parse(corpus).unwrap();
		assert_eq!(
			&output,
			&[
				spanned(
					0,
					9,
					Block::Header(Header {
						level: 1,
						content: "Header",
					})
				),
				// The second header runs to the end of the 21-byte corpus.
				spanned(
					9,
					corpus.len(),
					Block::Header(Header {
						level: 2,
						content: "Header 2",
					})
				),
			]
		)
	}

	/// A lone paragraph without a trailing newline spans the whole input.
	#[test]
	fn paragraph_1() {
		let corpus = "Hello, world";
		let output = parse(corpus).unwrap();
		assert_eq!(
			&output,
			&[spanned(
				0,
				corpus.len(),
				Block::Paragraph(Paragraph {
					pieces: vec![spanned(
						0,
						corpus.len(),
						Inline::Text("Hello, world")
					)]
				})
			)],
		)
	}

	/// A blank line separates paragraphs; the first span includes both
	/// newlines.
	#[test]
	fn paragraph_2() {
		let corpus = "Hello, world\n\nGoodbye, world\n";
		let output = parse(corpus).unwrap();
		assert_eq!(
			&output,
			&[
				spanned(
					0,
					14,
					Block::Paragraph(Paragraph {
						pieces: vec![spanned(
							0,
							14,
							Inline::Text("Hello, world")
						)]
					})
				),
				spanned(
					14,
					corpus.len(),
					Block::Paragraph(Paragraph {
						pieces: vec![spanned(
							14,
							corpus.len(),
							Inline::Text("Goodbye, world")
						)]
					})
				)
			],
		)
	}
}
Loading…
Cancel
Save