Add strongly-typed extensions to parse functionality

This allows the user to supply custom handling for extension element
parsing.
main
idylls 2 years ago
parent 366fd53a67
commit cb91b7b9d6
Signed by: idylls
GPG Key ID: 8A7167CBC2CC9F0F

@ -1,6 +1,6 @@
[package] [package]
name = "mup" name = "mup"
version = "0.1.0" version = "0.0.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

@ -1,3 +1,46 @@
/// Strategy for converting the raw text of an extension element into a
/// caller-chosen, strongly-typed value.
///
/// `'a` is the lifetime of the `content` slice passed to
/// [`ParseExtensions::parse`], so an implementation may return values that
/// borrow from it.
pub trait ParseExtensions<'a> {
    /// Value produced when an extension is handled successfully.
    type Output;

    /// Error produced when an extension is rejected.
    type Error;

    /// Handles a single extension.
    ///
    /// * `content` - the text of the extension element.
    /// * `span` - the location associated with `content`.
    ///
    /// Returns the typed representation on success, or the
    /// implementation-defined error otherwise.
    fn parse(&self, content: &'a str, span: Span)
        -> core::result::Result<Self::Output, Self::Error>;
}
/// Blanket implementation: any callable of the shape
/// `Fn(&'a str, Span) -> Result<Value, Failure>` (a closure or a plain
/// function) can be used directly wherever a [`ParseExtensions`] is expected.
impl<'a, Value, Failure, Func> ParseExtensions<'a> for Func
where
    Func: Fn(&'a str, Span) -> core::result::Result<Value, Failure>,
{
    type Output = Value;
    type Error = Failure;

    /// Forwards both arguments to the wrapped callable and returns its result
    /// unchanged.
    fn parse(&self, content: &'a str, span: Span) -> core::result::Result<Value, Failure> {
        (self)(content, span)
    }
}
// impl<'a, T, E> ParseExtensions<'a>
// for &fn(&'a str, Span) -> core::result::Result<T, E>
// {
// type Output = T;
// type Error = E;
// fn parse(
// &self,
// content: &'a str,
// span: Span,
// ) -> core::result::Result<Self::Output, Self::Error> {
// self(content, span)
// }
// }
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
struct InvalidCharBoundary; struct InvalidCharBoundary;
trait StrExt { trait StrExt {
@ -38,11 +81,16 @@ impl From<ByteOffset> for usize {
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Span { pub struct Span {
start: ByteOffset, pub start: ByteOffset,
/// exclusive /// exclusive
end: ByteOffset, pub end: ByteOffset,
} }
fn span<A, B>(start: A, end: B) -> Span impl Span {
pub fn enclose<T>(self, t: T) -> Spanned<T> {
spanned(self.start, self.end, t)
}
}
pub fn span<A, B>(start: A, end: B) -> Span
where where
A: Into<ByteOffset>, A: Into<ByteOffset>,
B: Into<ByteOffset>, B: Into<ByteOffset>,
@ -55,11 +103,11 @@ where
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Spanned<T> { pub struct Spanned<T> {
span: Span, pub span: Span,
value: T, pub value: T,
} }
fn spanned<A, B, T>(start: A, end: B, value: T) -> Spanned<T> pub fn spanned<A, B, T>(start: A, end: B, value: T) -> Spanned<T>
where where
A: Into<ByteOffset>, A: Into<ByteOffset>,
B: Into<ByteOffset>, B: Into<ByteOffset>,
@ -70,14 +118,15 @@ where
} }
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Error { pub enum Error<ExtError> {
UnclosedExtension, UnclosedExtension,
Extension(ExtError),
} }
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Header<S> { pub struct Header<S> {
level: u8, pub level: u8,
content: S, pub content: S,
} }
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Copy, Clone, Eq, PartialEq)]
@ -87,23 +136,24 @@ pub struct Text<S>(pub S);
pub struct Extension<S>(pub S); pub struct Extension<S>(pub S);
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ParagraphPiece<S> { pub enum ParagraphPiece<StrRepr, ExtRepr> {
Text(Text<S>), Text(Text<StrRepr>),
Extension(Extension<S>), Extension(ExtRepr),
} }
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
pub struct Paragraph<S> { pub struct Paragraph<StrRepr, ExtRepr> {
pieces: Vec<Spanned<ParagraphPiece<S>>>, pub pieces: Vec<Spanned<ParagraphPiece<StrRepr, ExtRepr>>>,
} }
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
pub enum Block<S> { pub enum Block<StrRepr, ExtRepr> {
Header(Header<S>), Header(Header<StrRepr>),
Paragraph(Paragraph<S>), Paragraph(Paragraph<StrRepr, ExtRepr>),
} }
pub type Result<T> = core::result::Result<T, Spanned<Error>>; pub type Result<T, ExtError> =
core::result::Result<T, Spanned<Error<ExtError>>>;
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
struct State<'a> { struct State<'a> {
@ -175,9 +225,10 @@ impl<'a> State<'a> {
} }
} }
fn header<'a, S>(state: &mut State<'a>) -> Result<Header<S>> fn header<'a, S, E>(state: &mut State<'a>) -> Result<Header<S>, E::Error>
where where
S: From<&'a str>, S: From<&'a str>,
E: ParseExtensions<'a>,
{ {
let mut level = 1; let mut level = 1;
@ -212,9 +263,13 @@ where
}) })
} }
fn extension<'a, S>(state: &mut State<'a>) -> Result<Extension<S>> fn extension<'a, S, E>(
state: &mut State<'a>,
pex: &E,
) -> Result<E::Output, E::Error>
where where
S: From<&'a str>, S: From<&'a str>,
E: ParseExtensions<'a>,
{ {
debug_assert!({ debug_assert!({
let mut peek_buf = [None; 2]; let mut peek_buf = [None; 2];
@ -246,7 +301,9 @@ where
let content = &state.corpus[start.0..bo.0]; let content = &state.corpus[start.0..bo.0];
return Ok(Extension(content.into())); return pex
.parse(content, span(start.0, bo.0))
.map_err(|e| spanned(start.0, bo.0, Error::Extension(e)));
} }
_ => { _ => {
state.forward(); state.forward();
@ -256,9 +313,10 @@ where
} }
fn text<'a, S>(state: &mut State<'a>) -> Result<Text<S>> fn text<'a, S, E>(state: &mut State<'a>) -> Result<Text<S>, E::Error>
where where
S: From<&'a str>, S: From<&'a str>,
E: ParseExtensions<'a>,
{ {
let start = state.current_offset; let start = state.current_offset;
@ -280,9 +338,13 @@ where
Ok(Text(state.corpus[start.0..end.0].into())) Ok(Text(state.corpus[start.0..end.0].into()))
} }
fn paragraph<'a, S>(state: &mut State<'a>) -> Result<Paragraph<S>> fn paragraph<'a, S, E>(
state: &mut State<'a>,
pex: &E,
) -> Result<Paragraph<S, E::Output>, E::Error>
where where
S: From<&'a str>, S: From<&'a str>,
E: ParseExtensions<'a>,
{ {
let mut pieces = Vec::new(); let mut pieces = Vec::new();
@ -308,10 +370,10 @@ where
} }
// extension // extension
[Some(('@', _)), Some(('{', _))] => { [Some(('@', _)), Some(('{', _))] => {
ParagraphPiece::Extension(extension(state)?) ParagraphPiece::Extension(extension::<S, E>(state, pex)?)
} }
// regular text // regular text
_ => ParagraphPiece::Text(text(state)?), _ => ParagraphPiece::Text(text::<S, E>(state)?),
}; };
let end = state.current_offset; let end = state.current_offset;
@ -322,9 +384,13 @@ where
Ok(Paragraph { pieces }) Ok(Paragraph { pieces })
} }
fn blocks<'a, S>(state: &mut State<'a>) -> Result<Vec<Spanned<Block<S>>>> fn blocks<'a, S, E>(
state: &mut State<'a>,
pex: &E,
) -> Result<Vec<Spanned<Block<S, E::Output>>>, E::Error>
where where
S: From<&'a str>, S: From<&'a str>,
E: ParseExtensions<'a>,
{ {
let mut out = Vec::new(); let mut out = Vec::new();
while let Some((ch, _)) = state.peek() { while let Some((ch, _)) = state.peek() {
@ -332,12 +398,8 @@ where
let start = state.current_offset; let start = state.current_offset;
let block = match ch { let block = match ch {
'#' => Block::Header(header(state)?), '#' => Block::Header(header::<S, E>(state)?),
ch if ch.is_whitespace() => { _ => Block::Paragraph(paragraph(state, pex)?),
state.forward();
break;
}
_ => Block::Paragraph(paragraph(state)?),
}; };
let end = state.current_offset; let end = state.current_offset;
@ -347,18 +409,56 @@ where
Ok(out) Ok(out)
} }
pub fn parse<'a, S>(corpus: &'a str) -> Result<Vec<Spanned<Block<S>>>> pub struct ParseExtensionsRaw<S>(core::marker::PhantomData<S>);
impl<S> ParseExtensionsRaw<S> {
    /// Builds the marker value. It stores no data; `S` is carried only at
    /// the type level through `PhantomData`.
    fn new() -> Self {
        Self(core::marker::PhantomData)
    }
}
impl<'a, S> ParseExtensions<'a> for ParseExtensionsRaw<S>
where where
S: From<&'a str>, S: From<&'a str>,
{
type Output = Extension<S>;
type Error = kor::Never;
fn parse(
&self,
content: &'a str,
span: Span,
) -> core::result::Result<Self::Output, Self::Error> {
Ok(Extension(content.into()))
}
}
pub fn parse<'a, S>(
corpus: &'a str,
) -> Result<Vec<Spanned<Block<S, Extension<S>>>>, kor::Never>
where
S: From<&'a str>,
{
parse_extended(corpus, ParseExtensionsRaw::new())
}
pub fn parse_extended<'a, S, E>(
corpus: &'a str,
pex: E,
) -> Result<Vec<Spanned<Block<S, E::Output>>>, E::Error>
where
S: From<&'a str>,
E: ParseExtensions<'a>,
{ {
let mut state = State { let mut state = State {
corpus, corpus,
current_offset: 0.into(), current_offset: 0.into(),
}; };
blocks(&mut state) blocks(&mut state, &pex)
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
@ -487,4 +587,12 @@ mod test {
)] )]
); );
} }
/// Parses the `hello_mup.mup` fixture through the plain `parse` entry point
/// and compares the resulting block list against the expected value.
// NOTE(review): the expected value is an empty slice, which looks like a
// placeholder if the fixture is non-empty — confirm and fill in the real
// expected tree.
#[test]
fn complex_test_1() {
    let parsed = parse::<&str>(include_str!("../test/hello_mup.mup")).unwrap();
    assert_eq!(parsed, &[]);
}
} }

@ -0,0 +1,8 @@
# Hello mup!
This is mup, a lightweight markup language with a focus on extensibility.
Mup can be used for many different types of documents!
This is an example of an @{extension block}@. Extension blocks are
demarcated in the syntax tree, allowing you to run whatever processing you
would like on the content!
Loading…
Cancel
Save