diff --git a/Cargo.toml b/Cargo.toml index ca2804c..30ffac8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mup" -version = "0.1.0" +version = "0.0.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/parse.rs b/src/parse.rs index f24255b..5cc4a4f 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,3 +1,46 @@ +pub trait ParseExtensions<'a> { + type Output; + type Error; + + fn parse( + &self, + content: &'a str, + span: Span, + ) -> core::result::Result; +} +impl<'a, T, E, F> ParseExtensions<'a> for F +where + F: Fn(&'a str, Span) -> core::result::Result, +{ + type Output = T; + + type Error = E; + + fn parse( + &self, + content: &'a str, + span: Span, + ) -> core::result::Result { + self(content, span) + } +} + +// impl<'a, T, E> ParseExtension<'a> +// for &fn(&'a str, Span) -> core::result::Result +// { +// type Output = T; + +// type Error = E; + +// fn parse( +// &self, +// content: &'a str, +// span: Span, +// ) -> core::result::Result { +// self(content, span) +// } +// } + #[derive(Debug, Copy, Clone)] struct InvalidCharBoundary; trait StrExt { @@ -38,11 +81,16 @@ impl From for usize { #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Span { - start: ByteOffset, + pub start: ByteOffset, /// exclusive - end: ByteOffset, + pub end: ByteOffset, } -fn span(start: A, end: B) -> Span +impl Span { + pub fn enclose(self, t: T) -> Spanned { + spanned(self.start, self.end, t) + } +} +pub fn span(start: A, end: B) -> Span where A: Into, B: Into, @@ -55,11 +103,11 @@ where #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Spanned { - span: Span, - value: T, + pub span: Span, + pub value: T, } -fn spanned(start: A, end: B, value: T) -> Spanned +pub fn spanned(start: A, end: B, value: T) -> Spanned where A: Into, B: Into, @@ -70,14 +118,15 @@ where } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Error { +pub enum Error { UnclosedExtension, + Extension(ExtError), } #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Header { - level: u8, - content: S, + pub level: u8, + pub content: S, } #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -87,23 +136,24 @@ pub struct Text(pub S); pub struct Extension(pub S); #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum ParagraphPiece { - Text(Text), - Extension(Extension), +pub enum ParagraphPiece { + Text(Text), + Extension(ExtRepr), } #[derive(Debug, Clone, Eq, PartialEq)] -pub struct Paragraph { - pieces: Vec>>, +pub struct Paragraph { + pub pieces: Vec>>, } #[derive(Debug, Clone, Eq, PartialEq)] -pub enum Block { - Header(Header), - Paragraph(Paragraph), +pub enum Block { + Header(Header), + Paragraph(Paragraph), } -pub type Result = core::result::Result>; +pub type Result = + core::result::Result>>; #[derive(Copy, Clone)] struct State<'a> { @@ -175,9 +225,10 @@ impl<'a> State<'a> { } } -fn header<'a, S>(state: &mut State<'a>) -> Result> +fn header<'a, S, E>(state: &mut State<'a>) -> Result, E::Error> where S: From<&'a str>, + E: ParseExtensions<'a>, { let mut level = 1; @@ -212,9 +263,13 @@ where }) } -fn extension<'a, S>(state: &mut State<'a>) -> Result> +fn extension<'a, S, E>( + state: &mut State<'a>, + pex: &E, +) -> Result where S: From<&'a str>, + E: ParseExtensions<'a>, { debug_assert!({ let mut peek_buf = [None; 2]; @@ -246,7 +301,9 @@ where let content = &state.corpus[start.0..bo.0]; - return Ok(Extension(content.into())); + return pex + .parse(content, span(start.0, bo.0)) + .map_err(|e| spanned(start.0, bo.0, Error::Extension(e))); } _ => { state.forward(); @@ -256,9 +313,10 @@ where } -fn text<'a, S>(state: &mut State<'a>) -> Result> +fn text<'a, S, E>(state: &mut State<'a>) -> Result, E::Error> where S: From<&'a str>, + E: ParseExtensions<'a>, { let start = state.current_offset; @@ -280,9 +338,13 @@ where Ok(Text(state.corpus[start.0..end.0].into())) } -fn paragraph<'a, S>(state: &mut State<'a>) -> Result> +fn paragraph<'a, S, E>( + state: &mut State<'a>, + pex: &E, +) -> Result, E::Error> where S: From<&'a str>, + E: ParseExtensions<'a>, { let mut pieces = Vec::new(); @@ -308,10 +370,10 @@ where } // extension [Some(('@', _)), Some(('{', _))] => { - ParagraphPiece::Extension(extension(state)?) + ParagraphPiece::Extension(extension::(state, pex)?) } // regular text - _ => ParagraphPiece::Text(text(state)?), + _ => ParagraphPiece::Text(text::(state)?), }; let end = state.current_offset; @@ -322,9 +384,13 @@ where Ok(Paragraph { pieces }) } -fn blocks<'a, S>(state: &mut State<'a>) -> Result>>> +fn blocks<'a, S, E>( + state: &mut State<'a>, + pex: &E, +) -> Result>>, E::Error> where S: From<&'a str>, + E: ParseExtensions<'a>, { let mut out = Vec::new(); while let Some((ch, _)) = state.peek() { @@ -332,12 +398,8 @@ where let start = state.current_offset; let block = match ch { - '#' => Block::Header(header(state)?), - ch if ch.is_whitespace() => { - state.forward(); - break; - } - _ => Block::Paragraph(paragraph(state)?), + '#' => Block::Header(header::(state)?), + _ => Block::Paragraph(paragraph(state, pex)?), }; let end = state.current_offset; @@ -347,18 +409,56 @@ where Ok(out) } -pub fn parse<'a, S>(corpus: &'a str) -> Result>>> +pub struct ParseExtensionsRaw(core::marker::PhantomData); +impl ParseExtensionsRaw { + fn new() -> Self { + Self(Default::default()) + } +} + +impl<'a, S> ParseExtensions<'a> for ParseExtensionsRaw where S: From<&'a str>, +{ + type Output = Extension; + + type Error = kor::Never; + + fn parse( + &self, + content: &'a str, + span: Span, + ) -> core::result::Result { + Ok(Extension(content.into())) + } +} + +pub fn parse<'a, S>( + corpus: &'a str, +) -> Result>>>, kor::Never> +where + S: From<&'a str>, +{ + parse_extended(corpus, ParseExtensionsRaw::new()) +} + +pub fn parse_extended<'a, S, E>( + corpus: &'a str, + pex: E, +) -> Result>>, E::Error> +where + S: From<&'a str>, + E: ParseExtensions<'a>, { let mut state = State { corpus, current_offset: 0.into(), }; - blocks(&mut state) + blocks(&mut state, &pex) } + #[cfg(test)] mod test { use super::*; @@ -487,4 +587,12 @@ mod test { )] ); } + + #[test] + fn complex_test_1() { + let corpus = include_str!("../test/hello_mup.mup"); + let output = parse::<&str>(corpus).unwrap(); + + assert_eq!(output, &[]); + } } diff --git a/test/hello_mup.mup b/test/hello_mup.mup new file mode 100644 index 0000000..948f93c --- /dev/null +++ b/test/hello_mup.mup @@ -0,0 +1,8 @@ +# Hello mup! + +This is mup, a lightweight markup language with a focus on extensibility. +Mup can be used for many different types of documents! + +This is an example of an @{extension block}@. Extension blocks are +demarcated in the syntax tree, allowing you to run whatever processing you +would like on the content!