Add strongly-typed extensions to parse functionality

This allows the user to supply custom handling for extension element
parsing.
main
idylls 2 years ago
parent 366fd53a67
commit cb91b7b9d6
Signed by: idylls
GPG Key ID: 8A7167CBC2CC9F0F

@ -1,6 +1,6 @@
[package]
name = "mup"
version = "0.1.0"
version = "0.0.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

@ -1,3 +1,46 @@
/// Caller-supplied handling for extension elements.
///
/// An implementation receives the text of one extension element together with
/// the [`Span`] it was taken from and either produces a typed value or reports
/// an error of its own type.
pub trait ParseExtensions<'a> {
/// Value produced for a successfully handled extension.
type Output;
/// Error reported when the extension content is rejected.
type Error;
/// Handles a single extension element.
///
/// `content` borrows from the parsed corpus (lifetime `'a`); `span` is the
/// range that `content` was sliced from.
fn parse(
&self,
content: &'a str,
span: Span,
) -> core::result::Result<Self::Output, Self::Error>;
}
/// Lets any `Fn(&str, Span) -> Result<T, E>` closure or function be used
/// directly as an extension handler.
impl<'a, T, E, F> ParseExtensions<'a> for F
where
    F: Fn(&'a str, Span) -> core::result::Result<T, E>,
{
    /// Whatever the callable returns on success.
    type Output = T;
    /// Whatever the callable returns on failure.
    type Error = E;

    /// Forwards both arguments to the wrapped callable unchanged.
    fn parse(
        &self,
        content: &'a str,
        span: Span,
    ) -> core::result::Result<T, E> {
        let handler: &F = self;
        handler(content, span)
    }
}
// impl<'a, T, E> ParseExtension<'a>
// for &fn(&'a str, Span) -> core::result::Result<T, E>
// {
// type Output = T;
// type Error = E;
// fn parse(
// &self,
// content: &'a str,
// span: Span,
// ) -> core::result::Result<Self::Output, Self::Error> {
// self(content, span)
// }
// }
#[derive(Debug, Copy, Clone)]
struct InvalidCharBoundary;
trait StrExt {
@ -38,11 +81,16 @@ impl From<ByteOffset> for usize {
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Span {
start: ByteOffset,
pub start: ByteOffset,
/// exclusive
end: ByteOffset,
pub end: ByteOffset,
}
fn span<A, B>(start: A, end: B) -> Span
impl Span {
pub fn enclose<T>(self, t: T) -> Spanned<T> {
spanned(self.start, self.end, t)
}
}
pub fn span<A, B>(start: A, end: B) -> Span
where
A: Into<ByteOffset>,
B: Into<ByteOffset>,
@ -55,11 +103,11 @@ where
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Spanned<T> {
span: Span,
value: T,
pub span: Span,
pub value: T,
}
fn spanned<A, B, T>(start: A, end: B, value: T) -> Spanned<T>
pub fn spanned<A, B, T>(start: A, end: B, value: T) -> Spanned<T>
where
A: Into<ByteOffset>,
B: Into<ByteOffset>,
@ -70,14 +118,15 @@ where
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Error {
pub enum Error<ExtError> {
UnclosedExtension,
Extension(ExtError),
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Header<S> {
level: u8,
content: S,
pub level: u8,
pub content: S,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
@ -87,23 +136,24 @@ pub struct Text<S>(pub S);
pub struct Extension<S>(pub S);
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ParagraphPiece<S> {
Text(Text<S>),
Extension(Extension<S>),
pub enum ParagraphPiece<StrRepr, ExtRepr> {
Text(Text<StrRepr>),
Extension(ExtRepr),
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Paragraph<S> {
pieces: Vec<Spanned<ParagraphPiece<S>>>,
pub struct Paragraph<StrRepr, ExtRepr> {
pub pieces: Vec<Spanned<ParagraphPiece<StrRepr, ExtRepr>>>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Block<S> {
Header(Header<S>),
Paragraph(Paragraph<S>),
pub enum Block<StrRepr, ExtRepr> {
Header(Header<StrRepr>),
Paragraph(Paragraph<StrRepr, ExtRepr>),
}
pub type Result<T> = core::result::Result<T, Spanned<Error>>;
pub type Result<T, ExtError> =
core::result::Result<T, Spanned<Error<ExtError>>>;
#[derive(Copy, Clone)]
struct State<'a> {
@ -175,9 +225,10 @@ impl<'a> State<'a> {
}
}
fn header<'a, S>(state: &mut State<'a>) -> Result<Header<S>>
fn header<'a, S, E>(state: &mut State<'a>) -> Result<Header<S>, E::Error>
where
S: From<&'a str>,
E: ParseExtensions<'a>,
{
let mut level = 1;
@ -212,9 +263,13 @@ where
})
}
fn extension<'a, S>(state: &mut State<'a>) -> Result<Extension<S>>
fn extension<'a, S, E>(
state: &mut State<'a>,
pex: &E,
) -> Result<E::Output, E::Error>
where
S: From<&'a str>,
E: ParseExtensions<'a>,
{
debug_assert!({
let mut peek_buf = [None; 2];
@ -246,7 +301,9 @@ where
let content = &state.corpus[start.0..bo.0];
return Ok(Extension(content.into()));
return pex
.parse(content, span(start.0, bo.0))
.map_err(|e| spanned(start.0, bo.0, Error::Extension(e)));
}
_ => {
state.forward();
@ -256,9 +313,10 @@ where
}
fn text<'a, S>(state: &mut State<'a>) -> Result<Text<S>>
fn text<'a, S, E>(state: &mut State<'a>) -> Result<Text<S>, E::Error>
where
S: From<&'a str>,
E: ParseExtensions<'a>,
{
let start = state.current_offset;
@ -280,9 +338,13 @@ where
Ok(Text(state.corpus[start.0..end.0].into()))
}
fn paragraph<'a, S>(state: &mut State<'a>) -> Result<Paragraph<S>>
fn paragraph<'a, S, E>(
state: &mut State<'a>,
pex: &E,
) -> Result<Paragraph<S, E::Output>, E::Error>
where
S: From<&'a str>,
E: ParseExtensions<'a>,
{
let mut pieces = Vec::new();
@ -308,10 +370,10 @@ where
}
// extension
[Some(('@', _)), Some(('{', _))] => {
ParagraphPiece::Extension(extension(state)?)
ParagraphPiece::Extension(extension::<S, E>(state, pex)?)
}
// regular text
_ => ParagraphPiece::Text(text(state)?),
_ => ParagraphPiece::Text(text::<S, E>(state)?),
};
let end = state.current_offset;
@ -322,9 +384,13 @@ where
Ok(Paragraph { pieces })
}
fn blocks<'a, S>(state: &mut State<'a>) -> Result<Vec<Spanned<Block<S>>>>
fn blocks<'a, S, E>(
state: &mut State<'a>,
pex: &E,
) -> Result<Vec<Spanned<Block<S, E::Output>>>, E::Error>
where
S: From<&'a str>,
E: ParseExtensions<'a>,
{
let mut out = Vec::new();
while let Some((ch, _)) = state.peek() {
@ -332,12 +398,8 @@ where
let start = state.current_offset;
let block = match ch {
'#' => Block::Header(header(state)?),
ch if ch.is_whitespace() => {
state.forward();
break;
}
_ => Block::Paragraph(paragraph(state)?),
'#' => Block::Header(header::<S, E>(state)?),
_ => Block::Paragraph(paragraph(state, pex)?),
};
let end = state.current_offset;
@ -347,18 +409,56 @@ where
Ok(out)
}
pub fn parse<'a, S>(corpus: &'a str) -> Result<Vec<Spanned<Block<S>>>>
/// Marker type parameterised by the string representation `S`.
///
/// It stores no data; `S` is only recorded through `PhantomData`.
pub struct ParseExtensionsRaw<S>(core::marker::PhantomData<S>);

impl<S> ParseExtensionsRaw<S> {
    /// Creates the (zero-sized) marker value.
    fn new() -> Self {
        Self(core::marker::PhantomData)
    }
}
/// Pass-through handler: keeps every extension element as its raw content
/// wrapped in [`Extension`].
impl<'a, S> ParseExtensions<'a> for ParseExtensionsRaw<S>
where
    S: From<&'a str>,
{
    /// The extension content converted into the string representation `S`.
    type Output = Extension<S>;
    /// This implementation never returns `Err`.
    type Error = kor::Never;

    /// Converts `content` into `S` and wraps it; the span is not needed here,
    /// hence the underscore-prefixed parameter (avoids an unused-variable
    /// warning).
    fn parse(
        &self,
        content: &'a str,
        _span: Span,
    ) -> core::result::Result<Self::Output, Self::Error> {
        Ok(Extension(content.into()))
    }
}
/// Parses `corpus` into spanned blocks, leaving each extension element as an
/// untyped [`Extension`] holding its raw content.
///
/// This is [`parse_extended`] driven by a [`ParseExtensionsRaw`] handler.
pub fn parse<'a, S>(
    corpus: &'a str,
) -> Result<Vec<Spanned<Block<S, Extension<S>>>>, kor::Never>
where
    S: From<&'a str>,
{
    let raw_handler = ParseExtensionsRaw::<S>::new();
    parse_extended(corpus, raw_handler)
}
pub fn parse_extended<'a, S, E>(
corpus: &'a str,
pex: E,
) -> Result<Vec<Spanned<Block<S, E::Output>>>, E::Error>
where
S: From<&'a str>,
E: ParseExtensions<'a>,
{
let mut state = State {
corpus,
current_offset: 0.into(),
};
blocks(&mut state)
blocks(&mut state, &pex)
}
#[cfg(test)]
mod test {
use super::*;
@ -487,4 +587,12 @@ mod test {
)]
);
}
// Runs `parse` (raw extension handling) over the `../test/hello_mup.mup`
// fixture and compares the result against an empty slice.
// NOTE(review): an empty expected value only holds if the fixture yields no
// blocks — this looks like a placeholder expectation; confirm and replace
// with the real expected tree.
#[test]
fn complex_test_1() {
let corpus = include_str!("../test/hello_mup.mup");
let output = parse::<&str>(corpus).unwrap();
assert_eq!(output, &[]);
}
}

@ -0,0 +1,8 @@
# Hello mup!
This is mup, a lightweight markup language with a focus on extensibility.
Mup can be used for many different types of documents!
This is an example of an @{extension block}@. Extension blocks are
demarcated in the syntax tree, allowing you to run whatever processing you
would like on the content!
Loading…
Cancel
Save