|
|
|
@ -1,3 +1,46 @@
|
|
|
|
|
pub trait ParseExtensions<'a> {
|
|
|
|
|
type Output;
|
|
|
|
|
type Error;
|
|
|
|
|
|
|
|
|
|
fn parse(
|
|
|
|
|
&self,
|
|
|
|
|
content: &'a str,
|
|
|
|
|
span: Span,
|
|
|
|
|
) -> core::result::Result<Self::Output, Self::Error>;
|
|
|
|
|
}
|
|
|
|
|
impl<'a, T, E, F> ParseExtensions<'a> for F
|
|
|
|
|
where
|
|
|
|
|
F: Fn(&'a str, Span) -> core::result::Result<T, E>,
|
|
|
|
|
{
|
|
|
|
|
type Output = T;
|
|
|
|
|
|
|
|
|
|
type Error = E;
|
|
|
|
|
|
|
|
|
|
fn parse(
|
|
|
|
|
&self,
|
|
|
|
|
content: &'a str,
|
|
|
|
|
span: Span,
|
|
|
|
|
) -> core::result::Result<Self::Output, Self::Error> {
|
|
|
|
|
self(content, span)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Disabled impl, kept for reference.
// NOTE(review): presumably disabled because `&fn(..)` already implements `Fn`,
// so it would overlap with the blanket impl for `F: Fn(&'a str, Span) -> ...`
// above — confirm before re-enabling.
//
// impl<'a, T, E> ParseExtensions<'a>
//     for &fn(&'a str, Span) -> core::result::Result<T, E>
// {
//     type Output = T;
//
//     type Error = E;
//
//     fn parse(
//         &self,
//         content: &'a str,
//         span: Span,
//     ) -> core::result::Result<Self::Output, Self::Error> {
//         self(content, span)
//     }
// }
|
|
|
|
|
|
|
|
|
|
/// Marker error type carrying no data.
// NOTE(review): judging by the name, this signals a byte offset that does not
// fall on a UTF-8 character boundary — confirm against its users.
#[derive(Debug, Copy, Clone)]
struct InvalidCharBoundary;
|
|
|
|
|
trait StrExt {
|
|
|
|
@ -38,11 +81,16 @@ impl From<ByteOffset> for usize {
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
|
|
|
|
pub struct Span {
|
|
|
|
|
start: ByteOffset,
|
|
|
|
|
pub start: ByteOffset,
|
|
|
|
|
/// exclusive
|
|
|
|
|
end: ByteOffset,
|
|
|
|
|
pub end: ByteOffset,
|
|
|
|
|
}
|
|
|
|
|
fn span<A, B>(start: A, end: B) -> Span
|
|
|
|
|
impl Span {
|
|
|
|
|
pub fn enclose<T>(self, t: T) -> Spanned<T> {
|
|
|
|
|
spanned(self.start, self.end, t)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
pub fn span<A, B>(start: A, end: B) -> Span
|
|
|
|
|
where
|
|
|
|
|
A: Into<ByteOffset>,
|
|
|
|
|
B: Into<ByteOffset>,
|
|
|
|
@ -55,11 +103,11 @@ where
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
|
|
|
|
pub struct Spanned<T> {
|
|
|
|
|
span: Span,
|
|
|
|
|
value: T,
|
|
|
|
|
pub span: Span,
|
|
|
|
|
pub value: T,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn spanned<A, B, T>(start: A, end: B, value: T) -> Spanned<T>
|
|
|
|
|
pub fn spanned<A, B, T>(start: A, end: B, value: T) -> Spanned<T>
|
|
|
|
|
where
|
|
|
|
|
A: Into<ByteOffset>,
|
|
|
|
|
B: Into<ByteOffset>,
|
|
|
|
@ -70,14 +118,15 @@ where
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Errors reported by the parser.
///
/// `ExtError` is the error type of the extension parser in use.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Error<ExtError> {
    /// An extension was opened but never closed.
    UnclosedExtension,
    /// The extension parser rejected an extension's content.
    Extension(ExtError),
}
|
|
|
|
|
|
|
|
|
|
/// A heading, with its content held in the string representation `S`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Header<S> {
    /// Level of the heading.
    pub level: u8,
    /// Content of the heading.
    pub content: S,
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
|
|
|
@ -87,23 +136,24 @@ pub struct Text<S>(pub S);
|
|
|
|
|
/// Wrapper around the content of an extension, held in the string
/// representation `S`.
pub struct Extension<S>(pub S);
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
|
|
|
|
pub enum ParagraphPiece<S> {
|
|
|
|
|
Text(Text<S>),
|
|
|
|
|
Extension(Extension<S>),
|
|
|
|
|
pub enum ParagraphPiece<StrRepr, ExtRepr> {
|
|
|
|
|
Text(Text<StrRepr>),
|
|
|
|
|
Extension(ExtRepr),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Eq, PartialEq)]
|
|
|
|
|
pub struct Paragraph<S> {
|
|
|
|
|
pieces: Vec<Spanned<ParagraphPiece<S>>>,
|
|
|
|
|
pub struct Paragraph<StrRepr, ExtRepr> {
|
|
|
|
|
pub pieces: Vec<Spanned<ParagraphPiece<StrRepr, ExtRepr>>>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Eq, PartialEq)]
|
|
|
|
|
pub enum Block<S> {
|
|
|
|
|
Header(Header<S>),
|
|
|
|
|
Paragraph(Paragraph<S>),
|
|
|
|
|
pub enum Block<StrRepr, ExtRepr> {
|
|
|
|
|
Header(Header<StrRepr>),
|
|
|
|
|
Paragraph(Paragraph<StrRepr, ExtRepr>),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub type Result<T> = core::result::Result<T, Spanned<Error>>;
|
|
|
|
|
pub type Result<T, ExtError> =
|
|
|
|
|
core::result::Result<T, Spanned<Error<ExtError>>>;
|
|
|
|
|
|
|
|
|
|
#[derive(Copy, Clone)]
|
|
|
|
|
struct State<'a> {
|
|
|
|
@ -175,9 +225,10 @@ impl<'a> State<'a> {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn header<'a, S>(state: &mut State<'a>) -> Result<Header<S>>
|
|
|
|
|
fn header<'a, S, E>(state: &mut State<'a>) -> Result<Header<S>, E::Error>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
E: ParseExtensions<'a>,
|
|
|
|
|
{
|
|
|
|
|
let mut level = 1;
|
|
|
|
|
|
|
|
|
@ -212,9 +263,13 @@ where
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn extension<'a, S>(state: &mut State<'a>) -> Result<Extension<S>>
|
|
|
|
|
fn extension<'a, S, E>(
|
|
|
|
|
state: &mut State<'a>,
|
|
|
|
|
pex: &E,
|
|
|
|
|
) -> Result<E::Output, E::Error>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
E: ParseExtensions<'a>,
|
|
|
|
|
{
|
|
|
|
|
debug_assert!({
|
|
|
|
|
let mut peek_buf = [None; 2];
|
|
|
|
@ -246,7 +301,9 @@ where
|
|
|
|
|
|
|
|
|
|
let content = &state.corpus[start.0..bo.0];
|
|
|
|
|
|
|
|
|
|
return Ok(Extension(content.into()));
|
|
|
|
|
return pex
|
|
|
|
|
.parse(content, span(start.0, bo.0))
|
|
|
|
|
.map_err(|e| spanned(start.0, bo.0, Error::Extension(e)));
|
|
|
|
|
}
|
|
|
|
|
_ => {
|
|
|
|
|
state.forward();
|
|
|
|
@ -256,9 +313,10 @@ where
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn text<'a, S>(state: &mut State<'a>) -> Result<Text<S>>
|
|
|
|
|
fn text<'a, S, E>(state: &mut State<'a>) -> Result<Text<S>, E::Error>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
E: ParseExtensions<'a>,
|
|
|
|
|
{
|
|
|
|
|
let start = state.current_offset;
|
|
|
|
|
|
|
|
|
@ -280,9 +338,13 @@ where
|
|
|
|
|
Ok(Text(state.corpus[start.0..end.0].into()))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn paragraph<'a, S>(state: &mut State<'a>) -> Result<Paragraph<S>>
|
|
|
|
|
fn paragraph<'a, S, E>(
|
|
|
|
|
state: &mut State<'a>,
|
|
|
|
|
pex: &E,
|
|
|
|
|
) -> Result<Paragraph<S, E::Output>, E::Error>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
E: ParseExtensions<'a>,
|
|
|
|
|
{
|
|
|
|
|
let mut pieces = Vec::new();
|
|
|
|
|
|
|
|
|
@ -308,10 +370,10 @@ where
|
|
|
|
|
}
|
|
|
|
|
// extension
|
|
|
|
|
[Some(('@', _)), Some(('{', _))] => {
|
|
|
|
|
ParagraphPiece::Extension(extension(state)?)
|
|
|
|
|
ParagraphPiece::Extension(extension::<S, E>(state, pex)?)
|
|
|
|
|
}
|
|
|
|
|
// regular text
|
|
|
|
|
_ => ParagraphPiece::Text(text(state)?),
|
|
|
|
|
_ => ParagraphPiece::Text(text::<S, E>(state)?),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let end = state.current_offset;
|
|
|
|
@ -322,9 +384,13 @@ where
|
|
|
|
|
Ok(Paragraph { pieces })
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn blocks<'a, S>(state: &mut State<'a>) -> Result<Vec<Spanned<Block<S>>>>
|
|
|
|
|
fn blocks<'a, S, E>(
|
|
|
|
|
state: &mut State<'a>,
|
|
|
|
|
pex: &E,
|
|
|
|
|
) -> Result<Vec<Spanned<Block<S, E::Output>>>, E::Error>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
E: ParseExtensions<'a>,
|
|
|
|
|
{
|
|
|
|
|
let mut out = Vec::new();
|
|
|
|
|
while let Some((ch, _)) = state.peek() {
|
|
|
|
@ -332,12 +398,8 @@ where
|
|
|
|
|
|
|
|
|
|
let start = state.current_offset;
|
|
|
|
|
let block = match ch {
|
|
|
|
|
'#' => Block::Header(header(state)?),
|
|
|
|
|
ch if ch.is_whitespace() => {
|
|
|
|
|
state.forward();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
_ => Block::Paragraph(paragraph(state)?),
|
|
|
|
|
'#' => Block::Header(header::<S, E>(state)?),
|
|
|
|
|
_ => Block::Paragraph(paragraph(state, pex)?),
|
|
|
|
|
};
|
|
|
|
|
let end = state.current_offset;
|
|
|
|
|
|
|
|
|
@ -347,18 +409,56 @@ where
|
|
|
|
|
Ok(out)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn parse<'a, S>(corpus: &'a str) -> Result<Vec<Spanned<Block<S>>>>
|
|
|
|
|
/// Extension parser that does not interpret extensions; its
/// `ParseExtensions` impl wraps the content in an `Extension<S>`.
///
/// `S` only selects the output string representation; no `S` is stored.
pub struct ParseExtensionsRaw<S>(core::marker::PhantomData<S>);

impl<S> ParseExtensionsRaw<S> {
    /// Creates the (zero-sized) raw extension parser.
    fn new() -> Self {
        Self(core::marker::PhantomData)
    }
}
|
|
|
|
|
|
|
|
|
|
impl<'a, S> ParseExtensions<'a> for ParseExtensionsRaw<S>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
{
|
|
|
|
|
type Output = Extension<S>;
|
|
|
|
|
|
|
|
|
|
type Error = kor::Never;
|
|
|
|
|
|
|
|
|
|
fn parse(
|
|
|
|
|
&self,
|
|
|
|
|
content: &'a str,
|
|
|
|
|
span: Span,
|
|
|
|
|
) -> core::result::Result<Self::Output, Self::Error> {
|
|
|
|
|
Ok(Extension(content.into()))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn parse<'a, S>(
|
|
|
|
|
corpus: &'a str,
|
|
|
|
|
) -> Result<Vec<Spanned<Block<S, Extension<S>>>>, kor::Never>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
{
|
|
|
|
|
parse_extended(corpus, ParseExtensionsRaw::new())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn parse_extended<'a, S, E>(
|
|
|
|
|
corpus: &'a str,
|
|
|
|
|
pex: E,
|
|
|
|
|
) -> Result<Vec<Spanned<Block<S, E::Output>>>, E::Error>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
E: ParseExtensions<'a>,
|
|
|
|
|
{
|
|
|
|
|
let mut state = State {
|
|
|
|
|
corpus,
|
|
|
|
|
current_offset: 0.into(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
blocks(&mut state)
|
|
|
|
|
blocks(&mut state, &pex)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod test {
|
|
|
|
|
use super::*;
|
|
|
|
@ -487,4 +587,12 @@ mod test {
|
|
|
|
|
)]
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn complex_test_1() {
    // Parses the sample document with raw (uninterpreted) extensions.
    let corpus = include_str!("../test/hello_mup.mup");
    let output = parse::<&str>(corpus).unwrap();

    // NOTE(review): expecting an empty block list looks like a placeholder
    // expectation — confirm against the contents of `hello_mup.mup`.
    assert_eq!(output, &[]);
}
|
|
|
|
|
}
|
|
|
|
|