|
|
|
@ -0,0 +1,396 @@
|
|
|
|
|
/// Marker error returned when a byte offset cannot be used to slice a
/// string (see [`StrExt::char_at_byte_offset`]).
#[derive(Clone, Copy, Debug)]
struct InvalidCharBoundary;
|
|
|
|
|
trait StrExt {
|
|
|
|
|
fn char_at_byte_offset<T>(
|
|
|
|
|
&self,
|
|
|
|
|
offset: T,
|
|
|
|
|
) -> core::result::Result<Option<char>, InvalidCharBoundary>
|
|
|
|
|
where
|
|
|
|
|
T: Into<usize>;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl StrExt for str {
|
|
|
|
|
fn char_at_byte_offset<T>(
|
|
|
|
|
&self,
|
|
|
|
|
offset: T,
|
|
|
|
|
) -> core::result::Result<Option<char>, InvalidCharBoundary>
|
|
|
|
|
where
|
|
|
|
|
T: Into<usize>,
|
|
|
|
|
{
|
|
|
|
|
self.get(offset.into()..)
|
|
|
|
|
.ok_or(InvalidCharBoundary)
|
|
|
|
|
.map(|s| s.chars().next())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Newtype around a `usize` byte position inside the parsed text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ByteOffset(pub usize);
|
|
|
|
|
impl From<usize> for ByteOffset {
|
|
|
|
|
fn from(bo: usize) -> Self {
|
|
|
|
|
Self(bo)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
impl From<ByteOffset> for usize {
|
|
|
|
|
fn from(bo: ByteOffset) -> Self {
|
|
|
|
|
bo.0
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
|
|
|
|
pub struct Span {
|
|
|
|
|
start: ByteOffset,
|
|
|
|
|
/// exclusive
|
|
|
|
|
end: ByteOffset,
|
|
|
|
|
}
|
|
|
|
|
fn span<A, B>(start: A, end: B) -> Span
|
|
|
|
|
where
|
|
|
|
|
A: Into<ByteOffset>,
|
|
|
|
|
B: Into<ByteOffset>,
|
|
|
|
|
{
|
|
|
|
|
Span {
|
|
|
|
|
start: start.into(),
|
|
|
|
|
end: end.into(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
|
|
|
|
pub struct Spanned<T> {
|
|
|
|
|
span: Span,
|
|
|
|
|
value: T,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn spanned<A, B, T>(start: A, end: B, value: T) -> Spanned<T>
|
|
|
|
|
where
|
|
|
|
|
A: Into<ByteOffset>,
|
|
|
|
|
B: Into<ByteOffset>,
|
|
|
|
|
{
|
|
|
|
|
let span = span(start, end);
|
|
|
|
|
|
|
|
|
|
Spanned { value, span }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Parse error kinds.
///
/// The enum currently has no variants, so no value of this type can be
/// constructed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Error {}
|
|
|
|
|
|
|
|
|
|
/// A header block, generic over the string type `S` used for its text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Header<S> {
    /// Header level; the parser derives it from the run of leading `#`
    /// characters.
    level: u8,
    /// Text of the header.
    content: S,
}
|
|
|
|
|
|
|
|
|
|
/// An inline element of a paragraph, generic over the string type `S`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Inline<S> {
    /// A run of plain text.
    Text(S),
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Eq, PartialEq)]
|
|
|
|
|
pub struct Paragraph<S> {
|
|
|
|
|
pieces: Vec<Spanned<Inline<S>>>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Eq, PartialEq)]
|
|
|
|
|
pub enum Block<S> {
|
|
|
|
|
Header(Header<S>),
|
|
|
|
|
Paragraph(Paragraph<S>),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Result alias used by the parser: failures carry an [`Error`] wrapped in a
/// [`Spanned`].
pub type Result<T> = core::result::Result<T, Spanned<Error>>;
|
|
|
|
|
|
|
|
|
|
struct State<'a> {
|
|
|
|
|
corpus: &'a str,
|
|
|
|
|
current_offset: ByteOffset,
|
|
|
|
|
}
|
|
|
|
|
impl<'a> State<'a> {
|
|
|
|
|
fn is_eof(&self) -> bool {
|
|
|
|
|
self.current_offset.0 >= self.corpus.len()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn peek(&self) -> Option<(char, ByteOffset)> {
|
|
|
|
|
if self.is_eof() {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Some((
|
|
|
|
|
self.corpus
|
|
|
|
|
.char_at_byte_offset(self.current_offset)
|
|
|
|
|
.unwrap()
|
|
|
|
|
.unwrap(),
|
|
|
|
|
self.current_offset,
|
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<(char, ByteOffset)> {
|
|
|
|
|
if self.is_eof() {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let out = Some((
|
|
|
|
|
self.corpus
|
|
|
|
|
.char_at_byte_offset(self.current_offset)
|
|
|
|
|
.unwrap()
|
|
|
|
|
.unwrap(),
|
|
|
|
|
self.current_offset,
|
|
|
|
|
));
|
|
|
|
|
|
|
|
|
|
self.current_offset.0 += 1;
|
|
|
|
|
|
|
|
|
|
out
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn forward(&mut self) {
|
|
|
|
|
self.next();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
|
|
|
while let Some((ch, _)) = self.peek() {
|
|
|
|
|
if !ch.is_whitespace() {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.forward();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn header<'a, S>(state: &mut State<'a>) -> Result<Header<S>>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
{
|
|
|
|
|
let start = state.current_offset;
|
|
|
|
|
let mut level = 1;
|
|
|
|
|
|
|
|
|
|
state.forward();
|
|
|
|
|
|
|
|
|
|
while let Some((ch, _)) = state.peek() {
|
|
|
|
|
match ch {
|
|
|
|
|
'#' => {
|
|
|
|
|
level += 1;
|
|
|
|
|
state.forward();
|
|
|
|
|
}
|
|
|
|
|
_ => break,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
state.skip_whitespace();
|
|
|
|
|
|
|
|
|
|
let content_start = state.current_offset;
|
|
|
|
|
let mut content_end = None;
|
|
|
|
|
while let Some((ch, bo)) = state.next() {
|
|
|
|
|
if ch == '\n' {
|
|
|
|
|
content_end = Some(bo);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
let end = state.current_offset;
|
|
|
|
|
let content_end = content_end.unwrap_or(end);
|
|
|
|
|
|
|
|
|
|
Ok(Header {
|
|
|
|
|
level,
|
|
|
|
|
content: state.corpus[content_start.0..content_end.0].into(),
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn inline_text<'a, S>(state: &mut State<'a>) -> Result<(S, bool)>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
{
|
|
|
|
|
let start = state.current_offset;
|
|
|
|
|
|
|
|
|
|
let mut end: Option<ByteOffset> = None;
|
|
|
|
|
while let Some((ch, bo)) = state.next() {
|
|
|
|
|
match ch {
|
|
|
|
|
'\n' => match end {
|
|
|
|
|
Some(end) => {
|
|
|
|
|
return Ok((state.corpus[start.0..end.0].into(), true))
|
|
|
|
|
}
|
|
|
|
|
None => end = Some(bo),
|
|
|
|
|
},
|
|
|
|
|
_ => {
|
|
|
|
|
end = None;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
|
state.corpus
|
|
|
|
|
[start.0..end.map(|e| e.0).unwrap_or_else(|| state.corpus.len())]
|
|
|
|
|
.into(),
|
|
|
|
|
true,
|
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn inline<'a, S>(state: &mut State<'a>) -> Result<(Inline<S>, bool)>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
{
|
|
|
|
|
match state.peek() {
|
|
|
|
|
_ => {
|
|
|
|
|
let (inline, done) = inline_text(state)?;
|
|
|
|
|
Ok((Inline::Text(inline), done))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn paragraph<'a, S>(state: &mut State<'a>) -> Result<Paragraph<S>>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
{
|
|
|
|
|
let mut pieces = Vec::new();
|
|
|
|
|
|
|
|
|
|
loop {
|
|
|
|
|
let start = state.current_offset;
|
|
|
|
|
let (piece, done) = inline::<S>(state)?;
|
|
|
|
|
let end = state.current_offset;
|
|
|
|
|
|
|
|
|
|
pieces.push(spanned(start, end, piece));
|
|
|
|
|
|
|
|
|
|
if done {
|
|
|
|
|
break;
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(Paragraph { pieces })
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn blocks<'a, S>(state: &mut State<'a>) -> Result<Vec<Spanned<Block<S>>>>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
{
|
|
|
|
|
let mut out = Vec::new();
|
|
|
|
|
while let Some((ch, _)) = state.peek() {
|
|
|
|
|
state.skip_whitespace();
|
|
|
|
|
|
|
|
|
|
let start = state.current_offset;
|
|
|
|
|
let block = match ch {
|
|
|
|
|
'#' => Block::Header(header(state)?),
|
|
|
|
|
ch if ch.is_whitespace() => {
|
|
|
|
|
state.forward();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
_ => Block::Paragraph(paragraph(state)?),
|
|
|
|
|
};
|
|
|
|
|
let end = state.current_offset;
|
|
|
|
|
|
|
|
|
|
out.push(spanned(start, end, block));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(out)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn parse<'a, S>(corpus: &'a str) -> Result<Vec<Spanned<Block<S>>>>
|
|
|
|
|
where
|
|
|
|
|
S: From<&'a str>,
|
|
|
|
|
{
|
|
|
|
|
let mut state = State {
|
|
|
|
|
corpus,
|
|
|
|
|
current_offset: 0.into(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
blocks(&mut state)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// A single header without a trailing newline.
    #[test]
    fn header_1() {
        let corpus = "# Header";
        let output = parse(corpus).unwrap();

        assert_eq!(
            &output,
            &[spanned(
                0,
                corpus.len(),
                Block::Header(Header {
                    level: 1,
                    content: "Header",
                })
            )]
        )
    }

    /// Two consecutive headers of different levels.
    #[test]
    fn header_2() {
        let corpus = "# Header\n## Header 2\n";
        let output = parse(corpus).unwrap();

        assert_eq!(
            &output,
            &[
                spanned(
                    0,
                    9,
                    Block::Header(Header {
                        level: 1,
                        content: "Header",
                    })
                ),
                // The second header runs to the end of the 21-byte corpus.
                spanned(
                    9,
                    corpus.len(),
                    Block::Header(Header {
                        level: 2,
                        content: "Header 2",
                    })
                ),
            ]
        )
    }

    /// A single paragraph without a trailing newline.
    #[test]
    fn paragraph_1() {
        let corpus = "Hello, world";
        let output = parse(corpus).unwrap();

        assert_eq!(
            &output,
            &[spanned(
                0,
                corpus.len(),
                Block::Paragraph(Paragraph {
                    pieces: vec![spanned(
                        0,
                        corpus.len(),
                        Inline::Text("Hello, world")
                    )]
                })
            )],
        )
    }

    /// Two paragraphs separated by a blank line.
    #[test]
    fn paragraph_2() {
        let corpus = "Hello, world\n\nGoodbye, world\n";
        let output = parse(corpus).unwrap();

        assert_eq!(
            &output,
            &[
                spanned(
                    0,
                    14,
                    Block::Paragraph(Paragraph {
                        pieces: vec![spanned(
                            0,
                            14,
                            Inline::Text("Hello, world")
                        )]
                    })
                ),
                spanned(
                    14,
                    corpus.len(),
                    Block::Paragraph(Paragraph {
                        pieces: vec![spanned(
                            14,
                            corpus.len(),
                            Inline::Text("Goodbye, world")
                        )]
                    })
                )
            ],
        )
    }
}
|