diff --git a/CHANGELOG.md b/CHANGELOG.md index a905850..4793dbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ # Changelog + All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), @@ -6,9 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add a converter to show Discord formatting in Minecraft + ## [v2.2.0] - 2021-01-23 ### Changed + - Update dependencies - Tokio runtime updated to 1.0 - Serenity updated to 0.10 @@ -20,27 +26,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [v2.1.1] - 2020-12-11 ### Fixed + - Fix full username mentions with discriminator not being parsed correctly ## [v2.1.0] - 2020-12-03 ### Added + - Add ability to mention roles and channels from Minecraft - Add an optional webserver implementation to listen for messages from other machines ### Fixed + - Log files being moved (such as maybe during log rotation) should no longer break the bot, if that was happening - Fix mentions from Minecraft with spaces not creating a mention ### Changed + - Print nicer-looking error messages ## [v2.0.1] - 2020-11-22 ### Fixed + - Fix a bad value in the default configuration ### Changed + - Eliminated a call to the Discord REST API when messages are received from Minecraft - Replace ugly Discord mentions with names in messages to Minecraft - Escape double quote characters in messages to Minecraft @@ -48,12 +60,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [v2.0.0] - 2020-11-8 ### Added + - Add more customization options for chat formatting in Minecraft ### Changed + - Improve experience when a user sends an attachment in Discord -[Unreleased]: https://github.com/EbonJaeger/dolphin-rs/compare/v2.2.0...master +[unreleased]: https://github.com/EbonJaeger/dolphin-rs/compare/v2.2.0...master [v2.2.0]: 
https://github.com/EbonJaeger/dolphin-rs/compare/v2.1.1...v2.2.0 [v2.1.1]: https://github.com/EbonJaeger/dolphin-rs/compare/v2.1.0...v2.1.1 [v2.1.0]: https://github.com/EbonJaeger/dolphin-rs/compare/v2.0.1...v2.1.0 diff --git a/Cargo.lock b/Cargo.lock index 8e0fe01..5f7af18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,6 +113,21 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.2.1" @@ -366,10 +381,11 @@ dependencies = [ "clap", "confy", "err-derive", + "fancy-regex", "lazy_static", "linemux", + "pipeline", "rcon", - "regex", "serde", "serde_json", "serenity", @@ -408,6 +424,16 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" +[[package]] +name = "fancy-regex" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36996e5f56f32ca51a937f325094fa450b32df871af1a89be331b7145b931bfc" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "filetime" version = "0.2.13" @@ -1196,6 +1222,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pipeline" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d15b6607fa632996eb8a17c9041cb6071cb75ac057abd45dece578723ea8c7c0" + [[package]] name = "ppv-lite86" version = "0.2.10" diff --git a/Cargo.toml b/Cargo.toml index 663ffda..7e01999 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,8 +12,9 @@ confy = "0.4.0" err-derive = "0.3" lazy_static = "1.4.0" linemux = {git = "https://github.com/jmagnuson/linemux"} +pipeline = "0.5" rcon = "0.4" -regex = "1.4.2" +fancy-regex = "0.4" serde_json = "1.0" tracing = "0.1.21" tracing-subscriber = "0.2.15" diff --git a/src/discord.rs b/src/discord.rs index f3881f4..b21b22b 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -1,9 +1,10 @@ use crate::config::RootConfig; use crate::errors::DolphinError; use crate::listener::{Listener, LogTailer, Webserver}; +use crate::markdown; use crate::minecraft::{MinecraftMessage, Source}; +use fancy_regex::Regex; use rcon::Connection; -use regex::Regex; use serenity::{ async_trait, model::{ @@ -15,12 +16,9 @@ use serenity::{ prelude::*, utils::parse_channel, }; -use std::{ - str::Split, - sync::{ - atomic::{AtomicBool, AtomicU64, Ordering}, - Arc, - }, +use std::sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, }; use tokio::sync::mpsc; use tracing::{debug, error, info, warn}; @@ -191,8 +189,17 @@ impl EventHandler for Handler { let content = self.sanitize_message(&ctx, &msg).await; // Send a separate message for each line - let lines = content.split("\n"); - let lines = truncate_lines(lines); + let lines = content.split('\n'); + + // Parse and convert any Markdown + let mut marked = Vec::new(); + lines.for_each(|line| { + let blocks = markdown::parse(&line); + debug!("event_handler:message: parsed plocks: {:?}", blocks); + marked.push(markdown::to_minecraft_format(&blocks)); + }); + + let lines = truncate_lines(marked); let mut lines = self.apply_line_template(lines); // Add attachement message if an attachment is present @@ -286,7 +293,7 @@ impl EventHandler for Handler { /// by default 100. 
If a line is over the limit, it will be split at that /// number of chacacters, and a new line inserted into the line Vector. /// -fn truncate_lines<'a>(lines: Split<'a, &'a str>) -> Vec { +fn truncate_lines(lines: Vec) -> Vec { let mut truncated: Vec = Vec::new(); for mut line in lines { @@ -303,8 +310,8 @@ fn truncate_lines<'a>(lines: Split<'a, &'a str>) -> Vec { // Shorten the line for the next iteration line = match line.get(MAX_LINE_LENGTH..) { - Some(sub) => sub, - None => "", + Some(sub) => sub.to_string(), + None => String::new(), }; } } @@ -482,22 +489,20 @@ fn split_webhook_url(url: &str) -> Option<(u64, &str)> { Regex::new(r"^https://discord.com/api/webhooks/(?P.*)/(?P.*)$").unwrap(); } - let captures = match WEBHOOK_REGEX.captures(&url) { - Some(captures) => captures, - None => return None, - }; + let mut ret = None; - if captures.len() != 3 { - return None; - } + if let Ok(Some(captures)) = WEBHOOK_REGEX.captures(&url) { + if captures.len() != 3 { + return None; + } - let id = captures.name("id").unwrap().as_str(); - let id = match id.parse::() { - Ok(num) => num, - Err(_) => return None, - }; + let id = captures.name("id").unwrap().as_str(); + if let Ok(id) = id.parse::() { + ret = Some((id, captures.name("token").unwrap().as_str())); + } + } - Some((id, captures.name("token").unwrap().as_str())) + ret } #[cfg(test)] @@ -508,12 +513,11 @@ mod tests { #[test] fn split_long_line() { // Given - let input = String::from("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"); - let split = input.split("\n"); + let input = vec!(String::from("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789")); let expected = vec!("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", "0123456789"); // When - let result = truncate_lines(split); + let result = truncate_lines(input); // Then assert_eq!(result, 
expected); @@ -522,12 +526,11 @@ mod tests { #[test] fn no_split_line() { // Given - let input = String::from("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"); - let split = input.split("\n"); + let input = vec!(String::from("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789")); let expected = vec!("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"); // When - let result = truncate_lines(split); + let result = truncate_lines(input); // Then assert_eq!(result, expected); diff --git a/src/main.rs b/src/main.rs index 71dcebe..ce6450d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,12 +3,15 @@ mod config; mod discord; mod errors; mod listener; +mod markdown; mod minecraft; #[macro_use] extern crate clap; #[macro_use] extern crate lazy_static; +#[macro_use] +extern crate pipeline; use clap::{App, Arg}; use commands::{general::*, hooks::after, minecraft::*}; diff --git a/src/markdown/README.md b/src/markdown/README.md new file mode 100644 index 0000000..49d8020 --- /dev/null +++ b/src/markdown/README.md @@ -0,0 +1,19 @@ +This is a stripped down version of the Rust crate `markdown` written by Johann Hofmann, licensed under the Apache 2.0 license. You can find the original work [here](https://github.com/johannhof/markdown.rs). The vast majority of the credit here goes to him. + +# Differences + +Since this application only deals with Discord's flavor of Markdown and Minecraft formatting, there is a lot of the original crate that isn't needed here. On top of that, there are a couple of formatting types that Discord uses that aren't present in the upstream library. + +Only the following elements are implemented: + +- Blockquotes +- Emphasis +- Strikethrough +- Strong +- Underline + +# License + +All work **except** the strikethrough and underline parsers, and the Minecraft format conversion code is © Johann Hofmann. 
+ +The license for the original work can be found [here](https://github.com/johannhof/markdown.rs/blob/master/LICENSE-APACHE). I really make no claims on top of that. diff --git a/src/markdown/block/blockquote.rs b/src/markdown/block/blockquote.rs new file mode 100644 index 0000000..a394cdd --- /dev/null +++ b/src/markdown/block/blockquote.rs @@ -0,0 +1,41 @@ +use super::Block; +use super::Block::Blockquote; +use crate::markdown::block::parse_blocks; + +pub fn parse_blockquote(line: &str) -> Option<Block> { + if line.is_empty() || !line.starts_with("> ") { + return None; + } + + let mut content = String::new(); + + // Push the content of the quote after the opening `>` + content.push_str(&line[2..line.len()]); + + Some(Blockquote(parse_blocks(&content))) +} + +#[cfg(test)] +mod tests { + use super::parse_blockquote; + use super::Block::Blockquote; + + #[test] + fn finds_blockquote() { + match parse_blockquote("> quote") { + Some(Blockquote(_)) => (), + _ => panic!(), + } + } + + #[test] + fn no_false_positives() { + assert_eq!(parse_blockquote(">shouldn't parse"), None); + assert_eq!(parse_blockquote("shouldn't > parse"), None); + } + + #[test] + fn no_early_matching() { + assert_eq!(parse_blockquote("first > quote > another blah"), None); + } +} diff --git a/src/markdown/block/mod.rs b/src/markdown/block/mod.rs new file mode 100644 index 0000000..c2a9eec --- /dev/null +++ b/src/markdown/block/mod.rs @@ -0,0 +1,65 @@ +use super::span::parse_spans; +use super::Block; +use super::Block::Paragraph; + +mod blockquote; + +use self::blockquote::parse_blockquote; + +pub fn parse_blocks(content: &str) -> Vec<Block> { + let mut blocks = vec![]; + let mut t = vec![]; + + match parse_block(&content) { + // A block was found + Some(block) => { + // The current paragraph has ended, push it to the blocks Vec + if !t.is_empty() { + blocks.push(Paragraph(t)); + t = Vec::new(); + } + + blocks.push(block); + } + // Didn't find a block, assume it's a Paragraph + None => { + // Empty linebreak; 
push a new Paragraph + if content.is_empty() && !t.is_empty() { + blocks.push(Paragraph(t)); + t = Vec::new(); + } + + // Parse any span elements in this line + let spans = parse_spans(content); + t.extend_from_slice(&spans); + } + } + + if !t.is_empty() { + blocks.push(Paragraph(t)); + } + + blocks +} + +fn parse_block(content: &str) -> Option<Block> { + pipe_opt!( + content + => parse_blockquote + ) +} + +#[cfg(test)] +mod tests { + use super::parse_blocks; + use super::Block::{Blockquote, Paragraph}; + use crate::markdown::Span::Text; + + #[test] + fn finds_blockquotes() { + assert_eq!( + parse_blocks("> One"), + vec![Blockquote(vec![Paragraph(vec![Text("One".to_owned())])])] + ) + } +} diff --git a/src/markdown/mod.rs b/src/markdown/mod.rs new file mode 100644 index 0000000..cc2cc6a --- /dev/null +++ b/src/markdown/mod.rs @@ -0,0 +1,242 @@ +mod block; +mod span; + +const EMPHASIS_TAG: &str = "§o"; +const RESET_TAG: &str = "§r"; +const STRIKETHROUGH_TAG: &str = "§m"; +const STRONG_TAG: &str = "§l"; +const UNDERLINE_TAG: &str = "§n"; + +#[derive(Clone, Debug, PartialEq)] +pub enum Block { + Paragraph(Vec<Span>), + Blockquote(Vec<Block>), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Span { + Emphasis(Vec<Span>), + Literal(char), + Strikethrough(Vec<Span>), + Strong(Vec<Span>), + Text(String), + Underline(Vec<Span>), +} + +impl Span { + /// Converts a single Span element (and any nested Spans, recursively) + /// to a String with the Minecraft format tags. + /// + /// Minecraft doesn't have real closing tags; you have to completely + /// reset the string and re-apply tags that should still be applied. + /// Thus, we need to keep track of any open tags with a `Vec<String>`. 
+ fn to_minecraft(&self, mut open_tags: &mut Vec<String>) -> String { + match self { + Span::Literal(ref c) => c.to_string(), + Span::Text(ref content) => content.to_string(), + Span::Emphasis(ref content) => { + open_tags.push(EMPHASIS_TAG.to_owned()); + let span = format_spans(content, &mut open_tags); + format!("{}{}{}", EMPHASIS_TAG, span, RESET_TAG) + } + Span::Strong(ref content) => { + open_tags.push(STRONG_TAG.to_owned()); + let span = format_spans(content, &mut open_tags); + format!("{}{}{}", STRONG_TAG, span, RESET_TAG) + } + Span::Strikethrough(ref content) => { + open_tags.push(STRIKETHROUGH_TAG.to_owned()); + let span = format_spans(content, &mut open_tags); + format!("{}{}{}", STRIKETHROUGH_TAG, span, RESET_TAG) + } + Span::Underline(ref content) => { + open_tags.push(UNDERLINE_TAG.to_owned()); + let span = format_spans(content, &mut open_tags); + format!("{}{}{}", UNDERLINE_TAG, span, RESET_TAG) + } + } + } +} + +/// Parse a string and turn it into a tree of Markdown elements. +pub fn parse(content: &str) -> Vec<Block> { + block::parse_blocks(content) +} + +/// Turn a tree of Markdown blocks into a Minecraft formatted string. +pub fn to_minecraft_format(blocks: &[Block]) -> String { + let mut ret = String::new(); + + for block in blocks { + let next = match block { + Block::Paragraph(ref elements) => format_paragraph(elements), + Block::Blockquote(ref elements) => format_blockquote(elements), + }; + + ret.push_str(&next); + } + + ret +} + +fn format_blockquote(elements: &[Block]) -> String { + format!("> {}", to_minecraft_format(elements)) +} + +fn format_paragraph(elements: &[Span]) -> String { + format_spans(elements, &mut vec![]) +} + +/// Turn a Span tree to a String in the Minecraft chat format. This +/// requires a `Vec<String>` to track any open tags for nested spans +/// because of how formatting in Minecraft works. +/// +/// Minecraft doesn't have real closing tags; you have to completely +/// reset the string and re-apply tags that should still be applied. 
+fn format_spans(elements: &[Span], mut open_tags: &mut Vec<String>) -> String { + let mut ret = String::new(); + + for element in elements.iter() { + // Append the element to the final String + let next = element.to_minecraft(&mut open_tags); + ret.push_str(&next); + + // A span just ended with a reset: drop its tag and re-apply the tags still open + if !open_tags.is_empty() && ret.ends_with(RESET_TAG) { + open_tags.pop(); + ret.push_str(&open_tags.concat()); + } + } + + ret +} + +#[cfg(test)] +mod tests { + use super::parse; + use super::to_minecraft_format; + + #[test] + fn formats_regular_text() { + let input = "test"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "test"); + } + + #[test] + fn formats_blockquotes() { + let input = "> blockquote"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "> blockquote"); + + let input = "> *blockquote*"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "> §oblockquote§r"); + } + + #[test] + fn handles_emphasis_inner_strong() { + let input = "*em **strong***"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "§oem §lstrong§r§o§r"); + } + + #[test] + fn handles_strong_inner_emphasis() { + let input = "**strong *em***"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "§lstrong §oem§r§l§r"); + } + + #[test] + fn handles_strong_inner_emphasis2() { + let input = "***em* strong**"; + let md = parse(input); + println!("Blocks: {:?}", md); + assert_eq!(to_minecraft_format(&md), "§l§oem§r§l strong§r"); + } + + #[test] + fn handles_emphasis_inner_underline() { + let input = "___underline__ em_"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "§o§nunderline§r§o em§r"); + } + + #[test] + fn handles_emphasis_inner_underline2() { + let input = "_em __underline___"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "§oem §nunderline§r§o§r"); + } + + #[test] + fn handles_underline_inner_emphasis() { + let input = "__underline _em___"; + let md = parse(input); + 
assert_eq!(to_minecraft_format(&md), "§nunderline §oem§r§n§r"); + } + + #[test] + fn handles_underline_inner_strikethrough() { + let input = "__underline ~~strikethrough~~__"; + let md = parse(input); + assert_eq!( + to_minecraft_format(&md), + "§nunderline §mstrikethrough§r§n§r" + ); + } + + #[test] + fn handles_underline_inner_strikethrough2() { + let input = "__~~strikethrough~~ underline__"; + let md = parse(input); + assert_eq!( + to_minecraft_format(&md), + "§n§mstrikethrough§r§n underline§r" + ); + } + + #[test] + fn handles_strikethrough_inner_underline() { + let input = "~~strikethrough __underline__~~"; + let md = parse(input); + assert_eq!( + to_minecraft_format(&md), + "§mstrikethrough §nunderline§r§m§r" + ); + } + + #[test] + fn handles_strikethrough_inner_underline2() { + let input = "~~__underline__ strikethrough~~"; + let md = parse(input); + assert_eq!( + to_minecraft_format(&md), + "§m§nunderline§r§m strikethrough§r" + ); + } + + #[test] + fn escapes_characters() { + let input = "\\*test\\*"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "*test*"); + + let input = "\\*\\*test\\*\\*"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "**test**"); + + let input = "\\_test\\_"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "_test_"); + + let input = "\\_\\_test\\_\\_"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "__test__"); + + let input = "\\~\\~test\\~\\~"; + let md = parse(input); + assert_eq!(to_minecraft_format(&md), "~~test~~"); + } +} diff --git a/src/markdown/span/emphasis.rs b/src/markdown/span/emphasis.rs new file mode 100644 index 0000000..0674943 --- /dev/null +++ b/src/markdown/span/emphasis.rs @@ -0,0 +1,110 @@ +use crate::markdown::span::parse_spans; +use crate::markdown::Span; +use crate::markdown::Span::Emphasis; +use fancy_regex::Regex; + +/// Parses any emphasis (italic) markdown tags in the given text. 
+/// +/// # Regex +/// +/// This thing uses a monster regex from the `simple-markdown` GitHub project +/// found [here](https://github.com/Khan/simple-markdown/blob/master/src/index.js#L1607): +/// +/// ```js +/// match: inlineRegex( +/// new RegExp( +/// // only match _s surrounding words. +/// "^\\b_" + +/// "((?:__|\\\\[\\s\\S]|[^\\\\_])+?)_" + +/// "\\b" + +/// // Or match *s: +/// "|" + +/// // Only match *s that are followed by a non-space: +/// "^\\*(?=\\S)(" + +/// // Match at least one of: +/// "(?:" + +/// // - `**`: so that bolds inside italics don't close the +/// // italics +/// "\\*\\*|" + +/// // - escape sequence: so escaped *s don't close us +/// "\\\\[\\s\\S]|" + +/// // - whitespace: followed by a non-* (we don't +/// // want ' *' to close an italics--it might +/// // start a list) +/// "\\s+(?:\\\\[\\s\\S]|[^\\s\\*\\\\]|\\*\\*)|" + +/// // - non-whitespace, non-*, non-backslash characters +/// "[^\\s\\*\\\\]" + +/// ")+?" + +/// // followed by a non-space, non-* then * +/// ")\\*(?!\\*)" +/// ) +/// ) +/// ``` +pub fn parse_emphasis(text: &str) -> Option<(Span, usize)> { + // Text starting with `***` is left for `parse_strong`, which runs later in + // the `parse_span` pipeline and treats the leading `**` as its opener. + if text.starts_with("***") { + return None; + } + + lazy_static! { + static ref EMPHASIS: Regex = Regex::new(r"^\b_((?:__|\\[\s\S]|[^\\_])+?)_\b|^\*(?=\S)((?:\*\*|\\[\s\S]|\s+(?:\\[\s\S]|[^\s\*\\]|\*\*)|[^\s\*\\])+?)\*(?!\*)").unwrap(); + } + + let mut span = None; + if let Ok(Some(captures)) = EMPHASIS.captures(text) { + // Group 2 captures the `*...*` form; fall back to group 1 (the `_..._` + // form) when the asterisk alternative did not match. 
+ let t = match captures.get(2) { + Some(m) => m.as_str(), + None => match captures.get(1) { + Some(m) => m.as_str(), + None => panic!("emphasis regex found matches, but couldn't get capture groups"), + }, + }; + + span = Some((Emphasis(parse_spans(t)), t.len() + 2)); + } + + span +} + +#[cfg(test)] +mod tests { + use super::parse_emphasis; + use super::Span::{Emphasis, Text}; + + #[test] + fn finds_emphasis() { + assert_eq!( + parse_emphasis("*this is an* italic string"), + Some((Emphasis(vec![Text("this is an".to_owned())]), 12)) + ); + + assert_eq!( + parse_emphasis("*testing* italic* string"), + Some((Emphasis(vec![Text("testing".to_owned())]), 9)) + ); + + assert_eq!( + parse_emphasis("_this is also_ an italic string"), + Some((Emphasis(vec![Text("this is also".to_owned())]), 14)) + ); + } + + #[test] + fn no_false_positives() { + assert_eq!(parse_emphasis("* testing string"), None); + assert_eq!(parse_emphasis("** testing string"), None); + + assert_eq!(parse_emphasis("_ testing string"), None); + assert_eq!(parse_emphasis("__ testing string"), None); + } + + #[test] + fn no_early_matching() { + assert_eq!(parse_emphasis("test *test* test"), None); + + assert_eq!(parse_emphasis("test _test_ test"), None); + } +} diff --git a/src/markdown/span/mod.rs b/src/markdown/span/mod.rs new file mode 100644 index 0000000..5282578 --- /dev/null +++ b/src/markdown/span/mod.rs @@ -0,0 +1,87 @@ +use super::Span; +use super::Span::{Literal, Text}; + +mod emphasis; +mod strikethrough; +mod strong; +mod underline; + +use self::emphasis::parse_emphasis; +use self::strikethrough::parse_strikethrough; +use self::strong::parse_strong; +use self::underline::parse_underline; + +/// Parses a piece of text for span-type elements, returning the whole thing in +/// a Vector tree. 
+pub fn parse_spans(content: &str) -> Vec<Span> { + let mut tokens = vec![]; + let mut t = String::new(); + let mut index = 0; + + while index < content.len() { + match parse_span(&content[index..content.len()]) { + // Found a span element + Some((span, consumed)) => { + if !t.is_empty() { + // This token is on the far left, so trim left whitespace + if tokens.is_empty() { + t = t.trim_start().to_owned() + } + // Flush the plain text collected before this span + tokens.push(Text(t)); + } + + tokens.push(span); + t = String::new(); + index += consumed; + } + // No span starts here; move one whole character into the pending text + None => { + let mut end = index + 1; + while !content.is_char_boundary(end) { + end += 1; + } + + t.push_str(&content[index..end]); + index += end - index; + } + } + } + + if !t.is_empty() { + // Trim whitespaces + if tokens.is_empty() { + t = t.trim_start().to_owned() + } + t = t.trim_end().to_owned(); + + tokens.push(Text(t)); + } + + tokens +} + +fn parse_escape(content: &str) -> Option<(Span, usize)> { + let mut chars = content.chars(); + if let Some('\\') = chars.next() { + return match chars.next() { + Some(x @ '\\') | Some(x @ '`') | Some(x @ '*') | Some(x @ '_') | Some(x @ '~') => { + Some((Literal(x), 2)) + } + _ => None, + }; + } + + None +} + +fn parse_span(content: &str) -> Option<(Span, usize)> { + pipe_opt!( + content + => parse_escape + => parse_emphasis + => parse_underline + => parse_strong + => parse_strikethrough + ) +} diff --git a/src/markdown/span/strikethrough.rs b/src/markdown/span/strikethrough.rs new file mode 100644 index 0000000..f126510 --- /dev/null +++ b/src/markdown/span/strikethrough.rs @@ -0,0 +1,49 @@ +use crate::markdown::span::parse_spans; +use crate::markdown::Span; +use crate::markdown::Span::Strikethrough; +use fancy_regex::Regex; + +/// Parses any strikethrough markdown tags in the given text. +pub fn parse_strikethrough(text: &str) -> Option<(Span, usize)> { + lazy_static! 
{ + static ref STRIKETHROUGH: Regex = Regex::new(r"^~~(?P<text>.+?)~~").unwrap(); + } + + let mut span = None; + if let Ok(Some(captures)) = STRIKETHROUGH.captures(text) { + let t = captures.name("text").expect("no named capture").as_str(); + span = Some((Strikethrough(parse_spans(t)), t.len() + 4)); + } + + span +} + +#[cfg(test)] +mod tests { + use super::parse_strikethrough; + use super::Span::{Strikethrough, Text}; + + #[test] + fn finds_strikethrough() { + assert_eq!( + parse_strikethrough("~~this is a~~ strong string"), + Some((Strikethrough(vec![Text("this is a".to_owned())]), 13)) + ); + + assert_eq!( + parse_strikethrough("~~testing~~ strong~~ string"), + Some((Strikethrough(vec![Text("testing".to_owned())]), 11)) + ); + } + + #[test] + fn no_false_positives() { + assert_eq!(parse_strikethrough("~~ testing string"), None); + assert_eq!(parse_strikethrough("~~~~ testing string"), None); + } + + #[test] + fn no_early_matching() { + assert_eq!(parse_strikethrough("test ~~test~~ test"), None); + } +} diff --git a/src/markdown/span/strong.rs b/src/markdown/span/strong.rs new file mode 100644 index 0000000..56302f9 --- /dev/null +++ b/src/markdown/span/strong.rs @@ -0,0 +1,49 @@ +use crate::markdown::span::parse_spans; +use crate::markdown::Span; +use crate::markdown::Span::Strong; +use fancy_regex::Regex; + +/// Parses any strong (bold) markdown tags in the given text. +pub fn parse_strong(text: &str) -> Option<(Span, usize)> { + lazy_static! 
{ + static ref STRONG: Regex = Regex::new(r"^\*\*(?P<text>.+?)\*\*(?!\*)").unwrap(); + } + + let mut span = None; + if let Ok(Some(captures)) = STRONG.captures(text) { + let t = captures.name("text").expect("no named capture").as_str(); + span = Some((Strong(parse_spans(t)), t.len() + 4)); + } + + span +} + +#[cfg(test)] +mod tests { + use super::parse_strong; + use super::Span::{Strong, Text}; + + #[test] + fn finds_strong() { + assert_eq!( + parse_strong("**this is a** strong string"), + Some((Strong(vec![Text("this is a".to_owned())]), 13)) + ); + + assert_eq!( + parse_strong("**testing** strong** string"), + Some((Strong(vec![Text("testing".to_owned())]), 11)) + ); + } + + #[test] + fn no_false_positives() { + assert_eq!(parse_strong("** testing string"), None); + assert_eq!(parse_strong("**** testing string"), None); + } + + #[test] + fn no_early_matching() { + assert_eq!(parse_strong("test **test** test"), None); + } +} diff --git a/src/markdown/span/underline.rs b/src/markdown/span/underline.rs new file mode 100644 index 0000000..57237eb --- /dev/null +++ b/src/markdown/span/underline.rs @@ -0,0 +1,50 @@ +use crate::markdown::span::parse_spans; +use crate::markdown::Span; +use crate::markdown::Span::Underline; +use fancy_regex::Regex; + +/// Parses any underline markdown tags in the given text. +pub fn parse_underline(text: &str) -> Option<(Span, usize)> { + lazy_static! 
{ + static ref UNDERLINE: Regex = Regex::new(r"^__(?P<text>.+?)__(?!_)").unwrap(); + } + + let mut span = None; + + if let Ok(Some(captures)) = UNDERLINE.captures(text) { + let t = captures.name("text").expect("no named capture").as_str(); + span = Some((Underline(parse_spans(t)), t.len() + 4)); + } + + span +} + +#[cfg(test)] +mod tests { + use super::parse_underline; + use super::Span::{Text, Underline}; + + #[test] + fn finds_underline() { + assert_eq!( + parse_underline("__this is an__ underlined string"), + Some((Underline(vec![Text("this is an".to_owned())]), 14)) + ); + + assert_eq!( + parse_underline("__testing__ underlined__ strings"), + Some((Underline(vec![Text("testing".to_owned())]), 11)) + ); + } + + #[test] + fn no_false_positives() { + assert_eq!(parse_underline("__ testing string"), None); + assert_eq!(parse_underline("____ testing string"), None); + } + + #[test] + fn no_early_matching() { + assert_eq!(parse_underline("test __test__ test"), None); + } +}