diff --git a/Cargo.toml b/Cargo.toml
index 7c0c620..24e4db7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,7 @@
[profile.release]
-# debug = true
+debug = false
lto = "thin"
+opt-level = "s"
[workspace]
diff --git a/editor-support/vscode/syntaxes/lopez-crawl-directive.rion b/editor-support/vscode/syntaxes/lopez-crawl-directive.rion
index aa13d95..86f80ba 100644
--- a/editor-support/vscode/syntaxes/lopez-crawl-directive.rion
+++ b/editor-support/vscode/syntaxes/lopez-crawl-directive.rion
@@ -263,24 +263,32 @@ keywords : context {
ruleset_namespace : context {
: pattern {
- regex \= (first|collect|count|sum)
+ regex \= (first|collect|distinct|count|sum|group)
styles[] = .ruleset_aggregator;
}
: pattern {
- regex \= (name|text|html|inner-html|attr)
+ regex \= (name|text|html|inner-html|attrs?|classes|id|parent|children|select-any|select-all)
styles[] = .ruleset_extractor;
}
: pattern {
- regex \= (is-null|is-not-null|length|hash|get|flatten|each|capture|all-captures)
+ regex \= (is-null|is-not-null|hash|not|as-number|greater-than|lesser-than|equals|length|is-empty|get|flatten|each|filter|pretty|capture|all-captures|matches|replace|with)
styles[] = .ruleset_transformer;
}
+ : pattern {
+ regex \= (\!explode)
+ styles[] = .keyword;
+ }
}
numeric : context {
: pattern {
- regex \= (\b\d+)
+ regex \= (\b[0-9_]+e?[+-]?\d*)
styles [] = .numeric;
}
+ : pattern {
+ regex \= (true|false)
+ styles[] = .keyword;
+ }
}
}
diff --git a/editor-support/vscode/syntaxes/lopez-crawl-directives.tmLanguage.plist b/editor-support/vscode/syntaxes/lopez-crawl-directives.tmLanguage.plist
index ea7ba91..623fe07 100644
--- a/editor-support/vscode/syntaxes/lopez-crawl-directives.tmLanguage.plist
+++ b/editor-support/vscode/syntaxes/lopez-crawl-directives.tmLanguage.plist
@@ -268,10 +268,6 @@
include
#string
-
- include
- #numeric
-
main__4
@@ -286,10 +282,16 @@
match
- (\b\d+)
+ (\b[0-9_]+e?[\x{002b}-]\d+)
name
constant.numeric.lcd
+
+ match
+ (true|false)
+ name
+ keyword.lcd
+
punctuation
@@ -318,11 +320,11 @@
match
(name|text|html|inner-html|attrs?|classes|id|parent|children|select-any|select-all)
name
- variable.lcd
+ entity.name.function.lcd
match
- (is-null|is-not-null|hash|not|as-number|greater-than|lesser-than|equals|length|get|flatten|each|filter|pretty|capture|all-captures|replace)
+ (is-null|is-not-null|hash|not|as-number|greater-than|lesser-than|equals|length|is-empty|get|flatten|each|filter|pretty|capture|all-captures|matches|replace|with)
name
entity.name.function.lcd
diff --git a/entalator/src/main.rs b/entalator/src/main.rs
index 77b5f3e..a28a28b 100644
--- a/entalator/src/main.rs
+++ b/entalator/src/main.rs
@@ -6,15 +6,18 @@ use std::{env, fs, io};
const LOPEZ_BIN: &[u8] = include_bytes!("../../target/release/lopez");
const LOPEZ_LIB: Dir = include_dir::include_dir!("../std-lopez");
+const LIB_PATH: &str = "/usr/share/lopez/lib";
+const BIN_PATH: &str = "/usr/local/bin/lopez";
+
fn install() -> io::Result<()> {
- let lib_path: PathBuf = "/usr/share/lopez/lib".parse().expect("infallible");
- println!("Installing `lopez` to `/usr/local/bin`");
+ println!("Installing `lopez` to `{}`", BIN_PATH);
- fs::write("/usr/local/bin/lopez", LOPEZ_BIN)?;
- fs::set_permissions("/usr/local/bin/lopez", fs::Permissions::from_mode(0o711))?;
+ fs::write(BIN_PATH, LOPEZ_BIN)?;
+ fs::set_permissions(BIN_PATH, fs::Permissions::from_mode(0o711))?;
- println!("Installing `std-lopez` to `usr/share/lopez`");
+ let lib_path: PathBuf = LIB_PATH.parse().expect("infallible");
+ println!("Installing `std-lopez` to `{}`", LIB_PATH);
println!("Creating folder structure");
diff --git a/lib-lopez/src/crawler/counter.rs b/lib-lopez/src/crawler/counter.rs
index d249450..5d38fa6 100644
--- a/lib-lopez/src/crawler/counter.rs
+++ b/lib-lopez/src/crawler/counter.rs
@@ -123,7 +123,8 @@ impl StatsTracker {
self.quota as usize,
),
hit_rate: Human(
- (self.already_done + self.counter.n_closed() - self.counter.n_error()
+ (self.already_done + self.counter.n_closed()
+ - self.counter.n_error()
- self
.last
.as_ref()
@@ -216,3 +217,28 @@ impl Display for Stats {
Ok(())
}
}
+
+struct Smoother { // Running state updated by `Smoother::smooth`.
+ last_state: f64, // Estimate stored by the latest `smooth` call (initial value before any call).
+ last_variance: f64, // Variance stored by the latest `smooth` call.
+ state_variance: f64, // Added to `last_variance` at the start of every `smooth` step.
+ output_variance: f64, // Weighed against the predicted variance when blending in a new input.
+}
+
+/// Combines `a` and `b` as their product divided by their sum.
+fn par(a: f64, b: f64) -> f64 {
+    let product = a * b;
+    let total = a + b;
+
+    product / total
+}
+
+impl Smoother {
+    /// Blends `input` into the stored estimate and returns the updated estimate.
+    ///
+    /// Both `last_state` and `last_variance` are overwritten with the values
+    /// computed during this step.
+    fn smooth(&mut self, input: f64) -> f64 {
+        // Variance before seeing `input`: the stored variance plus the per-step term.
+        let predicted = self.last_variance + self.state_variance;
+        // Fraction of the deviation from the stored estimate that is accepted.
+        let gain = predicted / (predicted + self.output_variance);
+
+        self.last_state += gain * (input - self.last_state);
+        self.last_variance = par(predicted, self.output_variance);
+
+        self.last_state
+    }
+}
diff --git a/lib-lopez/src/crawler/worker.rs b/lib-lopez/src/crawler/worker.rs
index 2038700..e0dbbc7 100644
--- a/lib-lopez/src/crawler/worker.rs
+++ b/lib-lopez/src/crawler/worker.rs
@@ -397,7 +397,7 @@ impl CrawlWorker {
.ensure_error(page_url)
.await
.map_err(|err| err.into())?;
-
+
// This needs to be the last thing (because of `?`).
self.task_counter.register_error();
}
@@ -407,7 +407,7 @@ impl CrawlWorker {
.ensure_error(page_url)
.await
.map_err(|err| err.into())?;
-
+
// This needs to be the last thing (because of `?`).
self.task_counter.register_error();
}
@@ -480,7 +480,7 @@ impl CrawlWorker {
// Register close, no matter the status.
worker_ref.task_counter.register_closed();
-
+
// Now, analyze results:
if let Err(error) = result {
worker_ref.task_counter.register_error();
diff --git a/lib-lopez/src/directives/mod.rs b/lib-lopez/src/directives/mod.rs
index ae88611..76af3ec 100644
--- a/lib-lopez/src/directives/mod.rs
+++ b/lib-lopez/src/directives/mod.rs
@@ -1,6 +1,7 @@
mod aggregator;
mod extractor;
mod parse;
+mod parse_utils;
mod transformer;
mod value_ext;
mod variable;
@@ -49,13 +50,18 @@ fn load_items_from<'a, P: AsRef>(
module_name: &str,
paths: &'a [P],
) -> Result<(&'a P, Vec- ), String> {
+ let formatted_module_name = if module_name.is_empty() {
+ ""
+ } else {
+ module_name
+ };
+
let (path, module_str) = read_from_many(paths)
- .map_err(|err| format!("could not open module `{}`: {}", module_name, err))?;
+ .map_err(|err| format!("could not open module `{}`: {}", formatted_module_name, err))?;
let module = parse::entrypoint(&module_str)
- .map_err(|err| format!("failed to parse `{}`: {}", module_name, err))?
- .1
- .map_err(|err| format!("failed to interpret `{}`: {}", module_name, err))?;
+ .map_err(|err| format!("failed to parse `{}`: {}", formatted_module_name, err))?
+ .map_err(|err| format!("failed to interpret `{}`: {}", formatted_module_name, err))?;
Ok((path, module))
}
@@ -295,20 +301,20 @@ impl Directives {
let duplicates = self.find_duplicate_rules();
if !duplicates.is_empty() {
issues.push(format!(
- "There are duplicated rules in directives: \n\t- {}",
- duplicates.into_iter().collect::>().join("\n\t- ")
+ "There are duplicated rules in directives: \n {}",
+ duplicates.into_iter().collect::>().join("\n ")
));
}
let invalid_seeds = self.find_invalid_seeds();
if !invalid_seeds.is_empty() {
issues.push(format!(
- "There are seeds on the frontier or outside your boundaries: \n\t- {}",
+ "There are seeds on the frontier or outside your boundaries: \n {}",
invalid_seeds
.into_iter()
.map(|url| url.as_str().to_owned())
.collect::>()
- .join("\n\nt- ")
+ .join("\n ")
));
}
@@ -316,8 +322,8 @@ impl Directives {
if !invalid.is_empty() {
issues.push(format!(
"There are invalid set-variable definitions \
- (these name are not known): \n\t- {}",
- invalid.into_iter().collect::>().join("\n\t- "),
+ (these name are not known): \n {}",
+ invalid.into_iter().collect::>().join("\n "),
));
}
@@ -325,40 +331,43 @@ impl Directives {
if !duplicates.is_empty() {
issues.push(format!(
"There are duplicate set-variable definitions \
- (these definitions are global): \n\t- {}",
- duplicates.into_iter().collect::>().join("\n\t- "),
+ (these definitions are global): \n {}",
+ duplicates.into_iter().collect::>().join("\n "),
));
}
let bad_values = self.find_bad_set_variable_values();
if !bad_values.is_empty() {
issues.push(format!(
- "There are bad values for set-variables: \n\t- {}",
+ "There are bad values for set-variables: \n {}",
bad_values
.into_iter()
.map(|err| err.to_string())
.collect::>()
- .join("\n\nt- "),
+ .join("\n "),
))
}
let type_errors = self.find_type_errors();
if !type_errors.is_empty() {
issues.push(format!(
- "There are type errors for these rules: \n\t- {}",
+ "There are type errors for these rules: \n {}",
type_errors
.into_iter()
.map(|(name, err)| format!("{}: {}", name, err))
.collect::>()
- .join("\n\t- ")
+ .join("\n ")
))
}
if !issues.is_empty() {
- return Err(issues.join("\n"));
+ Err(format!(
+ "There are issues with your configuration: \n{}",
+ issues.join("\n")
+ ))
+ } else {
+ Ok(())
}
-
- Ok(())
}
/// Loads directives from a given file while also loading all dependencies.
diff --git a/lib-lopez/src/directives/parse.rs b/lib-lopez/src/directives/parse.rs
index 954c245..50414b7 100644
--- a/lib-lopez/src/directives/parse.rs
+++ b/lib-lopez/src/directives/parse.rs
@@ -14,6 +14,7 @@ use std::str::FromStr;
use url::Url;
use super::*;
+use super::parse_utils::ParseError;
/// Defines end of file (lol!):
fn eof(i: &str) -> IResult<&str, ()> {
@@ -839,6 +840,8 @@ fn boundary_test() {
fn literal(i: &str) -> IResult<&str, Value> {
alt((
map(escaped_string, Value::String),
+ map(tag("true"), |_| true.into()),
+ map(tag("false"), |_| false.into()),
map_res(tuple((digit1, not(tag(".")))), |(number, _): (&str, ())| {
number.parse::().map(|num| num.into())
}),
@@ -967,11 +970,11 @@ fn item_test() {
// ));
}
-pub fn entrypoint(i: &str) -> IResult<&str, Result, String>> {
- all_consuming(map(
+pub fn entrypoint(i: &str) -> Result, String>, ParseError> {
+ ParseError::map_iresult(i, all_consuming(map(
tuple((whitespace, many0(trailing_whitespace(item)))),
|(_, results)| results.into_iter().collect::, _>>(),
- ))(i)
+ ))(i))
}
#[test]
@@ -980,6 +983,5 @@ fn entrypoint_test() {
"select * { } set foo = \"bar\"; allow \"foo\";\n"
))
.unwrap()
- .1
.unwrap();
}
diff --git a/lib-lopez/src/directives/parse_utils.rs b/lib-lopez/src/directives/parse_utils.rs
new file mode 100644
index 0000000..854affe
--- /dev/null
+++ b/lib-lopez/src/directives/parse_utils.rs
@@ -0,0 +1,69 @@
+use nom::error::ErrorKind;
+use nom::IResult;
+use std::fmt;
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct Position { // A location inside a source text.
+ line: usize, // Zero-based line index; `Display` prints it one-based.
+ column: usize, // Zero-based count of characters since the last '\n' ('\r' is not counted).
+}
+
+impl fmt::Display for Position {
+    /// Writes the position using one-based line and column numbers.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let line_number = self.line + 1;
+        let column_number = self.column + 1;
+
+        write!(f, "line {}, column {}", line_number, column_number)
+    }
+}
+
+impl Position {
+    /// Computes the position at which `fragment` starts inside `text`.
+    ///
+    /// `fragment` is expected to be a suffix of `text` (the unparsed
+    /// remainder); the offset is the difference of the two byte lengths.
+    /// Lines are split on `'\n'`, and `'\r'` does not advance the column.
+    ///
+    /// If `fragment` is not a suffix of `text`, the offset is clamped to the
+    /// start of `text` and moved back to the nearest character boundary
+    /// instead of panicking.
+    fn of(text: &str, fragment: &str) -> Position {
+        // Saturate so that a fragment longer than `text` cannot underflow.
+        let raw_offset = text.len().saturating_sub(fragment.len());
+        // Index 0 is always a character boundary, so the search cannot fail.
+        let offset = (0..=raw_offset)
+            .rev()
+            .find(|&index| text.is_char_boundary(index))
+            .unwrap_or(0);
+
+        let (line, column) = text[..offset]
+            .chars()
+            .fold((0, 0), |(line, column), ch| match ch {
+                '\n' => (line + 1, 0),
+                '\r' => (line, column),
+                _ => (line, column + 1),
+            });
+
+        Position { line, column }
+    }
+}
+
+#[derive(Debug)]
+pub struct ParseError { // A parse failure together with its location in the input.
+ position: Position, // Where the unparsed fragment begins in the full text.
+ hint: String, // Up to 10 characters of the fragment's first line, followed by "...".
+ message: String, // Description of the nom `ErrorKind` that was reported.
+}
+
+impl fmt::Display for ParseError {
+    /// Writes the error as `at <position> (<quoted hint>): <message>`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let ParseError {
+            position,
+            hint,
+            message,
+        } = self;
+
+        write!(f, "at {} ({:?}): {}", position, hint, message)
+    }
+}
+
+impl ParseError {
+    /// Builds a `ParseError` from a nom error raised while parsing `text`.
+    ///
+    /// The position is derived from the unparsed fragment carried by the nom
+    /// error. The hint holds at most the first 10 characters of the
+    /// fragment's first line, followed by `"..."`.
+    ///
+    /// # Panics
+    ///
+    /// Panics when given `nom::Err::Incomplete`.
+    pub fn new(text: &str, err: nom::Err<(&str, ErrorKind)>) -> ParseError {
+        match err {
+            nom::Err::Error((fragment, error_kind))
+            | nom::Err::Failure((fragment, error_kind)) => {
+                // Borrow the first line; an owned copy of it is not needed.
+                let first_line = fragment.lines().next().unwrap_or_default();
+                let hint = first_line.chars().take(10).collect::<String>() + "...";
+
+                ParseError {
+                    position: Position::of(text, fragment),
+                    hint,
+                    message: error_kind.description().to_owned(),
+                }
+            }
+            nom::Err::Incomplete(_) => panic!("incomplete variant not accepted"),
+        }
+    }
+
+    /// Converts a nom `IResult` obtained from parsing `text` into a plain
+    /// `Result`, dropping the leftover input on success and turning the nom
+    /// error into a `ParseError` on failure.
+    pub fn map_iresult<T>(text: &str, iresult: IResult<&str, T>) -> Result<T, ParseError> {
+        iresult
+            .map(|(_left_over, result)| result)
+            .map_err(|err| ParseError::new(text, err))
+    }
+}
diff --git a/lib-lopez/src/error.rs b/lib-lopez/src/error.rs
index 7b3dd56..97f011e 100644
--- a/lib-lopez/src/error.rs
+++ b/lib-lopez/src/error.rs
@@ -19,7 +19,7 @@ pub enum Error {
UnknownContentEncoding(String),
#[fail(display = "timed out")]
Timeout,
- #[fail(display = "bad set-variable value for {}: {:?}", _0, _1)]
+ #[fail(display = "bad set-variable value for {}: {}", _0, _1)]
BadSetVariableValue(crate::directives::Variable, serde_json::Value),
#[fail(display = "type error: no type for `{}` of `{}`", _0, _1)]
TypeError(String, crate::directives::Type),
diff --git a/lib-lopez/src/lib.rs b/lib-lopez/src/lib.rs
index a97f330..69ef966 100644
--- a/lib-lopez/src/lib.rs
+++ b/lib-lopez/src/lib.rs
@@ -55,29 +55,41 @@ macro_rules! main {
$crate::cli_impl!($backend_ty);
#[tokio::main(basic_scheduler)]
- async fn main() -> Result<(), $crate::Error> {
+        /// Program entry point: runs the CLI and reports its outcome.
+        ///
+        /// A success message, if any, is printed to stdout. A failure is
+        /// printed to stderr and the process exits with status 1 so that
+        /// callers and scripts can detect it.
+        pub async fn main() -> Result<(), $crate::Error> {
+            use $crate::ansi_term::Color::{Green, Red};
+
+            match run().await {
+                Ok(Some(msg)) => println!("{}: {}", Green.bold().paint("ok"), msg),
+                Ok(None) => {}
+                Err(err) => {
+                    // Returning `Ok(())` here would make a failed run exit with status 0.
+                    eprintln!("{}: {}", Red.bold().paint("error"), err);
+                    ::std::process::exit(1);
+                }
+            }
+
+            Ok(())
+        }
+
+ async fn run() -> Result