From 8df238c20dd73e1cfefc6f71bca62fb2cc5fadd9 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 12:33:49 +1000 Subject: [PATCH 01/27] Stub module for parsing --- src/main.rs | 4 +++- src/parsing/mod.rs | 8 ++++++++ src/parsing/parser.rs | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 src/parsing/mod.rs create mode 100644 src/parsing/parser.rs diff --git a/src/main.rs b/src/main.rs index 8a81de0..52c5cde 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ use tracing::debug; use tracing_subscriber; mod rendering; +mod parsing; fn main() { const VERSION: &str = concat!("v", env!("CARGO_PKG_VERSION")); @@ -89,10 +90,11 @@ fn main() { let filename = submatches .get_one::("filename") - .unwrap(); // argument are required by definitin so always present + .unwrap(); // argument are required by definition so always present debug!(filename); + parsing::load(&Path::new(filename)); todo!(); } Some(("format", submatches)) => { diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs new file mode 100644 index 0000000..30cadc0 --- /dev/null +++ b/src/parsing/mod.rs @@ -0,0 +1,8 @@ +// parser for the Technique language +use std::path::Path; + +pub mod parser; + +pub fn load(source: &Path) { + +} diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs new file mode 100644 index 0000000..5675879 --- /dev/null +++ b/src/parsing/parser.rs @@ -0,0 +1,4 @@ +// parsing machinery + +pub fn load() { +} From 48a0ae9a11fe2b22c920aa20d699c0bffd919431 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 13:20:46 +1000 Subject: [PATCH 02/27] Add pest dependency --- Cargo.lock | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + 2 files changed, 146 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index d3f609d..e5500bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,15 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -97,6 +106,35 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "errno" version = "0.3.9" @@ -107,6 +145,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -171,6 +219,51 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "pest" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c73c26c01b8c87956cea613c907c9d6ecffd8d18a2a5908e5de0adfaa185cea" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "664d22978e2815783adbdd2c588b455b1bd625299ce36b2a99881ac9627e6d8d" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d5487022d5d33f4c30d91c22afa240ce2a644e87fe08caad974d4eab6badbe" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0091754bbd0ea592c4deb3a122ce8ecbb0753b738aa82bc055fcc2eccc8d8174" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -246,6 +339,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -283,6 +387,8 @@ name = "technique" version = "0.3.0" dependencies = [ "clap", + "pest", + "pest_derive", "serde", "tinytemplate", "tracing", @@ -299,6 +405,26 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -376,6 +502,18 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -394,6 +532,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 75da6fc..86ded5a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,8 @@ license = "MIT" [dependencies] clap = { version = "4.5.16", features = [ "wrap_help" ] } +pest = "2.7.11" +pest_derive = "2.7.11" serde = { version = "1.0.209", features = [ "derive" ] } tinytemplate = "1.2.1" tracing = "0.1.40" From 1e440ea01c771661642976a1e9530145e82b658f Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 13:21:43 +1000 Subject: [PATCH 03/27] Grammar for procedure declaration --- src/parsing/parser.rs | 34 +++++++++++++++++++++++++++++++++- technique.pest | 15 +++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 technique.pest diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 5675879..2fb9ca2 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,4 +1,36 @@ // parsing machinery -pub fn load() { +use pest::Parser; +use pest_derive::Parser; + +#[derive(Parser)] +#[grammar = "../technique.pest"] +struct TechniqueParser; + +pub fn load() {} + +#[cfg(test)] +mod tests { + use super::*; // Import all parent module items + + #[test] + fn check_procedure_declaration() { + let input = "making_coffee : Beans -> Coffee"; + + let declaration = TechniqueParser::parse(Rule::declaration, &input) + .expect("Unsuccessful Parse") + .next() + .unwrap(); + + assert_eq!(declaration.as_str(), "making_coffee : Beans -> Coffee"); + assert_eq!(declaration.as_rule(), Rule::declaration); + + let identifier = declaration + .into_inner() + .next() + .unwrap(); + + assert_eq!(identifier.as_str(), "making_coffee"); + assert_eq!(identifier.as_rule(), Rule::identifier); + } } diff --git a/technique.pest b/technique.pest new file mode 100644 index 0000000..acfc4a4 --- /dev/null +++ b/technique.pest @@ -0,0 +1,15 @@ +// Parsing Expression Grammar for v1 of the Technique Procedure Language + +WHITESPACE = _{ " " | "\t" } + +declaration = { identifier ~ ":" ~ signature? } + +identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } + +signature = { type ~ "->" ~ type } + +type = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } + + + + From 5fb162a0630ddd6d4eec89cf6089ee1811534ecf Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 14:45:17 +1000 Subject: [PATCH 04/27] Type is a reserved word so call types typas --- src/parsing/parser.rs | 24 ++++++++++++++++++++++-- technique.pest | 4 ++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 2fb9ca2..31e1ecf 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -25,12 +25,32 @@ mod tests { assert_eq!(declaration.as_str(), "making_coffee : Beans -> Coffee"); assert_eq!(declaration.as_rule(), Rule::declaration); - let identifier = declaration - .into_inner() + let mut pairs = declaration.into_inner(); + + let identifier = pairs .next() .unwrap(); assert_eq!(identifier.as_str(), "making_coffee"); assert_eq!(identifier.as_rule(), Rule::identifier); + + let signature = pairs + .next() + .unwrap(); + + assert_eq!(signature.as_str(), "Beans -> Coffee"); + assert_eq!(signature.as_rule(), Rule::signature); + + let mut pairs = signature.into_inner(); + + let domain = pairs.next().unwrap(); + + assert_eq!(domain.as_str(), "Beans"); + assert_eq!(domain.as_rule(), Rule::typa); + + let range = pairs.next().unwrap(); + + assert_eq!(range.as_str(), "Coffee"); + assert_eq!(range.as_rule(), Rule::typa); } } diff --git a/technique.pest b/technique.pest index acfc4a4..6468854 100644 --- a/technique.pest +++ b/technique.pest @@ -6,9 +6,9 @@ declaration = { identifier ~ ":" ~ signature? } identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } -signature = { type ~ "->" ~ type } +signature = { typa ~ "->" ~ typa } -type = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } +typa = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } From 163c33bcc83d2e2c878d6c254a490224706f48ef Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 16:31:12 +1000 Subject: [PATCH 05/27] Handle multiple types in domain --- src/parsing/parser.rs | 28 +++++++++++++++++++++------- technique.pest | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 31e1ecf..dc78b5c 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -15,14 +15,17 @@ mod tests { #[test] fn check_procedure_declaration() { - let input = "making_coffee : Beans -> Coffee"; + let input = "making_coffee : Beans, Milk -> Coffee"; let declaration = TechniqueParser::parse(Rule::declaration, &input) .expect("Unsuccessful Parse") .next() .unwrap(); - assert_eq!(declaration.as_str(), "making_coffee : Beans -> Coffee"); + assert_eq!( + declaration.as_str(), + "making_coffee : Beans, Milk -> Coffee" + ); assert_eq!(declaration.as_rule(), Rule::declaration); let mut pairs = declaration.into_inner(); @@ -38,17 +41,28 @@ mod tests { .next() .unwrap(); - assert_eq!(signature.as_str(), "Beans -> Coffee"); + assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); assert_eq!(signature.as_rule(), Rule::signature); let mut pairs = signature.into_inner(); - let domain = pairs.next().unwrap(); + let domain1 = pairs + .next() + .unwrap(); + + assert_eq!(domain1.as_str(), "Beans"); + assert_eq!(domain1.as_rule(), Rule::typa); + + let domain2 = pairs + .next() + .unwrap(); - assert_eq!(domain.as_str(), "Beans"); - assert_eq!(domain.as_rule(), Rule::typa); + assert_eq!(domain2.as_str(), "Milk"); + assert_eq!(domain2.as_rule(), Rule::typa); - let range = pairs.next().unwrap(); + let range = pairs + .next() + .unwrap(); assert_eq!(range.as_str(), "Coffee"); assert_eq!(range.as_rule(), Rule::typa); diff --git a/technique.pest b/technique.pest index 6468854..ee2df97 100644 --- a/technique.pest +++ b/technique.pest @@ -6,7 +6,7 @@ declaration = { identifier ~ ":" ~ signature? } identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } -signature = { typa ~ "->" ~ typa } +signature = { typa ~ ("," ~ typa)* ~ "->" ~ typa } typa = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } From 024a6d0584b722ec0caf3c188dd53cbe4e84a2bb Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 16:44:04 +1000 Subject: [PATCH 06/27] Add parser test using macro --- src/parsing/parser.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index dc78b5c..2e47192 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,6 +1,6 @@ // parsing machinery -use pest::Parser; +use pest::{consumes_to, parses_to, Parser}; use pest_derive::Parser; #[derive(Parser)] @@ -14,7 +14,7 @@ mod tests { use super::*; // Import all parent module items #[test] - fn check_procedure_declaration() { + fn check_procedure_declaration_explicit() { let input = "making_coffee : Beans, Milk -> Coffee"; let declaration = TechniqueParser::parse(Rule::declaration, &input) @@ -67,4 +67,23 @@ mod tests { assert_eq!(range.as_str(), "Coffee"); assert_eq!(range.as_rule(), Rule::typa); } + + #[test] + fn check_procedure_declaration_macro() { + parses_to! { + parser: TechniqueParser, + input: "making_coffee : Beans, Milk -> Coffee", + rule: Rule::declaration, + tokens: [ + declaration(0, 37, [ + identifier(0, 13), + signature(16, 37, [ + typa(16, 21), + typa(23, 27), + typa(31, 37) + ]) + ]) + ] + }; + } } From 31b2873cc2f5c7ad88ab2e787a349ee41b75e63e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 19 Sep 2024 17:35:57 +1000 Subject: [PATCH 07/27] Rename type again, to forma --- src/parsing/parser.rs | 12 ++++++------ technique.pest | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 2e47192..3e44b85 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -51,21 +51,21 @@ mod tests { .unwrap(); assert_eq!(domain1.as_str(), "Beans"); - assert_eq!(domain1.as_rule(), Rule::typa); + assert_eq!(domain1.as_rule(), Rule::forma); let domain2 = pairs .next() .unwrap(); assert_eq!(domain2.as_str(), "Milk"); - assert_eq!(domain2.as_rule(), Rule::typa); + assert_eq!(domain2.as_rule(), Rule::forma); let range = pairs .next() .unwrap(); assert_eq!(range.as_str(), "Coffee"); - assert_eq!(range.as_rule(), Rule::typa); + assert_eq!(range.as_rule(), Rule::forma); } #[test] @@ -78,9 +78,9 @@ mod tests { declaration(0, 37, [ identifier(0, 13), signature(16, 37, [ - typa(16, 21), - typa(23, 27), - typa(31, 37) + forma(16, 21), + forma(23, 27), + forma(31, 37) ]) ]) ] diff --git a/technique.pest b/technique.pest index ee2df97..804c355 100644 --- a/technique.pest +++ b/technique.pest @@ -6,9 +6,9 @@ declaration = { identifier ~ ":" ~ signature? } identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } -signature = { typa ~ ("," ~ typa)* ~ "->" ~ typa } +signature = { forma ~ ("," ~ forma )* ~ "->" ~ forma } -typa = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } +forma = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } From c2ae2e8ab2d072e2a7b323704f466b6e15c12f1f Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 19 Sep 2024 17:36:49 +1000 Subject: [PATCH 08/27] Pass input file to parser when checking --- src/parsing/mod.rs | 3 +++ src/parsing/parser.rs | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index 30cadc0..fbfb83f 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -4,5 +4,8 @@ use std::path::Path; pub mod parser; pub fn load(source: &Path) { + // read source to a str + let content = std::fs::read_to_string(source).expect("Failed to read the source file"); + parser::parse_via_pest(content.as_str()); } diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 3e44b85..c205038 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -7,7 +7,10 @@ use pest_derive::Parser; #[grammar = "../technique.pest"] struct TechniqueParser; -pub fn load() {} +pub fn parse_via_pest(content: &str) { + let technique = TechniqueParser::parse(Rule::technique, &content); + println!("{:?}", technique); +} #[cfg(test)] mod tests { From 35ca52073a228895edfbfc128ae3aeed689cdf38 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 19 Sep 2024 17:37:07 +1000 Subject: [PATCH 09/27] Grammar for header lines in a Technique file --- technique.pest | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/technique.pest b/technique.pest index 804c355..98f80c7 100644 --- a/technique.pest +++ b/technique.pest @@ -2,7 +2,27 @@ WHITESPACE = _{ " " | "\t" } -declaration = { identifier ~ ":" ~ signature? } +technique = { SOI ~ magic_line ~ spdx_line? ~ NEWLINE* ~ declaration ~ EOI } + +// File Format Header + +magic_line = { "%" ~ "technique" ~ "v1" ~ NEWLINE } + +// License and Copyright Header + +spdx_line = { "!" ~ license ~ ";" ~ copyright ~ NEWLINE } + +license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " )* } + +copyright = { ("©" | "(c)" | "(C)") ~ year? ~ owner } + +year = @{ ASCII_DIGIT{4} } + +owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | ",")* } + +// Procedure Declaration + +declaration = { identifier ~ ":" ~ signature? ~ NEWLINE } identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } From 9a7abaa0b8022debae06663a3ee2cce7e2292c31 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 19 Sep 2024 17:40:02 +1000 Subject: [PATCH 10/27] Reconsider place of newlines in grammar --- technique.pest | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/technique.pest b/technique.pest index 98f80c7..8bf62fc 100644 --- a/technique.pest +++ b/technique.pest @@ -2,15 +2,24 @@ WHITESPACE = _{ " " | "\t" } -technique = { SOI ~ magic_line ~ spdx_line? ~ NEWLINE* ~ declaration ~ EOI } +technique = { + SOI + ~ magic_line + ~ NEWLINE + ~ spdx_line? + ~ NEWLINE+ + ~ declaration + ~ NEWLINE+ + ~ EOI +} // File Format Header -magic_line = { "%" ~ "technique" ~ "v1" ~ NEWLINE } +magic_line = { "%" ~ "technique" ~ "v1" } // License and Copyright Header -spdx_line = { "!" ~ license ~ ";" ~ copyright ~ NEWLINE } +spdx_line = { "!" ~ license ~ ";" ~ copyright } license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " )* } @@ -22,7 +31,7 @@ owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" // Procedure Declaration -declaration = { identifier ~ ":" ~ signature? ~ NEWLINE } +declaration = { identifier ~ ":" ~ signature? } identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } From 2132ef4b9510f91112b5383e5e45c47bd3c86bc8 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 10:27:25 +1000 Subject: [PATCH 11/27] Test copyright string on spdx line --- src/parsing/parser.rs | 40 ++++++++++++++++++++++++++++++++++++++++ technique.pest | 24 ++++++++++++------------ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index c205038..4ceaf9b 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -89,4 +89,44 @@ mod tests { ] }; } + + #[test] + fn check_header_spdx() { + parses_to! { + parser: TechniqueParser, + input: "! MIT; (c) ACME, Inc.", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 21, [ + license(2, 5), + copyright(7, 21, [ + owner(11, 21) + ]) + ]) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "! MIT; (c) 2024 ACME, Inc.", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 26, [ + license(2, 5), + copyright(7, 26, [ + year(11,15), + owner(16, 26) + ]) + ]) + ] + }; + + parses_to! { + parser: TechniqueParser, + input: "2024", + rule: Rule::year, + tokens: [ + year(0,4), + ] + }; + } } diff --git a/technique.pest b/technique.pest index 8bf62fc..31cddb0 100644 --- a/technique.pest +++ b/technique.pest @@ -3,14 +3,14 @@ WHITESPACE = _{ " " | "\t" } technique = { - SOI - ~ magic_line - ~ NEWLINE - ~ spdx_line? - ~ NEWLINE+ - ~ declaration - ~ NEWLINE+ - ~ EOI + SOI ~ + magic_line ~ + NEWLINE ~ + spdx_line? ~ + NEWLINE+ ~ + declaration ~ + NEWLINE+ ~ + EOI } // File Format Header @@ -19,15 +19,15 @@ magic_line = { "%" ~ "technique" ~ "v1" } // License and Copyright Header -spdx_line = { "!" ~ license ~ ";" ~ copyright } +spdx_line = { "!" ~ license ~ (";" ~ copyright)? } -license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " )* } +license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "." )* } copyright = { ("©" | "(c)" | "(C)") ~ year? ~ owner } -year = @{ ASCII_DIGIT{4} } +year = @{ ASCII_DIGIT{4} ~ "-" ~ (ASCII_DIGIT{4})? | ASCII_DIGIT{4} } -owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | ",")* } +owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "," | "." )* } // Procedure Declaration From 08d1e040b142a84b22e494ee15ebbf78c670336a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 10:43:08 +1000 Subject: [PATCH 12/27] Grammar for template line in header --- src/parsing/parser.rs | 38 +++++++++++++++++++++++++++++++++++++- technique.pest | 8 ++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 4ceaf9b..3106b0d 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,6 +1,6 @@ // parsing machinery -use pest::{consumes_to, parses_to, Parser}; +use pest::Parser; use pest_derive::Parser; #[derive(Parser)] @@ -14,6 +14,8 @@ pub fn parse_via_pest(content: &str) { #[cfg(test)] mod tests { + use pest::{consumes_to, fails_with, parses_to}; + use super::*; // Import all parent module items #[test] @@ -129,4 +131,38 @@ mod tests { ] }; } + + #[test] + fn check_header_template() { + parses_to! { + parser: TechniqueParser, + input: "& checklist", + rule: Rule::template_line, + tokens: [ + template_line(0, 11, [ + template(2, 11) + ]) + ] + }; + + parses_to! { + parser: TechniqueParser, + input: "& nasa-flight-plan-v4.0", + rule: Rule::template_line, + tokens: [ + template_line(0, 23, [ + template(2, 23) + ]) + ] + }; + + fails_with! { + parser: TechniqueParser, + input: "&", + rule: Rule::template_line, + positives: [Rule::template], + negatives: [], + pos: 1 + }; + } } diff --git a/technique.pest b/technique.pest index 31cddb0..13504d4 100644 --- a/technique.pest +++ b/technique.pest @@ -7,6 +7,8 @@ technique = { magic_line ~ NEWLINE ~ spdx_line? ~ + NEWLINE ~ + template_line? ~ NEWLINE+ ~ declaration ~ NEWLINE+ ~ @@ -29,6 +31,12 @@ year = @{ ASCII_DIGIT{4} ~ "-" ~ (ASCII_DIGIT{4})? | ASCII_DIGIT{4} } owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "," | "." )* } +// Template Header + +template_line = { "&" ~ template } + +template = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | "." | "," )* } + // Procedure Declaration declaration = { identifier ~ ":" ~ signature? } From 73a2427fe95e5432de3dce18ad0ca3da7cc8ca1e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 14:21:54 +1000 Subject: [PATCH 13/27] Additional year tests --- src/parsing/parser.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 3106b0d..035fea2 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -130,6 +130,38 @@ mod tests { year(0,4), ] }; + parses_to! { + parser: TechniqueParser, + input: "2024-", + rule: Rule::year, + tokens: [ + year(0,5), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "2002-2024", + rule: Rule::year, + tokens: [ + year(0,9), + ] + }; + fails_with! { + parser: TechniqueParser, + input: "02", + rule: Rule::year, + positives: [Rule::year], + negatives: [], + pos: 0 + }; + fails_with! { + parser: TechniqueParser, + input: "02-24", + rule: Rule::year, + positives: [Rule::year], + negatives: [], + pos: 0 + }; } #[test] From 900077efea9d22e4552f8a2d68db4dc26a7dc92c Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 14:28:54 +1000 Subject: [PATCH 14/27] Additional license tests --- src/parsing/parser.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 035fea2..f3362b2 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -121,6 +121,41 @@ mod tests { ]) ] }; + parses_to! { + parser: TechniqueParser, + input: "! PD", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 4, [ + license(2, 4) + ]) + ] + }; + + parses_to! { + parser: TechniqueParser, + input: "MIT", + rule: Rule::license, + tokens: [ + license(0,3), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "Public Domain", + rule: Rule::license, + tokens: [ + license(0,13), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "CC BY-SA 3.0 IGO", + rule: Rule::license, + tokens: [ + license(0,16), + ] + }; parses_to! { parser: TechniqueParser, From e7da2c1557f5c4f767e9d437e2a799be45c0d693 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 14:58:09 +1000 Subject: [PATCH 15/27] Test identifiers and declarations --- src/parsing/parser.rs | 88 ++++++++++++++++++++++++++++++++++++++----- technique.pest | 2 +- 2 files changed, 80 insertions(+), 10 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index f3362b2..692ac78 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -115,7 +115,7 @@ mod tests { spdx_line(0, 26, [ license(2, 5), copyright(7, 26, [ - year(11,15), + year(11, 15), owner(16, 26) ]) ]) @@ -137,7 +137,7 @@ mod tests { input: "MIT", rule: Rule::license, tokens: [ - license(0,3), + license(0, 3), ] }; parses_to! { @@ -145,7 +145,7 @@ mod tests { input: "Public Domain", rule: Rule::license, tokens: [ - license(0,13), + license(0, 13), ] }; parses_to! { @@ -153,7 +153,7 @@ mod tests { input: "CC BY-SA 3.0 IGO", rule: Rule::license, tokens: [ - license(0,16), + license(0, 16), ] }; @@ -162,7 +162,7 @@ mod tests { input: "2024", rule: Rule::year, tokens: [ - year(0,4), + year(0, 4), ] }; parses_to! { @@ -170,7 +170,7 @@ mod tests { input: "2024-", rule: Rule::year, tokens: [ - year(0,5), + year(0, 5), ] }; parses_to! { @@ -178,7 +178,7 @@ mod tests { input: "2002-2024", rule: Rule::year, tokens: [ - year(0,9), + year(0, 9), ] }; fails_with! { @@ -211,7 +211,6 @@ mod tests { ]) ] }; - parses_to! { parser: TechniqueParser, input: "& nasa-flight-plan-v4.0", @@ -222,7 +221,6 @@ mod tests { ]) ] }; - fails_with! { parser: TechniqueParser, input: "&", @@ -232,4 +230,76 @@ mod tests { pos: 1 }; } + + #[test] + fn check_identifier_rules() { + parses_to! { + parser: TechniqueParser, + input: "p", + rule: Rule::identifier, + tokens: [ + identifier(0, 1) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "pizza", + rule: Rule::identifier, + tokens: [ + identifier(0, 5) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "cook_pizza", + rule: Rule::identifier, + tokens: [ + identifier(0, 10) + ] + }; + fails_with! { + parser: TechniqueParser, + input: "0trust", + rule: Rule::identifier, + positives: [Rule::identifier], + negatives: [], + pos: 0 + }; + } + + #[test] + fn check_declaration_syntax() { + parses_to! { + parser: TechniqueParser, + input: "p :", + rule: Rule::declaration, + tokens: [ + declaration(0, 3, [ + identifier(0, 1) + ]) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "p : A -> B", + rule: Rule::declaration, + tokens: [ + declaration(0, 10, [ + identifier(0, 1), + signature(4, 10, [ + forma(4, 5), + forma(9, 10) + ]) + ]) + ] + }; + fails_with! { + parser: TechniqueParser, + input: "cook-pizza :", + rule: Rule::declaration, + positives: [Rule::declaration], + negatives: [], + pos: 0 + }; + } } diff --git a/technique.pest b/technique.pest index 13504d4..5063adf 100644 --- a/technique.pest +++ b/technique.pest @@ -39,7 +39,7 @@ template = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | " // Procedure Declaration -declaration = { identifier ~ ":" ~ signature? } +declaration = { identifier ~ ":" ~ signature? } identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } From 8f595953b3efbcb4d284c4ec2e1c47958864bfc8 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 23 Sep 2024 12:15:59 +1000 Subject: [PATCH 16/27] Add winnow parser dependency --- Cargo.lock | 10 ++++++++++ Cargo.toml | 1 + 2 files changed, 11 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index e5500bf..db4d7f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -393,6 +393,7 @@ dependencies = [ "tinytemplate", "tracing", "tracing-subscriber", + "winnow", ] [[package]] @@ -698,3 +699,12 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" +dependencies = [ + "memchr", +] diff --git a/Cargo.toml b/Cargo.toml index 86ded5a..86e4478 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ serde = { version = "1.0.209", features = [ "derive" ] } tinytemplate = "1.2.1" tracing = "0.1.40" tracing-subscriber = "0.3.18" +winnow = "0.6.18" From df2cbea96fd95738b295143bd817bb5da5e19e43 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Wed, 25 Sep 2024 17:57:00 +1000 Subject: [PATCH 17/27] Reimplement identifier parser --- src/parsing/mod.rs | 2 +- src/parsing/parser.rs | 450 ++++++++++++++++++++---------------------- 2 files changed, 214 insertions(+), 238 deletions(-) diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index fbfb83f..a607228 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -7,5 +7,5 @@ pub fn load(source: &Path) { // read source to a str let content = std::fs::read_to_string(source).expect("Failed to read the source file"); - parser::parse_via_pest(content.as_str()); + parser::parse_via_winnow(content.as_str()); } diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 692ac78..007b6cd 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,271 +1,246 @@ // parsing machinery -use pest::Parser; -use pest_derive::Parser; +// struct TechniqueParser; -#[derive(Parser)] -#[grammar = "../technique.pest"] -struct TechniqueParser; +use winnow::token::take_while; +use winnow::{PResult, Parser}; -pub fn parse_via_pest(content: &str) { - let technique = TechniqueParser::parse(Rule::technique, &content); - println!("{:?}", technique); +pub fn parse_via_winnow(_content: &str) { + // let technique = TechniqueParser::parse(Rule::technique, &content); + // println!("{:?}", technique); +} + +fn parse_identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { + take_while(1.., (('0'..='9'), ('A'..='Z'), ('a'..='z'), ('_'))).parse_next(input) } #[cfg(test)] mod tests { - use pest::{consumes_to, fails_with, parses_to}; - - use super::*; // Import all parent module items - + use super::*; + #[test] - fn check_procedure_declaration_explicit() { - let input = "making_coffee : Beans, Milk -> Coffee"; + fn check_identifier_rules() { + let mut input = "p"; - let declaration = TechniqueParser::parse(Rule::declaration, &input) - .expect("Unsuccessful Parse") - .next() + let result = parse_identifier + .parse_next(&mut input) .unwrap(); - assert_eq!( - declaration.as_str(), - "making_coffee : Beans, Milk -> Coffee" - ); - assert_eq!(declaration.as_rule(), Rule::declaration); + assert_eq!(result, "p"); - let mut pairs = declaration.into_inner(); - - let identifier = pairs - .next() + let mut input = "pizza"; + let result = parse_identifier + .parse_next(&mut input) .unwrap(); + assert_eq!(result, "pizza"); - assert_eq!(identifier.as_str(), "making_coffee"); - assert_eq!(identifier.as_rule(), Rule::identifier); - - let signature = pairs - .next() + let mut input = "cook_pizza"; + let result = parse_identifier + .parse_next(&mut input) .unwrap(); + assert_eq!(result, "cook_pizza"); + /* + fails_with! { + parser: TechniqueParser, + input: "0trust", + rule: Rule::identifier, + positives: [Rule::identifier], + negatives: [], + pos: 0 + }; + */ + } - assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); - assert_eq!(signature.as_rule(), Rule::signature); - - let mut pairs = signature.into_inner(); + // Import all parent module items + /* + #[test] + fn check_procedure_declaration_explicit() { + let input = "making_coffee : Beans, Milk -> Coffee"; - let domain1 = pairs - .next() - .unwrap(); + // let declaration = TechniqueParser::parse(Rule::declaration, &input) + // .expect("Unsuccessful Parse") + // .next() + // .unwrap(); - assert_eq!(domain1.as_str(), "Beans"); - assert_eq!(domain1.as_rule(), Rule::forma); + assert_eq!( + input, // FIXME + "making_coffee : Beans, Milk -> Coffee" + ); - let domain2 = pairs - .next() - .unwrap(); + // assert_eq!(identifier.as_str(), "making_coffee"); + // assert_eq!(identifier.as_rule(), Rule::identifier); - assert_eq!(domain2.as_str(), "Milk"); - assert_eq!(domain2.as_rule(), Rule::forma); + // assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); + // assert_eq!(signature.as_rule(), Rule::signature); - let range = pairs - .next() - .unwrap(); + // assert_eq!(domain1.as_str(), "Beans"); + // assert_eq!(domain1.as_rule(), Rule::forma); - assert_eq!(range.as_str(), "Coffee"); - assert_eq!(range.as_rule(), Rule::forma); - } + // assert_eq!(domain2.as_str(), "Milk"); + // assert_eq!(domain2.as_rule(), Rule::forma); - #[test] - fn check_procedure_declaration_macro() { - parses_to! { - parser: TechniqueParser, - input: "making_coffee : Beans, Milk -> Coffee", - rule: Rule::declaration, - tokens: [ - declaration(0, 37, [ - identifier(0, 13), - signature(16, 37, [ - forma(16, 21), - forma(23, 27), - forma(31, 37) + // assert_eq!(range.as_str(), "Coffee"); + // assert_eq!(range.as_rule(), Rule::forma); + } + */ + /* + #[test] + fn check_procedure_declaration_macro() { + parses_to! { + parser: TechniqueParser, + input: "making_coffee : Beans, Milk -> Coffee", + rule: Rule::declaration, + tokens: [ + declaration(0, 37, [ + identifier(0, 13), + signature(16, 37, [ + forma(16, 21), + forma(23, 27), + forma(31, 37) + ]) ]) - ]) - ] - }; - } + ] + }; + } - #[test] - fn check_header_spdx() { - parses_to! { - parser: TechniqueParser, - input: "! MIT; (c) ACME, Inc.", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 21, [ - license(2, 5), - copyright(7, 21, [ - owner(11, 21) + #[test] + fn check_header_spdx() { + parses_to! { + parser: TechniqueParser, + input: "! MIT; (c) ACME, Inc.", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 21, [ + license(2, 5), + copyright(7, 21, [ + owner(11, 21) + ]) ]) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "! MIT; (c) 2024 ACME, Inc.", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 26, [ - license(2, 5), - copyright(7, 26, [ - year(11, 15), - owner(16, 26) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "! MIT; (c) 2024 ACME, Inc.", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 26, [ + license(2, 5), + copyright(7, 26, [ + year(11, 15), + owner(16, 26) + ]) ]) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "! PD", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 4, [ - license(2, 4) - ]) - ] - }; + ] + }; + parses_to! { + parser: TechniqueParser, + input: "! PD", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 4, [ + license(2, 4) + ]) + ] + }; - parses_to! { - parser: TechniqueParser, - input: "MIT", - rule: Rule::license, - tokens: [ - license(0, 3), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "Public Domain", - rule: Rule::license, - tokens: [ - license(0, 13), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "CC BY-SA 3.0 IGO", - rule: Rule::license, - tokens: [ - license(0, 16), - ] - }; + parses_to! { + parser: TechniqueParser, + input: "MIT", + rule: Rule::license, + tokens: [ + license(0, 3), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "Public Domain", + rule: Rule::license, + tokens: [ + license(0, 13), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "CC BY-SA 3.0 IGO", + rule: Rule::license, + tokens: [ + license(0, 16), + ] + }; - parses_to! { - parser: TechniqueParser, - input: "2024", - rule: Rule::year, - tokens: [ - year(0, 4), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "2024-", - rule: Rule::year, - tokens: [ - year(0, 5), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "2002-2024", - rule: Rule::year, - tokens: [ - year(0, 9), - ] - }; - fails_with! { - parser: TechniqueParser, - input: "02", - rule: Rule::year, - positives: [Rule::year], - negatives: [], - pos: 0 - }; - fails_with! { - parser: TechniqueParser, - input: "02-24", - rule: Rule::year, - positives: [Rule::year], - negatives: [], - pos: 0 - }; - } - - #[test] - fn check_header_template() { - parses_to! { - parser: TechniqueParser, - input: "& checklist", - rule: Rule::template_line, - tokens: [ - template_line(0, 11, [ - template(2, 11) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "& nasa-flight-plan-v4.0", - rule: Rule::template_line, - tokens: [ - template_line(0, 23, [ - template(2, 23) - ]) - ] - }; - fails_with! { - parser: TechniqueParser, - input: "&", - rule: Rule::template_line, - positives: [Rule::template], - negatives: [], - pos: 1 - }; - } + parses_to! { + parser: TechniqueParser, + input: "2024", + rule: Rule::year, + tokens: [ + year(0, 4), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "2024-", + rule: Rule::year, + tokens: [ + year(0, 5), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "2002-2024", + rule: Rule::year, + tokens: [ + year(0, 9), + ] + }; + fails_with! { + parser: TechniqueParser, + input: "02", + rule: Rule::year, + positives: [Rule::year], + negatives: [], + pos: 0 + }; + fails_with! { + parser: TechniqueParser, + input: "02-24", + rule: Rule::year, + positives: [Rule::year], + negatives: [], + pos: 0 + }; + } - #[test] - fn check_identifier_rules() { - parses_to! { - parser: TechniqueParser, - input: "p", - rule: Rule::identifier, - tokens: [ - identifier(0, 1) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "pizza", - rule: Rule::identifier, - tokens: [ - identifier(0, 5) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "cook_pizza", - rule: Rule::identifier, - tokens: [ - identifier(0, 10) - ] - }; - fails_with! { - parser: TechniqueParser, - input: "0trust", - rule: Rule::identifier, - positives: [Rule::identifier], - negatives: [], - pos: 0 - }; - } + #[test] + fn check_header_template() { + parses_to! { + parser: TechniqueParser, + input: "& checklist", + rule: Rule::template_line, + tokens: [ + template_line(0, 11, [ + template(2, 11) + ]) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "& nasa-flight-plan-v4.0", + rule: Rule::template_line, + tokens: [ + template_line(0, 23, [ + template(2, 23) + ]) + ] + }; + fails_with! { + parser: TechniqueParser, + input: "&", + rule: Rule::template_line, + positives: [Rule::template], + negatives: [], + pos: 1 + }; + } #[test] fn check_declaration_syntax() { @@ -302,4 +277,5 @@ mod tests { pos: 0 }; } + */ } From 47ce87050dad85e4e540c08d003506adaa29ce9e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 26 Sep 2024 13:04:25 +1000 Subject: [PATCH 18/27] Improve parse_identifier to enforce first character --- src/parsing/parser.rs | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 007b6cd..9f63f97 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -2,7 +2,9 @@ // struct TechniqueParser; -use winnow::token::take_while; +use winnow::combinator::empty; +use winnow::stream::AsChar; +use winnow::token::{one_of, take_while}; use winnow::{PResult, Parser}; pub fn parse_via_winnow(_content: &str) { @@ -10,14 +12,20 @@ pub fn parse_via_winnow(_content: &str) { // println!("{:?}", technique); } +// a winnow parser that takes an alpha and then any character fn parse_identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { - take_while(1.., (('0'..='9'), ('A'..='Z'), ('a'..='z'), ('_'))).parse_next(input) + ( + one_of('a'..='z'), + take_while(0.., (('0'..='9'), ('a'..='z'), ('_'))), + ) + .take() + .parse_next(input) } #[cfg(test)] mod tests { use super::*; - + #[test] fn check_identifier_rules() { let mut input = "p"; @@ -39,16 +47,9 @@ mod tests { .parse_next(&mut input) .unwrap(); assert_eq!(result, "cook_pizza"); - /* - fails_with! { - parser: TechniqueParser, - input: "0trust", - rule: Rule::identifier, - positives: [Rule::identifier], - negatives: [], - pos: 0 - }; - */ + + assert!(parse_identifier(&mut "0trust").is_err()); + assert!(parse_identifier(&mut "Pizza").is_err()); } // Import all parent module items From 61502e28f5fe7bcdb2d6f9b45d49c7fd0065c850 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Sat, 28 Sep 2024 17:06:40 +1000 Subject: [PATCH 19/27] Use verify to ensure identifier parse is valid --- src/parsing/parser.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 9f63f97..cce1c73 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -2,14 +2,15 @@ // struct TechniqueParser; -use winnow::combinator::empty; -use winnow::stream::AsChar; +use winnow::error::StrContext; use winnow::token::{one_of, take_while}; use winnow::{PResult, Parser}; -pub fn parse_via_winnow(_content: &str) { +pub fn parse_via_winnow(content: &str) { // let technique = TechniqueParser::parse(Rule::technique, &content); // println!("{:?}", technique); + let result = parse_identifier.parse(content).unwrap(); + println!("{}", result); } // a winnow parser that takes an alpha and then any character @@ -19,6 +20,8 @@ fn parse_identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { take_while(0.., (('0'..='9'), ('a'..='z'), ('_'))), ) .take() + .verify(|s: &str| s.len() == input.len()) + .context(StrContext::Label("identifier")) .parse_next(input) } @@ -50,6 +53,10 @@ mod tests { assert!(parse_identifier(&mut "0trust").is_err()); assert!(parse_identifier(&mut "Pizza").is_err()); + assert!(parse_identifier(&mut "pizZa").is_err()); + + assert_eq!(parse_identifier(&mut "cook_pizza"), Ok("cook_pizza")); + assert!(parse_identifier(&mut "cook-pizza").is_err()); } // Import all parent module items From 244a9db1776cd3c85987924ec74f4ba936d4923a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 30 Sep 2024 12:51:59 +1000 Subject: [PATCH 20/27] Add chumsky parser dependency --- Cargo.lock | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + 2 files changed, 97 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index db4d7f9..e2fbd9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,24 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "anstream" version = "0.6.15" @@ -66,12 +84,31 @@ dependencies = [ "generic-array", ] +[[package]] +name = "cc" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9540e661f81799159abee814118cc139a2004b3a3aa3ea37724a1b66530b90e0" +dependencies = [ + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown", + "stacker", +] + [[package]] name = "clap" version = "4.5.16" @@ -155,6 +192,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -279,6 +326,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.37" @@ -359,12 +415,31 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.52.0", +] + [[package]] name = "strsim" version = "0.11.1" @@ -386,6 +461,7 @@ dependencies = [ name = "technique" version = "0.3.0" dependencies = [ + "chumsky", "clap", "pest", "pest_derive", @@ -708,3 +784,23 @@ checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" dependencies = [ "memchr", ] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 86e4478..ec5e3b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ repository = "https://github.com/technique-lang/technique" license = "MIT" [dependencies] +chumsky = "0.9.3" clap = { version = "4.5.16", features = [ "wrap_help" ] } pest = "2.7.11" pest_derive = "2.7.11" From bb5d9ffe68680afa690158250b23cd6ec0a92295 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 30 Sep 2024 12:54:40 +1000 Subject: [PATCH 21/27] Reimplement identifier parser again --- src/parsing/mod.rs | 2 +- src/parsing/parser.rs | 66 ++++++++++++++++++------------------------- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index a607228..1b4b2ff 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -7,5 +7,5 @@ pub fn load(source: &Path) { // read source to a str let content = std::fs::read_to_string(source).expect("Failed to read the source file"); - parser::parse_via_winnow(content.as_str()); + parser::parse_via_chumsky(content.as_str()); } diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index cce1c73..5cef353 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -2,27 +2,26 @@ // struct TechniqueParser; -use winnow::error::StrContext; -use winnow::token::{one_of, take_while}; -use winnow::{PResult, Parser}; +use chumsky::{prelude::*, Span}; -pub fn parse_via_winnow(content: &str) { - // let technique = TechniqueParser::parse(Rule::technique, &content); - // println!("{:?}", technique); - let result = parse_identifier.parse(content).unwrap(); - println!("{}", result); +pub fn parse_via_chumsky(content: &str) { + let result = parse_identifier().parse(content); + println!("{:?}", result); + std::process::exit(0); } -// a winnow parser that takes an alpha and then any character -fn parse_identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { - ( - one_of('a'..='z'), - take_while(0.., (('0'..='9'), ('a'..='z'), ('_'))), - ) - .take() - .verify(|s: &str| s.len() == input.len()) - .context(StrContext::Label("identifier")) - .parse_next(input) +type Identifier = String; + +// takes a single lower case character then any lower case character, digit, +// or unerscore. Based on the parser code in chumsky::text::ident(). + +fn parse_identifier() -> impl Parser> { + filter(|c: &char| c.is_ascii_lowercase()) + .map(Some) + .chain::, _>( + filter(|c: &char| c.is_ascii_lowercase() || c.is_ascii_digit() || *c == '_').repeated(), + ) + .collect() } #[cfg(test)] @@ -31,32 +30,23 @@ mod tests { #[test] fn check_identifier_rules() { - let mut input = "p"; + let input = "make_dinner"; + + let result = parse_identifier().parse(input); + + assert_eq!(result, Ok("make_dinner".to_string())); - let result = parse_identifier - .parse_next(&mut input) - .unwrap(); + let input = ""; - assert_eq!(result, "p"); + let result = parse_identifier().parse(input); - let mut input = "pizza"; - let result = parse_identifier - .parse_next(&mut input) - .unwrap(); - assert_eq!(result, "pizza"); + assert!(result.is_err()); - let mut input = "cook_pizza"; - let result = parse_identifier - .parse_next(&mut input) - .unwrap(); - assert_eq!(result, "cook_pizza"); + let input = "MakeDinner"; - assert!(parse_identifier(&mut "0trust").is_err()); - assert!(parse_identifier(&mut "Pizza").is_err()); - assert!(parse_identifier(&mut "pizZa").is_err()); + let result = parse_identifier().parse(input); - assert_eq!(parse_identifier(&mut "cook_pizza"), Ok("cook_pizza")); - assert!(parse_identifier(&mut "cook-pizza").is_err()); + assert!(result.is_err()); } // Import all parent module items From 33aa001adf41cf0a64be1934a616176fabfb9b3a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 30 Sep 2024 17:58:40 +1000 Subject: [PATCH 22/27] Parse magic line --- src/parsing/parser.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 5cef353..b36dee1 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -2,7 +2,7 @@ // struct TechniqueParser; -use chumsky::{prelude::*, Span}; +use chumsky::prelude::*; pub fn parse_via_chumsky(content: &str) { let result = parse_identifier().parse(content); @@ -24,6 +24,12 @@ fn parse_identifier() -> impl Parser> { .collect() } +fn parse_magic_line() -> impl Parser> { + just('%') + .ignore_then(just("technique").padded()) + .ignore_then(just("v1").to(1u8)) +} + #[cfg(test)] mod tests { use super::*; @@ -49,6 +55,15 @@ mod tests { assert!(result.is_err()); } + #[test] + fn check_magic_line() { + assert_eq!(parse_magic_line().parse("% technique v1"), Ok(1)); + assert_eq!(parse_magic_line().parse("%technique v1"), Ok(1)); + // this isn't really ideal, but there's no absolutely vital reason it + // has to be rejected. + assert_eq!(parse_magic_line().parse("%techniquev1"), Ok(1)); + } + // Import all parent module items /* #[test] From b32204d28908349502d557311adf753306da32fa Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 30 Sep 2024 23:22:57 +1000 Subject: [PATCH 23/27] Parse license and copyright header parts --- src/parsing/parser.rs | 254 +++++++++++++++++++++--------------------- 1 file changed, 129 insertions(+), 125 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index b36dee1..e255e33 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -30,6 +30,74 @@ fn parse_magic_line() -> impl Parser> { .ignore_then(just("v1").to(1u8)) } +fn parse_spdx_line() -> impl Parser> +{ + just('!') + .ignore_then(parse_license()) + .then_ignore(just(';')) + .then(parse_copyright()) +} + +fn parse_license() -> impl Parser> { + filter(|c: &char| { + c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || *c != ';' // symbol which separates license and copyright probably shouldn't ever encounter it + || c.is_ascii_punctuation() + || *c == ' ' + }) + .repeated() + .at_least(1) + .collect() +} + +// change to a semantic Copyright type +fn parse_copyright() -> impl Parser> { + let p = parse_copyright_year() + .padded() + .then(parse_copyright_owner()); + + p.map(|((y1, y2), o)| { + let mut r = String::new(); + r.push_str(&y1); + r.push_str(&y2); + r.push_str(&o); + r + }) +} + +fn year() -> impl Parser> { + filter(|c: &char| c.is_ascii_digit()) + .repeated() + .at_least(4) + .at_most(4) + .collect() +} + +fn parse_copyright_year() -> impl Parser> { + year() + .then_ignore(just('-')) + .then(year()) + .or(year() + .then_ignore(just('-')) + .map(|yyyy| (yyyy, "".to_string()))) + .or(year().map(|yyyy| (yyyy, "".to_string()))) +} + +fn parse_copyright_owner() -> impl Parser> { + filter(|c: &char| { + c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || c.is_ascii_punctuation() + || *c == ' ' + }) + .repeated() + .at_least(1) + .collect() +} + #[cfg(test)] mod tests { use super::*; @@ -64,7 +132,48 @@ mod tests { assert_eq!(parse_magic_line().parse("%techniquev1"), Ok(1)); } - // Import all parent module items + #[test] + fn check_header_spdx() { + assert_eq!(parse_license().parse("MIT"), Ok("MIT".to_string())); + assert_eq!( + parse_license().parse("Public Domain"), + Ok("Public Domain".to_string()) + ); + assert_eq!( + parse_license().parse("CC BY-SA 3.0 IGO"), + Ok("CC BY-SA 3.0 IGO".to_string()) + ); + + assert_eq!( + parse_copyright_year().parse("2024"), + Ok(("2024".to_string(), "".to_string())) + ); + assert_eq!( + parse_copyright_year().parse("2024-"), + Ok(("2024".to_string(), "".to_string())) + ); + assert_eq!( + parse_copyright_year().parse("2002-2024"), + Ok(("2002".to_string(), "2024".to_string())) + ); + + assert!(parse_copyright_year() + .parse("24") + .is_err()); + assert!(parse_copyright_year() + .parse("02-24") + .is_err()); + + assert_eq!( + parse_copyright_owner().parse("ACME"), + Ok("ACME".to_string()) + ); + assert_eq!( + parse_copyright_owner().parse("ACME, Inc."), + Ok("ACME, Inc.".to_string()) + ); + } + /* #[test] fn check_procedure_declaration_explicit() { @@ -97,132 +206,27 @@ mod tests { } */ /* - #[test] - fn check_procedure_declaration_macro() { - parses_to! { - parser: TechniqueParser, - input: "making_coffee : Beans, Milk -> Coffee", - rule: Rule::declaration, - tokens: [ - declaration(0, 37, [ - identifier(0, 13), - signature(16, 37, [ - forma(16, 21), - forma(23, 27), - forma(31, 37) + #[test] + fn check_procedure_declaration_macro() { + parses_to! { + parser: TechniqueParser, + input: "making_coffee : Beans, Milk -> Coffee", + rule: Rule::declaration, + tokens: [ + declaration(0, 37, [ + identifier(0, 13), + signature(16, 37, [ + forma(16, 21), + forma(23, 27), + forma(31, 37) + ]) ]) - ]) - ] - }; - } - - #[test] - fn check_header_spdx() { - parses_to! { - parser: TechniqueParser, - input: "! MIT; (c) ACME, Inc.", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 21, [ - license(2, 5), - copyright(7, 21, [ - owner(11, 21) - ]) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "! MIT; (c) 2024 ACME, Inc.", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 26, [ - license(2, 5), - copyright(7, 26, [ - year(11, 15), - owner(16, 26) - ]) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "! PD", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 4, [ - license(2, 4) - ]) - ] - }; - - parses_to! { - parser: TechniqueParser, - input: "MIT", - rule: Rule::license, - tokens: [ - license(0, 3), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "Public Domain", - rule: Rule::license, - tokens: [ - license(0, 13), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "CC BY-SA 3.0 IGO", - rule: Rule::license, - tokens: [ - license(0, 16), - ] - }; - - parses_to! { - parser: TechniqueParser, - input: "2024", - rule: Rule::year, - tokens: [ - year(0, 4), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "2024-", - rule: Rule::year, - tokens: [ - year(0, 5), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "2002-2024", - rule: Rule::year, - tokens: [ - year(0, 9), - ] - }; - fails_with! { - parser: TechniqueParser, - input: "02", - rule: Rule::year, - positives: [Rule::year], - negatives: [], - pos: 0 - }; - fails_with! { - parser: TechniqueParser, - input: "02-24", - rule: Rule::year, - positives: [Rule::year], - negatives: [], - pos: 0 - }; - } + ] + }; + } + */ + /* #[test] fn check_header_template() { parses_to! { From f15e5926059aa7dbf6ffc66399848cf92a667ef3 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Tue, 1 Oct 2024 12:42:12 +1000 Subject: [PATCH 24/27] Complete SPDX header line parser --- src/parsing/parser.rs | 114 +++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 62 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index e255e33..c74ec79 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -22,6 +22,7 @@ fn parse_identifier() -> impl Parser> { filter(|c: &char| c.is_ascii_lowercase() || c.is_ascii_digit() || *c == '_').repeated(), ) .collect() + // .validate(|s : String, span : Range, emit| if s.len() != span.end() - span.start() { emit(Simple::custom(span, "Wrong length")) }) } fn parse_magic_line() -> impl Parser> { @@ -30,62 +31,41 @@ fn parse_magic_line() -> impl Parser> { .ignore_then(just("v1").to(1u8)) } -fn parse_spdx_line() -> impl Parser> -{ +fn parse_spdx_line() -> impl Parser, Option), Error = Simple> { just('!') - .ignore_then(parse_license()) - .then_ignore(just(';')) - .then(parse_copyright()) + .ignore_then( + parse_license() + .padded() + .or_not(), + ) + .then( + just(';') + .ignore_then( + just("(c)") + .or(just("(C)")) + .or(just("©")) + .padded(), + ) + .ignore_then(parse_copyright().padded()) + .or_not(), + ) } fn parse_license() -> impl Parser> { filter(|c: &char| { - c.is_ascii_uppercase() - || c.is_ascii_lowercase() - || c.is_ascii_digit() - || *c != ';' // symbol which separates license and copyright probably shouldn't ever encounter it - || c.is_ascii_punctuation() - || *c == ' ' + *c != ';' + && (c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || c.is_ascii_punctuation() + || *c == ' ') }) .repeated() .at_least(1) .collect() } -// change to a semantic Copyright type fn parse_copyright() -> impl Parser> { - let p = parse_copyright_year() - .padded() - .then(parse_copyright_owner()); - - p.map(|((y1, y2), o)| { - let mut r = String::new(); - r.push_str(&y1); - r.push_str(&y2); - r.push_str(&o); - r - }) -} - -fn year() -> impl Parser> { - filter(|c: &char| c.is_ascii_digit()) - .repeated() - .at_least(4) - .at_most(4) - .collect() -} - -fn parse_copyright_year() -> impl Parser> { - year() - .then_ignore(just('-')) - .then(year()) - .or(year() - .then_ignore(just('-')) - .map(|yyyy| (yyyy, "".to_string()))) - .or(year().map(|yyyy| (yyyy, "".to_string()))) -} - -fn parse_copyright_owner() -> impl Parser> { filter(|c: &char| { c.is_ascii_uppercase() || c.is_ascii_lowercase() @@ -144,33 +124,43 @@ mod tests { Ok("CC BY-SA 3.0 IGO".to_string()) ); + assert_eq!(parse_copyright().parse("ACME"), Ok("ACME".to_string())); assert_eq!( - parse_copyright_year().parse("2024"), - Ok(("2024".to_string(), "".to_string())) + parse_copyright().parse("ACME, Inc."), + Ok("ACME, Inc.".to_string()) ); + assert_eq!( - parse_copyright_year().parse("2024-"), - Ok(("2024".to_string(), "".to_string())) + parse_copyright().parse("2024 ACME, Inc."), + Ok("2024 ACME, Inc.".to_string()) ); + assert_eq!( - parse_copyright_year().parse("2002-2024"), - Ok(("2002".to_string(), "2024".to_string())) + parse_spdx_line().parse("! PD"), + Ok((Some("PD".to_string()), None)) ); - - assert!(parse_copyright_year() - .parse("24") - .is_err()); - assert!(parse_copyright_year() - .parse("02-24") - .is_err()); - assert_eq!( - parse_copyright_owner().parse("ACME"), - Ok("ACME".to_string()) + parse_spdx_line().parse("! MIT; (c) ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) ); assert_eq!( - parse_copyright_owner().parse("ACME, Inc."), - Ok("ACME, Inc.".to_string()) + parse_spdx_line().parse("! MIT; (C) ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! MIT; © ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! MIT; (c) 2024 ACME, Inc."), + Ok((Some("MIT".to_string()), Some("2024 ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! CC BY-SA 3.0 [IGO]; (c) 2024 ACME, Inc."), + Ok(( + Some("CC BY-SA 3.0 [IGO]".to_string()), + Some("2024 ACME, Inc.".to_string()) + )) ); } From 410cdccc391cdbaff0f540e278da9d61ce48ea34 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 3 Oct 2024 19:11:05 +1000 Subject: [PATCH 25/27] Parse template line --- src/parsing/parser.rs | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index c74ec79..d3d6900 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -78,6 +78,28 @@ fn parse_copyright() -> impl Parser> { .collect() } +fn parse_template_line() -> impl Parser, Error = Simple> { + just('&').ignore_then( + parse_template() + .padded() + .or_not(), + ) +} + +fn parse_template() -> impl Parser> { + filter(|c: &char| { + c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || *c == '.' + || *c == ',' + || *c == '-' + }) + .repeated() + .at_least(1) + .collect() +} + #[cfg(test)] mod tests { use super::*; @@ -164,6 +186,25 @@ mod tests { ); } + #[test] + fn check_header_template() { + assert_eq!( + parse_template().parse("checklist"), + Ok("checklist".to_string()) + ); + assert_eq!( + parse_template().parse("checklist,v1"), + Ok("checklist,v1".to_string()) + ); + assert_eq!( + parse_template().parse("checklist-v1.0"), + Ok("checklist-v1.0".to_string()) + ); + assert_eq!( + parse_template_line().parse("& checklist-v1"), + Ok(Some("checklist-v1".to_string())) + ); + } /* #[test] fn check_procedure_declaration_explicit() { From e67d11066a0e04ab3103e4f56cfce834bfbff47e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Tue, 8 Oct 2024 14:42:29 +1100 Subject: [PATCH 26/27] Remove obscelete test code --- src/parsing/parser.rs | 123 ------------------------------------------ 1 file changed, 123 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index d3d6900..b2f7490 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,7 +1,5 @@ // parsing machinery -// struct TechniqueParser; - use chumsky::prelude::*; pub fn parse_via_chumsky(content: &str) { @@ -205,125 +203,4 @@ mod tests { Ok(Some("checklist-v1".to_string())) ); } - /* - #[test] - fn check_procedure_declaration_explicit() { - let input = "making_coffee : Beans, Milk -> Coffee"; - - // let declaration = TechniqueParser::parse(Rule::declaration, &input) - // .expect("Unsuccessful Parse") - // .next() - // .unwrap(); - - assert_eq!( - input, // FIXME - "making_coffee : Beans, Milk -> Coffee" - ); - - // assert_eq!(identifier.as_str(), "making_coffee"); - // assert_eq!(identifier.as_rule(), Rule::identifier); - - // assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); - // assert_eq!(signature.as_rule(), Rule::signature); - - // assert_eq!(domain1.as_str(), "Beans"); - // assert_eq!(domain1.as_rule(), Rule::forma); - - // assert_eq!(domain2.as_str(), "Milk"); - // assert_eq!(domain2.as_rule(), Rule::forma); - - // assert_eq!(range.as_str(), "Coffee"); - // assert_eq!(range.as_rule(), Rule::forma); - } - */ - /* - #[test] - fn check_procedure_declaration_macro() { - parses_to! { - parser: TechniqueParser, - input: "making_coffee : Beans, Milk -> Coffee", - rule: Rule::declaration, - tokens: [ - declaration(0, 37, [ - identifier(0, 13), - signature(16, 37, [ - forma(16, 21), - forma(23, 27), - forma(31, 37) - ]) - ]) - ] - }; - } - */ - - /* - #[test] - fn check_header_template() { - parses_to! { - parser: TechniqueParser, - input: "& checklist", - rule: Rule::template_line, - tokens: [ - template_line(0, 11, [ - template(2, 11) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "& nasa-flight-plan-v4.0", - rule: Rule::template_line, - tokens: [ - template_line(0, 23, [ - template(2, 23) - ]) - ] - }; - fails_with! { - parser: TechniqueParser, - input: "&", - rule: Rule::template_line, - positives: [Rule::template], - negatives: [], - pos: 1 - }; - } - - #[test] - fn check_declaration_syntax() { - parses_to! { - parser: TechniqueParser, - input: "p :", - rule: Rule::declaration, - tokens: [ - declaration(0, 3, [ - identifier(0, 1) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "p : A -> B", - rule: Rule::declaration, - tokens: [ - declaration(0, 10, [ - identifier(0, 1), - signature(4, 10, [ - forma(4, 5), - forma(9, 10) - ]) - ]) - ] - }; - fails_with! { - parser: TechniqueParser, - input: "cook-pizza :", - rule: Rule::declaration, - positives: [Rule::declaration], - negatives: [], - pos: 0 - }; - } - */ } From 2a5030e8e57159c38ebcd2a338119b0ef1fc2f22 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Tue, 8 Oct 2024 14:45:32 +1100 Subject: [PATCH 27/27] Add failing test --- src/parsing/parser.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index b2f7490..5f29f4e 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -121,6 +121,12 @@ mod tests { let result = parse_identifier().parse(input); assert!(result.is_err()); + + let input = "make-dinner"; + + let result = parse_identifier().parse(input); + + assert!(result.is_err()); } #[test]