diff --git a/Cargo.lock b/Cargo.lock index d3f609d..e2fbd9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,24 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "anstream" version = "0.6.15" @@ -57,12 +75,40 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "cc" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9540e661f81799159abee814118cc139a2004b3a3aa3ea37724a1b66530b90e0" +dependencies = [ + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown", + "stacker", +] + [[package]] name = "clap" version = "4.5.16" @@ -97,6 +143,35 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "errno" version = "0.3.9" @@ -107,6 +182,26 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -171,6 +266,51 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "pest" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c73c26c01b8c87956cea613c907c9d6ecffd8d18a2a5908e5de0adfaa185cea" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.12" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "664d22978e2815783adbdd2c588b455b1bd625299ce36b2a99881ac9627e6d8d" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d5487022d5d33f4c30d91c22afa240ce2a644e87fe08caad974d4eab6badbe" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0091754bbd0ea592c4deb3a122ce8ecbb0753b738aa82bc055fcc2eccc8d8174" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -186,6 +326,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.37" @@ -246,6 +395,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -255,12 +415,31 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "stacker" +version = "0.1.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.52.0", +] + [[package]] name = "strsim" version = "0.11.1" @@ -282,11 +461,15 @@ dependencies = [ name = "technique" version = "0.3.0" dependencies = [ + "chumsky", "clap", + "pest", + "pest_derive", "serde", "tinytemplate", "tracing", "tracing-subscriber", + "winnow", ] [[package]] @@ -299,6 +482,26 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -376,6 +579,18 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -394,6 +609,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 
+ [[package]] name = "winapi" version = "0.3.9" @@ -554,3 +775,32 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" +dependencies = [ + "memchr", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 75da6fc..ec5e3b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,8 +8,12 @@ repository = "https://github.com/technique-lang/technique" license = "MIT" [dependencies] +chumsky = "0.9.3" clap = { version = "4.5.16", features = [ "wrap_help" ] } +pest = "2.7.11" +pest_derive = "2.7.11" serde = { version = "1.0.209", features = [ "derive" ] } tinytemplate = "1.2.1" tracing = "0.1.40" tracing-subscriber = "0.3.18" +winnow = "0.6.18" diff --git a/src/main.rs b/src/main.rs index 8a81de0..52c5cde 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ use tracing::debug; use tracing_subscriber; mod rendering; +mod parsing; fn main() { const VERSION: &str = concat!("v", env!("CARGO_PKG_VERSION")); @@ -89,10 +90,11 @@ fn main() { let filename = submatches .get_one::("filename") - .unwrap(); // argument are required by definitin so always present + .unwrap(); // argument are required by definition so always present debug!(filename); + 
parsing::load(&Path::new(filename)); todo!(); } Some(("format", submatches)) => { diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs new file mode 100644 index 0000000..1b4b2ff --- /dev/null +++ b/src/parsing/mod.rs @@ -0,0 +1,11 @@ +// parser for the Technique language +use std::path::Path; + +pub mod parser; +/// Reads the file at `source` into memory and passes its text to `parser::parse_via_chumsky`. +pub fn load(source: &Path) { + // read the whole file into a String; panics with the message below if the read fails + let content = std::fs::read_to_string(source).expect("Failed to read the source file"); + + parser::parse_via_chumsky(content.as_str()); +} diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs new file mode 100644 index 0000000..5f29f4e --- /dev/null +++ b/src/parsing/parser.rs @@ -0,0 +1,212 @@ +// parsing machinery + +use chumsky::prelude::*; +/// Parses `content` as an identifier, prints the result with `{:?}`, then exits the process with status 0. +pub fn parse_via_chumsky(content: &str) { + let result = parse_identifier().parse(content); + println!("{:?}", result); + std::process::exit(0); +} + +type Identifier = String; + +// takes a single lower case character then any lower case character, digit, +// or underscore. Based on the parser code in chumsky::text::ident(). 
+ +fn parse_identifier() -> impl Parser<char, Identifier, Error = Simple<char>> { + filter(|c: &char| c.is_ascii_lowercase()) + .map(Some) + .chain::<char, Vec<_>, _>( + filter(|c: &char| c.is_ascii_lowercase() || c.is_ascii_digit() || *c == '_').repeated(), + ) + .collect() + // NOTE: input after the identifier is left unconsumed; chain .then_ignore(end()) when the whole input must match +} +/// Parses the `% technique v1` magic line, yielding the format version as a number. +fn parse_magic_line() -> impl Parser<char, u8, Error = Simple<char>> { + just('%') + .ignore_then(just("technique").padded()) + .ignore_then(just("v1").to(1u8)) +} +/// Parses `! <license>; (c) <copyright>`; the license and the copyright part are each optional. +fn parse_spdx_line() -> impl Parser<char, (Option<String>, Option<String>), Error = Simple<char>> { + just('!') + .ignore_then( + parse_license() + .padded() + .or_not(), + ) + .then( + just(';') + .ignore_then( + just("(c)") + .or(just("(C)")) + .or(just("©")) + .padded(), + ) + .ignore_then(parse_copyright().padded()) + .or_not(), + ) +} +/// Accepts one or more ASCII letters, digits, punctuation characters or spaces, stopping at `;`. +fn parse_license() -> impl Parser<char, String, Error = Simple<char>> { + filter(|c: &char| { + *c != ';' + && (c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || c.is_ascii_punctuation() + || *c == ' ') + }) + .repeated() + .at_least(1) + .collect() +} +/// Accepts one or more ASCII letters, digits, punctuation characters or spaces. +fn parse_copyright() -> impl Parser<char, String, Error = Simple<char>> { + filter(|c: &char| { + c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || c.is_ascii_punctuation() + || *c == ' ' + }) + .repeated() + .at_least(1) + .collect() +} +/// Parses `& <template>`; the template name after the `&` is optional. +fn parse_template_line() -> impl Parser<char, Option<String>, Error = Simple<char>> { + just('&').ignore_then( + parse_template() + .padded() + .or_not(), + ) +} +/// Accepts one or more ASCII letters, digits, `.`, `,` or `-`. +fn parse_template() -> impl Parser<char, String, Error = Simple<char>> { + filter(|c: &char| { + c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || *c == '.' 
+ || *c == ',' + || *c == '-' + }) + .repeated() + .at_least(1) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn check_identifier_rules() { + let input = "make_dinner"; + + let result = parse_identifier().parse(input); + + assert_eq!(result, Ok("make_dinner".to_string())); + + let input = ""; + + let result = parse_identifier().parse(input); + + assert!(result.is_err()); + + let input = "MakeDinner"; + + let result = parse_identifier().parse(input); + + assert!(result.is_err()); + + let input = "make-dinner"; + // parse() alone would stop after "make" and succeed, so require end of input + let result = parse_identifier().then_ignore(end()).parse(input); + + assert!(result.is_err()); + } + + #[test] + fn check_magic_line() { + assert_eq!(parse_magic_line().parse("% technique v1"), Ok(1)); + assert_eq!(parse_magic_line().parse("%technique v1"), Ok(1)); + // this isn't really ideal, but there's no absolutely vital reason it + // has to be rejected. + assert_eq!(parse_magic_line().parse("%techniquev1"), Ok(1)); + } + + #[test] + fn check_header_spdx() { + assert_eq!(parse_license().parse("MIT"), Ok("MIT".to_string())); + assert_eq!( + parse_license().parse("Public Domain"), + Ok("Public Domain".to_string()) + ); + assert_eq!( + parse_license().parse("CC BY-SA 3.0 IGO"), + Ok("CC BY-SA 3.0 IGO".to_string()) + ); + + assert_eq!(parse_copyright().parse("ACME"), Ok("ACME".to_string())); + assert_eq!( + parse_copyright().parse("ACME, Inc."), + Ok("ACME, Inc.".to_string()) + ); + + assert_eq!( + parse_copyright().parse("2024 ACME, Inc."), + Ok("2024 ACME, Inc.".to_string()) + ); + + assert_eq!( + parse_spdx_line().parse("! PD"), + Ok((Some("PD".to_string()), None)) + ); + assert_eq!( + parse_spdx_line().parse("! MIT; (c) ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! MIT; (C) ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! 
MIT; © ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! MIT; (c) 2024 ACME, Inc."), + Ok((Some("MIT".to_string()), Some("2024 ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! CC BY-SA 3.0 [IGO]; (c) 2024 ACME, Inc."), + Ok(( + Some("CC BY-SA 3.0 [IGO]".to_string()), + Some("2024 ACME, Inc.".to_string()) + )) + ); + } + + #[test] + fn check_header_template() { + assert_eq!( + parse_template().parse("checklist"), + Ok("checklist".to_string()) + ); + assert_eq!( + parse_template().parse("checklist,v1"), + Ok("checklist,v1".to_string()) + ); + assert_eq!( + parse_template().parse("checklist-v1.0"), + Ok("checklist-v1.0".to_string()) + ); + assert_eq!( + parse_template_line().parse("& checklist-v1"), + Ok(Some("checklist-v1".to_string())) + ); + } +} diff --git a/technique.pest b/technique.pest new file mode 100644 index 0000000..5063adf --- /dev/null +++ b/technique.pest @@ -0,0 +1,52 @@ +// Parsing Expression Grammar for v1 of the Technique Procedure Language + +WHITESPACE = _{ " " | "\t" } + +technique = { + SOI ~ + magic_line ~ + NEWLINE ~ + spdx_line? ~ + NEWLINE ~ + template_line? ~ + NEWLINE+ ~ + declaration ~ + NEWLINE+ ~ + EOI +} + +// File Format Header + +magic_line = { "%" ~ "technique" ~ "v1" } + +// License and Copyright Header + +spdx_line = { "!" ~ license ~ (";" ~ copyright)? } + +license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "." )* } + +copyright = { ("©" | "(c)" | "(C)") ~ year? ~ owner } + +year = @{ ASCII_DIGIT{4} ~ "-" ~ (ASCII_DIGIT{4})? | ASCII_DIGIT{4} } + +owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "," | "." )* } + +// Template Header + +template_line = { "&" ~ template } + +template = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | "." 
| "," )* } + +// Procedure Declaration + +declaration = { identifier ~ ":" ~ signature? } + +identifier = @{ ASCII_ALPHA_LOWER ~ (ASCII_ALPHA_LOWER | ASCII_DIGIT | "_")* } + +signature = { forma ~ ("," ~ forma )* ~ "->" ~ forma } + +forma = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } + + + +