(*
 * PXF (Proto eXpressive Format) — concrete syntax in EBNF.
 *
 * Notation: ISO/IEC 14977 EBNF.
 *   =         definition
 *   ,         concatenation
 *   ;         end of rule
 *   '...'     terminal string literal
 *   "..."     terminal string literal (double-quoted form, used below for
 *             the apostrophe terminal)
 *   |         alternation
 *   { x }     zero or more
 *   [ x ]     zero or one (optional)
 *   ( x )     grouping
 *   ?...?     inlined description (e.g. character class)
 *   (* *)     comment
 *
 * Whitespace (spaces, tabs, CR) and newlines are insignificant between tokens
 * and are not shown explicitly in the rules below. Comments may appear
 * anywhere whitespace may appear.
 *)

(* === Document === *)

document         = [ type_directive ] , { entry } ;
type_directive   = '@type' , identifier ;

(* === Entries === *)

(* An entry is a key followed by exactly one of three tails:
   '=' value, ':' value, or a braced block of nested entries. *)
entry            = key , ( assignment_tail | map_tail | block_tail ) ;
assignment_tail  = '=' , value ;
map_tail         = ':' , value ;
block_tail       = '{' , { entry } , '}' ;
key              = identifier | string | integer ;

(* === Values === *)

value            = string
                 | integer
                 | float
                 | bool
                 | null
                 | bytes
                 | timestamp
                 | duration
                 | identifier
                 | list
                 | block_value ;

(* NOTE(review): the Notes at the end of this file say the comma is consumed
   if present after each element, which would also admit a trailing comma
   before the closing bracket. This rule does not derive a trailing comma;
   confirm against the parser which of the two is authoritative. *)
list             = '[' , [ value , { [ ',' ] , value } ] , ']' ;
block_value      = '{' , { entry } , '}' ;

(* === Lexical: identifiers, literals === *)

identifier       = ident_start , { ident_part } ;
ident_start      = letter | '_' ;
ident_part       = letter | digit | '_' | '.' ;

bool             = 'true' | 'false' ;
null             = 'null' ;

(* === Lexical: numbers === *)

integer          = [ '-' ] , digit , { digit } ;
float            = [ '-' ] , digit , { digit } ,
                   ( '.' , { digit } , [ exponent ] | exponent ) ;
exponent         = ( 'e' | 'E' ) , [ '+' | '-' ] , digit , { digit } ;

(* === Lexical: timestamp & duration === *)

(* RFC 3339 timestamp; recognized when the input begins with exactly four
   digits followed by '-'.
   Examples: 2024-01-15T10:30:00Z, 2024-01-15T10:30:00.123456789+02:00. *)
timestamp        = ?RFC 3339 date-time? ;

(* Go time.ParseDuration syntax. Examples: 30s, 1h30m, 500ms, 1.5h. *)
duration         = duration_segment , { duration_segment } ;
duration_segment = digit , { digit } , [ '.' , digit , { digit } ] , time_unit ;
time_unit        = 'ns' | 'us' | 'µs' | 'ms' | 's' | 'm' | 'h' ;

(* === Lexical: strings === *)

string           = simple_string | triple_string ;
simple_string    = '"' , { string_char | escape_seq } , '"' ;
string_char      = ?any byte except '"' or '\' or LF? ;

(* Triple-quoted strings preserve raw content between """ delimiters.
   No escape interpretation; the leading newline is stripped and the closing
   line's indent is removed from each preceding line. *)
triple_string    = '"""' , ?any text not containing """? , '"""' ;

escape_seq       = '\' , ( simple_escape
                         | hex_escape
                         | octal_escape
                         | unicode_4_escape
                         | unicode_8_escape ) ;
simple_escape    = '"' | '\' | "'" | '?'
                 | 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' ;
hex_escape       = 'x' , hex_digit , hex_digit ;
octal_escape     = oct_lead , oct_digit , oct_digit ;   (* 3 octal digits, value <= 0xFF *)
unicode_4_escape = 'u' , hex_digit , hex_digit , hex_digit , hex_digit ;
unicode_8_escape = 'U' , hex_digit , hex_digit , hex_digit , hex_digit ,
                         hex_digit , hex_digit , hex_digit , hex_digit ;

(* === Lexical: bytes === *)

(* The body must be valid base64 (standard or raw, padding optional).
   Backslashes are not interpreted inside b"...". *)
bytes            = 'b' , '"' , { base64_char } , '"' ;
base64_char      = letter | digit | '+' | '/' | '=' ;

(* === Lexical: comments === *)

comment          = line_comment | block_comment ;
line_comment     = ( '#' | '//' ) , { ?any byte except LF? } ;

(* The body is described as text that does not contain the closing delimiter,
   in the same way as triple_string above, so that a block comment ends at
   the first closing delimiter rather than at an arbitrary later one. *)
block_comment    = '/*' , ?any text not containing */? , '*/' ;

(* === Character classes === *)

letter           = 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I'
                 | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R'
                 | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
                 | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i'
                 | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r'
                 | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' ;
digit            = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ;
hex_digit        = digit
                 | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
                 | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' ;
oct_digit        = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' ;
oct_lead         = '0' | '1' | '2' | '3' ;   (* keeps \nnn <= 0xFF *)

(* === Notes ===
 *
 * - Unicode escapes \uHHHH and \UHHHHHHHH must form a valid scalar value:
 *   the codepoint must be <= U+10FFFF and must not be a surrogate half
 *   (U+D800..U+DFFF).
 * - List elements may be separated by commas, by newlines, or both. The
 *   comma is consumed if present after each element, otherwise the next
 *   value is parsed directly.
 * - The lexer recognizes a 4-digit-year-then-'-' prefix as a timestamp,
 *   and digits-followed-by-a-time-unit-letter as a duration. Negative
 *   integers and identifiers that begin with letters take precedence
 *   over those forms.
 *)