Skip to content

Commit

Permalink
fixed ipv4 for general dot-decimal notation
Browse files Browse the repository at this point in the history
  • Loading branch information
bondeje committed Sep 9, 2024
1 parent 8e8d27b commit da9aa0e
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 18 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ All `LOG_EVENTS` in the code that are lower on this list will be omitted (in co
I have included a couple of toy parsers generated with <b>peggy</b> in `examples/`. These are not intended to be stand-alone projects, as they are not in the best of condition and are of limited practical use--the possible exception being the C parser. They are meant to show different ways to use <b>peggy</b>. If you have any interesting use cases or ideas, let me know.

### ipv4
This validates a string as ipv4. This is only included because it is the simplest possible grammar I could think of to show how to build a parser with <b>peggy</b>. It is of no other use.
This validates a string in dotted decimal notation as ipv4. This is only included because it is the simplest possible grammar I could think of to show how to build a parser with <b>peggy</b>. It is of no other use.

### csv
A simple csv parser. Generates a `CSVData` struct from an input file. Data is not converted to any data types but kept as strings. This example is more to demonstrate the linearity of the parser. The build command `make test_all` will parse the files in `examples/csv/sample_files` and time the results.
Expand Down Expand Up @@ -176,7 +176,7 @@ int main(int narg, char ** args) {
initialize the parser for the given grammar. This is a generic parser that will
simply provide a token list and an AST
*/
Parser_init(&parser, (Rule *)&token, (Rule *)&root, MYDSL_NRULES);
Parser_init(&parser, (Rule *)&mydsl_token, (Rule *)&mydsl_root, MYDSL_NRULES, 0);
// optional for logging. default is "stdout" with LOG_LEVEL_ERROR
Parser_set_log_file(&parser, "stdout", LOG_LEVEL_WARN);
// LOG_LEVEL_* above are the same that can be specified at build time (see above)
Expand Down
6 changes: 3 additions & 3 deletions examples/ipv4/ipv4.grmr
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// ipv4.grmr
export = ipv4 // redundant but allows change of file name
import = ipv4parser // check_ipv4 is declared in ipv4parser.h
export = ipv4 // redundant but allows change of file name
import = ipv4parser // check_ipv4 is declared in ipv4parser.h

punctuator: '.' // needed to be able to use '.' in ipv4
digit: "[0-9]"
octet: digit{1,3} // 0 digits and more than 3 are definitely errors
octet: digit+ // 0 digits is definitely an error; the numeric range is checked in check_ipv4
token: punctuator | digit
ipv4(check_ipv4): '.'.octet // check_ipv4 as a build transform function triggers AST traversal
46 changes: 33 additions & 13 deletions examples/ipv4/ipv4parser.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// ipv4parser.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#include <peggy/astnode.h>
#include <peggy/rule.h>
Expand All @@ -12,36 +14,54 @@

ASTNode ASTNode_syntax_error = {0}; // used to signal a failure that has already been addressed

int check_octet(Parser * parser, ASTNode * node, unsigned char loc) {
size_t max_ull_len = 0;

int check_octet(Parser * parser, ASTNode * node, unsigned char loc, size_t max) {
// only success of the subrule will make it here. Do not have to check node for failure, node == NULL is a more serious failure so assume it is non-null
Token * toks = node->token_start;
char const * const str = toks->string;
char const * const str = node->token_start->string;
size_t len = ASTNode_string_length(node);
// if the octet is < 3 decimals or numerically <= 255, it is valid
//printf("octet (%zu)(%zu)(%zu): %.*s\n", len, node->str_length, node->nchildren, (int)len, str);
if (len && (len < 3 || str[0] < '2' || (str[0] == '2' && (str[1] < '5' || (str[1] == '5' && str[2] <= '5'))))) {
return 0;
if (len > max_ull_len) { // if length of string is longer than an unsigned long long can possibly be, fail
printf("ipv4 failed. invalid octet at %hu (%.*s). too many characters for valid number\n", loc, (int)len, str);
return 1;
}
unsigned long long val = strtoull(str, NULL, 10);
if (val >= max) {
printf("ipv4 failed. invalid octet at %hu (%.*s). must be in range [0-%zu)\n", loc, (int)len, str, max);
return 1;
}
printf("ipv4 failed. invalid octet at %hu (%.*s). must be in range 0-255\n", loc, (int)len, str);
return 1;
return 0;
}
ASTNode * check_ipv4(Production * octet, Parser * parser, ASTNode * node) {
// if the parser has already failed or resulted in something other than 2-4 octets joined by '.' (3 to 7 children), fail
if (node->nchildren != 7 || Parser_is_fail_node(parser, node)) {
printf("%s is NOT valid ipv4. %zu octets not found, need 4.\n", parser->token_head->next->string, (node->nchildren + 1) / 2);
if (node->nchildren < 3 || node->nchildren > 7 || Parser_is_fail_node(parser, node)) {
printf("%s is NOT valid ipv4. %zu octets not found, need 2-4.\n", parser->token_head->next->string, (node->nchildren + 1) / 2);
return &ASTNode_syntax_error;
}

for (size_t i = 0; i < node->nchildren; i++) {
if (check_octet(parser, node->children[i], i + 1)) {
unsigned char noctets = (node->nchildren >> 1) + 1;
// exclusive maximum value for the final octet, which also covers the bytes of any omitted octets
size_t max = 1 << (8 * (5 - noctets));
unsigned char i = 0;
while (i < noctets - 1) {
if (check_octet(parser, node->children[2 * i], i, 256)) {
return &ASTNode_syntax_error;
}
i++;
}
while (i < noctets) {
if (check_octet(parser, node->children[2 * i], i, max)) {
return &ASTNode_syntax_error;
}
max >>= 8;
i++;
}
printf("%s is valid ipv4\n", parser->token_head->next->string);
return node;
}

int main(int narg, char ** args) {
char buffer[256];
max_ull_len = snprintf(buffer, 256, "%llu", ULLONG_MAX);
if (narg > 1) {
Parser parser = {._class = &Parser_class};
Parser_init(&parser, (Rule *)&ipv4_token, (Rule *)&ipv4_ipv4, IPV4_NRULES, 0);
Expand Down

0 comments on commit da9aa0e

Please sign in to comment.