From 65d96b2ca30b34bcd942126c4965f4c20eede37c Mon Sep 17 00:00:00 2001 From: fuzzy-boiii23a Date: Tue, 21 Nov 2023 00:33:25 -0800 Subject: [PATCH] added dictionary and corpus --- CONTRIBUTING.md | 10 +- gumbo-parser/Makefile | 2 +- gumbo-parser/fuzzer/build.sh | 6 + gumbo-parser/fuzzer/gumbo.dict | 560 +++++++++++++++++++++++++++ gumbo-parser/fuzzer/gumbo_corpus.zip | Bin 0 -> 418446 bytes 5 files changed, 575 insertions(+), 3 deletions(-) create mode 100644 gumbo-parser/fuzzer/gumbo.dict create mode 100644 gumbo-parser/fuzzer/gumbo_corpus.zip diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index beb83ffb713..acfba273129 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -240,13 +240,19 @@ bundle exec rake compile test When making changes or adding new features to `gumbo-parser`, it's recommended to run [libfuzzer](https://llvm.org/docs/LibFuzzer.html) against `gumbo-parser` using various [sanitizers](https://github.com/google/sanitizers/wiki). -Build the fuzzers by navigating to the `gumbo-parser` directory and running `make fuzzers`. Once built, navigate to the `gumbo-parser/fuzzer/build` directory and execute one of the following binaries in this directory with no arguments to start fuzzing: +Build the fuzzers by navigating to the `gumbo-parser` directory and running `make fuzzers`. 
Once built, navigate to the `gumbo-parser/fuzzer/build` directory and execute one of the following binaries in this directory: -- parse_fuzzer-normal (standard fuzzer with no sanitizer) +- parse_fuzzer (standard fuzzer with no sanitizer) - parse_fuzzer-asan (fuzzer built using [ASAN](https://clang.llvm.org/docs/AddressSanitizer.html)) - parse_fuzzer-msan (fuzzer built using [MSAN](https://clang.llvm.org/docs/MemorySanitizer.html)) - parse_fuzzer-ubsan (fuzzer built using [UBSAN](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html)) +To fuzz more efficiently, use the dictionary (gumbo.dict) and corpus (gumbo_corpus) found in `gumbo-parser/fuzzer` using the following arguments (assuming parse_fuzzer is in use): + +``` +./parse_fuzzer -dict=../gumbo.dict ../gumbo_corpus +``` + If the binary executed successfully you should now be seeing the following output filling up your terminal (see https://llvm.org/docs/LibFuzzer.html#output for more information): ``` diff --git a/gumbo-parser/Makefile b/gumbo-parser/Makefile index c22dfc0b09f..dd729bc15d9 100644 --- a/gumbo-parser/Makefile +++ b/gumbo-parser/Makefile @@ -95,7 +95,7 @@ coverage: clean: $(RM) -r build - $(RM) -r fuzzer/build fuzzer/src-* + $(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus build/src/flags: | build/src @echo 'old_CC := $(CC)' > $@ diff --git a/gumbo-parser/fuzzer/build.sh b/gumbo-parser/fuzzer/build.sh index dc75c516e31..849cd12f2a4 100755 --- a/gumbo-parser/fuzzer/build.sh +++ b/gumbo-parser/fuzzer/build.sh @@ -4,6 +4,12 @@ set -eu cd $(dirname $0) +echo $PWD + +if [ ! 
-d gumbo_corpus ]; then + unzip gumbo_corpus.zip -d gumbo_corpus +fi + SANITIZER_OPTS="" SANITIZER_LINK="" SANITIZER=${SANITIZER:-normal} diff --git a/gumbo-parser/fuzzer/gumbo.dict b/gumbo-parser/fuzzer/gumbo.dict new file mode 100644 index 00000000000..7a10b3b4d74 --- /dev/null +++ b/gumbo-parser/fuzzer/gumbo.dict @@ -0,0 +1,560 @@ +# +# AFL dictionary for HTML parsers +# ------------------------------- +# +# A basic collection of HTML strings likely to matter to HTML parsers. +# +# Created by Michal Zalewski +# + +tag_a="<a>" +tag_abbr="<abbr>" +tag_acronym="<acronym>" +tag_address="<address>
" +tag_annotation_xml="<annotation-xml>" +tag_applet="<applet>" +tag_area="<area>" +tag_article="<article>
" +tag_aside="